From ef2cf611e289e02ef360b025029e1c8e79cb5d43 Mon Sep 17 00:00:00 2001 From: Paul-Liu Date: Sun, 13 Nov 2016 20:41:25 -0500 Subject: [PATCH 001/353] BUG: resampling with NaT in TimedeltaIndex (#13223) --- pandas/tseries/resample.py | 13 +++++++++++-- pandas/tseries/tests/test_resample.py | 9 +++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index e93e5637099c1..cc0cd447e22fc 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1224,8 +1224,10 @@ def _get_time_delta_bins(self, ax): data=[], freq=self.freq, name=ax.name) return binner, [], labels - start = ax[0] - end = ax[-1] + # Addresses GH #13223 + start = ax.min() + end = ax.max() + labels = binner = TimedeltaIndex(start=start, end=end, freq=self.freq, @@ -1234,6 +1236,13 @@ def _get_time_delta_bins(self, ax): end_stamps = labels + 1 bins = ax.searchsorted(end_stamps, side='left') + if ax.hasnans: + binner = binner.insert(0, tslib.NaT) + labels = labels.insert(0, tslib.NaT) + + n_NaT = sum([ax_i is tslib.NaT for ax_i in ax]) + bins = np.insert(bins, 0, n_NaT) + # Addresses GH #10530 if self.base > 0: labels += type(self.freq)(self.base) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 56953541265a6..bf6dfaa63ae8a 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1025,6 +1025,15 @@ def test_resample_timedelta_idempotency(self): expected = series assert_series_equal(result, expected) + def test_resample_timedelta_missing_values(self): + # GH 13223 + index = pd.to_timedelta(['0s', pd.NaT, '2s']) + series = pd.Series([2, 3, 5], index=index) + result = series.resample('1s').mean() + expected = pd.Series([2, np.nan, 5], index=pd.timedelta_range( + start='0s', end='2s', freq='1s')) + assert_series_equal(result, expected) + def test_resample_rounding(self): # GH 8371 # odd results when rounding is needed From 89b025b361f5c4113394d3cc844c4756ea4d9821 Mon Sep 17 00:00:00 2001 From: Paul-Liu Date: Thu, 17 Nov 2016 01:39:46 -0500 Subject: [PATCH 002/353] BUG: resampling with NaT in TimedeltaIndex (#13223) --- pandas/tseries/resample.py | 7 +++++-- pandas/tseries/tests/test_resample.py | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index cc0cd447e22fc..600a292687fed 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -7,7 +7,7 @@ from pandas.core.base import AbstractMethodError, GroupByMixin from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, - SeriesGroupBy, groupby, PanelGroupBy) + SeriesGroupBy, groupby, PanelGroupBy, DataError) from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.tseries.index import DatetimeIndex, date_range @@ -1219,6 +1219,9 @@ def _get_time_delta_bins(self, ax): raise TypeError('axis must be a TimedeltaIndex, but got ' 'an instance of %r' % type(ax).__name__) + if len(ax) > 0 and all(ax._isnan): + raise DataError('axis not valid') + if not len(ax): binner = labels = TimedeltaIndex( data=[], freq=self.freq, name=ax.name) @@ -1240,7 +1243,7 @@ def _get_time_delta_bins(self, ax): binner = binner.insert(0, tslib.NaT) labels = labels.insert(0, tslib.NaT) - n_NaT = sum([ax_i is tslib.NaT for ax_i in ax]) + n_NaT = ax._isnan.sum() bins = np.insert(bins, 0, n_NaT) # Addresses GH #10530 diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py 
index bf6dfaa63ae8a..d2c1de884f5b6 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1034,6 +1034,11 @@ def test_resample_timedelta_missing_values(self): start='0s', end='2s', freq='1s')) assert_series_equal(result, expected) + # all NaT + index = pd.to_timedelta([pd.NaT, pd.NaT, pd.NaT]) + series = pd.Series([2, 3, 5], index=index) + self.assertRaises(DataError, series.resample('1s').mean) + def test_resample_rounding(self): # GH 8371 # odd results when rounding is needed From 6378e383ea06cd84179ab849307ac664200bff0b Mon Sep 17 00:00:00 2001 From: Paul-Liu Date: Thu, 17 Nov 2016 16:56:31 -0500 Subject: [PATCH 003/353] fix pep8 --- pandas/tseries/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 600a292687fed..bfe629aec94fc 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -7,7 +7,8 @@ from pandas.core.base import AbstractMethodError, GroupByMixin from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, - SeriesGroupBy, groupby, PanelGroupBy, DataError) + SeriesGroupBy, groupby, PanelGroupBy, + DataError) from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.tseries.index import DatetimeIndex, date_range From cbe8e799a5f46a9eaaad79e357efd4664dae768c Mon Sep 17 00:00:00 2001 From: Paul Liu Date: Tue, 31 Jan 2017 23:03:15 -0500 Subject: [PATCH 004/353] better error message for all-nan groupings --- pandas/tseries/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index bfe629aec94fc..16055c304636f 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1221,7 +1221,7 @@ def _get_time_delta_bins(self, ax): 'an instance of %r' % type(ax).__name__) if len(ax) > 0 and all(ax._isnan): - raise DataError('axis not valid') + raise DataError('all-nan groupings not valid') if not len(ax): binner = labels = TimedeltaIndex( From 48fc9d613323ada9702a7d5c78c23eb0e8cae8a8 Mon Sep 17 00:00:00 2001 From: David Hoffman Date: Wed, 1 Feb 2017 15:42:58 -0500 Subject: [PATCH 005/353] BUG: Fix overflow error in cartesian_product When the numbers in `X` are large it can cause an overflow error on windows machine where the native `int` is 32 bit. Switching to np.intp alleviates this problem. Other fixes would include switching to np.uint32 or np.uint64. 
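As a minimal illustrative sketch of the failure mode described above (not part of the patch itself; the 32-bit dtype is forced explicitly below so the wrap-around reproduces on any platform, whereas on Windows it happens implicitly because the platform `int` is 32-bit there):

    import numpy as np

    X = [range(100000), range(100000)]   # two long inputs to cartesian_product

    # Old behaviour: the lengths are held in the platform int; with 32-bit
    # accumulation the running product of the lengths silently wraps around.
    lenX = np.fromiter((len(x) for x in X), dtype=np.int32)
    print(np.cumprod(lenX, dtype=np.int32)[-1])   # 1410065408, not 10**10

    # Fixed behaviour: np.intp is pointer-sized (64-bit on 64-bit Python),
    # so the product of the lengths no longer overflows.
    lenX = np.fromiter((len(x) for x in X), dtype=np.intp)
    print(np.cumprod(lenX)[-1])                   # 10000000000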
closes #15234 Author: David Hoffman Closes #15265 from david-hoffman/patch-1 and squashes the following commits: c9c8d5e [David Hoffman] Update v0.19.2.txt d54583e [David Hoffman] Remove `test_large_input` because it's too big 47a6c6c [David Hoffman] Update test so that it will actually run on "normal" machine 7aeee85 [David Hoffman] Added tests for large numbers b196878 [David Hoffman] Fix overflow error in cartesian_product --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tools/util.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f87fad051fad2..34048b8cc372d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -444,6 +444,7 @@ Bug Fixes - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) - Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) +- Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index 381e29283d417..8ec074fbf5950 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -58,7 +58,7 @@ def cartesian_product(X): if len(X) == 0: return [] - lenX = np.fromiter((len(x) for x in X), dtype=int) + lenX = np.fromiter((len(x) for x in X), dtype=np.intp) cumprodX = np.cumproduct(lenX) a = np.roll(cumprodX, 1) From 845208055845b0db58d2bfee7ba39f6862ce141c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Feb 2017 17:55:40 -0500 Subject: [PATCH 006/353] COMPAT: xarray 0.8.2 test compat w.r.t. CategoricalIndex indempotency (#15285) closes #15282 --- ci/install_travis.sh | 2 +- ci/requirements-2.7.run | 2 +- ci/requirements-3.5.pip | 1 + ci/requirements-3.5.run | 1 - pandas/tests/test_generic.py | 19 +++++++++++-------- 5 files changed, 14 insertions(+), 11 deletions(-) create mode 100644 ci/requirements-3.5.pip diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ded428c677f17..52b52d787aade 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -143,7 +143,7 @@ else echo "[pip installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" if [ -e ${REQ} ]; then - pip install --upgrade -r $REQ + pip install -r $REQ fi # may have addtl installation instructions for this build diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index 2bfb8a3777fdf..b5fc919297c76 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -20,4 +20,4 @@ html5lib=1.0b2 beautiful-soup=4.2.1 statsmodels jinja2=2.8 -xarray +xarray=0.8.0 diff --git a/ci/requirements-3.5.pip b/ci/requirements-3.5.pip new file mode 100644 index 0000000000000..0d9e44cf39fa4 --- /dev/null +++ b/ci/requirements-3.5.pip @@ -0,0 +1 @@ +xarray==0.9.1 diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index e15ca6079b4fe..ef354195c8f23 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -16,6 +16,5 @@ bottleneck sqlalchemy pymysql psycopg2 -xarray s3fs beautifulsoup4 diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index f7b7ae8c66382..0ca8ba47b8a8f 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -7,6 +7,7 @@ from numpy import nan import pandas as pd +from distutils.version import LooseVersion from pandas.types.common import is_scalar from pandas import (Index, Series, DataFrame, Panel, 
isnull, date_range, period_range, Panel4D) @@ -870,6 +871,7 @@ def test_describe_none(self): def test_to_xarray(self): tm._skip_if_no_xarray() + import xarray from xarray import DataArray s = Series([]) @@ -895,15 +897,16 @@ def testit(index, check_index_type=True, check_categorical=True): check_index_type=check_index_type, check_categorical=check_categorical) - for index in [tm.makeFloatIndex, tm.makeIntIndex, - tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeTimedeltaIndex]: - testit(index) + l = [tm.makeFloatIndex, tm.makeIntIndex, + tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, + tm.makeTimedeltaIndex] + + if LooseVersion(xarray.__version__) >= '0.8.0': + l.append(tm.makeCategoricalIndex) - # not idempotent - testit(tm.makeCategoricalIndex, check_index_type=False, - check_categorical=False) + for index in l: + testit(index) s = Series(range(6)) s.index.name = 'foo' From f6cfaabad9b9de6d0382e51a77b080723f84d778 Mon Sep 17 00:00:00 2001 From: Michael Lamparski Date: Thu, 2 Feb 2017 15:26:12 -0500 Subject: [PATCH 007/353] BUG: Support empty dict-likes in replace() closes #15289 Author: Michael Lamparski Closes #15294 from ExpHP/bugfix-15289 and squashes the following commits: f349e0a [Michael Lamparski] BUG: Support empty dict-likes in replace() --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 6 +++--- pandas/tests/frame/test_replace.py | 10 ++++++++++ pandas/tests/series/test_replace.py | 6 ++++++ 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 34048b8cc372d..d76a78c68fb73 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -512,3 +512,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) +- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 869062bd231fe..8074b167ff176 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -48,7 +48,7 @@ from pandas.tseries.frequencies import to_offset from pandas import compat from pandas.compat.numpy import function as nv -from pandas.compat import (map, zip, lrange, string_types, +from pandas.compat import (map, zip, lzip, lrange, string_types, isidentifier, set_function_name) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg @@ -3509,7 +3509,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex = True items = list(compat.iteritems(to_replace)) - keys, values = zip(*items) + keys, values = lzip(*items) or ([], []) are_mappings = [is_dict_like(v) for v in values] @@ -3523,7 +3523,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, value_dict = {} for k, v in items: - keys, values = zip(*v.items()) + keys, values = lzip(*v.items()) or ([], []) if set(keys) & set(values): raise ValueError("Replacement not allowed with " "overlapping keys and values") diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index adc7af225588c..f46215105b375 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -1055,3 +1055,13 @@ def test_replace_datetimetz(self): Timestamp('20130103', tz='US/Eastern')], 'B': [0, np.nan, 2]}) assert_frame_equal(result, expected) + + 
def test_replace_with_empty_dictlike(self): + # GH 15289 + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + df = DataFrame(mix) + assert_frame_equal(df, df.replace({})) + assert_frame_equal(df, df.replace(Series([]))) + + assert_frame_equal(df, df.replace({'b': {}})) + assert_frame_equal(df, df.replace(Series({'b': {}}))) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index d80328ea3863a..aa16f2cca9475 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -223,3 +223,9 @@ def test_replace2(self): self.assertTrue((ser[:5] == -1).all()) self.assertTrue((ser[6:10] == -1).all()) self.assertTrue((ser[20:30] == -1).all()) + + def test_replace_with_empty_dictlike(self): + # GH 15289 + s = pd.Series(list('abcd')) + tm.assert_series_equal(s, s.replace(dict())) + tm.assert_series_equal(s, s.replace(pd.Series([]))) From da92a5c94cb1d3c0f6044b783bc0ac4e7acb2dc2 Mon Sep 17 00:00:00 2001 From: TrigonaMinima Date: Thu, 2 Feb 2017 20:13:54 -0500 Subject: [PATCH 008/353] TST: DatetimeIndex compiled together in test_datetime.py xref #14854 Author: TrigonaMinima Closes #15266 from TrigonaMinima/issue-14854-datetime and squashes the following commits: 6ee2bd9 [TrigonaMinima] TST: Splitting test_datetime.py into smaller chunks (gh14854) 415a748 [TrigonaMinima] TST: Moving DatetimeIndex related tests from test_timeseries.py and flake8 fixes c43c7de [TrigonaMinima] TST: proper naming of files 458d141 [TrigonaMinima] TST: splitting test_datetime.py 1ff0819 [TrigonaMinima] TST: fix flake8 errors - test_datetime.py (GH14854) 9311161 [TrigonaMinima] TST: reorg of DatetimeIndex tests from tseries/tests/test_base.py to test_datetime.py (GH14854) 54421a5 [TrigonaMinima] TST: reorg of DatetimeIndex tests from test_datetimelike.py to test_datetime.py (GH14854) f83814b [TrigonaMinima] TST: reorg of DatetimeIndex tests from test_timeseries.py to test_datetime.py --- pandas/tests/indexes/datetimes/__init__.py | 0 pandas/tests/indexes/datetimes/test_astype.py | 122 ++ .../indexes/datetimes/test_construction.py | 425 +++++ .../tests/indexes/datetimes/test_datetime.py | 836 +++++++++ .../tests/indexes/datetimes/test_indexing.py | 244 +++ pandas/tests/indexes/datetimes/test_misc.py | 333 ++++ .../tests/indexes/datetimes/test_missing.py | 51 + pandas/tests/indexes/datetimes/test_ops.py | 1073 ++++++++++++ pandas/tests/indexes/datetimes/test_setops.py | 168 ++ pandas/tests/indexes/test_datetimelike.py | 669 +------- pandas/tseries/tests/test_base.py | 897 +--------- pandas/tseries/tests/test_timeseries.py | 1521 +---------------- setup.py | 1 + 13 files changed, 3259 insertions(+), 3081 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/__init__.py create mode 100644 pandas/tests/indexes/datetimes/test_astype.py create mode 100644 pandas/tests/indexes/datetimes/test_construction.py create mode 100644 pandas/tests/indexes/datetimes/test_datetime.py create mode 100644 pandas/tests/indexes/datetimes/test_indexing.py create mode 100644 pandas/tests/indexes/datetimes/test_misc.py create mode 100644 pandas/tests/indexes/datetimes/test_missing.py create mode 100644 pandas/tests/indexes/datetimes/test_ops.py create mode 100644 pandas/tests/indexes/datetimes/test_setops.py diff --git a/pandas/tests/indexes/datetimes/__init__.py b/pandas/tests/indexes/datetimes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/datetimes/test_astype.py 
b/pandas/tests/indexes/datetimes/test_astype.py new file mode 100644 index 0000000000000..f64d18a69a093 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -0,0 +1,122 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, + Int64Index) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_astype(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) + + result = idx.astype(object) + expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([1463356800000000000] + + [-9223372036854775808] * 3, dtype=np.int64) + tm.assert_index_equal(result, expected) + + rng = date_range('1/1/2000', periods=10) + result = rng.astype('i8') + self.assert_index_equal(result, Index(rng.asi8)) + self.assert_numpy_array_equal(result.values, rng.asi8) + + def test_astype_with_tz(self): + + # with tz + rng = date_range('1/1/2000', periods=10, tz='US/Eastern') + result = rng.astype('datetime64[ns]') + expected = (date_range('1/1/2000', periods=10, + tz='US/Eastern') + .tz_convert('UTC').tz_localize(None)) + tm.assert_index_equal(result, expected) + + # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex + result = pd.Series(pd.date_range('2012-01-01', periods=3)).astype(str) + expected = pd.Series( + ['2012-01-01', '2012-01-02', '2012-01-03'], dtype=object) + tm.assert_series_equal(result, expected) + + result = Series(pd.date_range('2012-01-01', periods=3, + tz='US/Eastern')).astype(str) + expected = Series(['2012-01-01 00:00:00-05:00', + '2012-01-02 00:00:00-05:00', + '2012-01-03 00:00:00-05:00'], + dtype=object) + tm.assert_series_equal(result, expected) + + def test_astype_str_compat(self): + # GH 13149, GH 13209 + # verify that we are returing NaT as a string (and not unicode) + + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) + result = idx.astype(str) + expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object) + tm.assert_index_equal(result, expected) + + def test_astype_str(self): + # test astype string - #10442 + result = date_range('2012-01-01', periods=4, + name='test_name').astype(str) + expected = Index(['2012-01-01', '2012-01-02', '2012-01-03', + '2012-01-04'], name='test_name', dtype=object) + tm.assert_index_equal(result, expected) + + # test astype string with tz and name + result = date_range('2012-01-01', periods=3, name='test_name', + tz='US/Eastern').astype(str) + expected = Index(['2012-01-01 00:00:00-05:00', + '2012-01-02 00:00:00-05:00', + '2012-01-03 00:00:00-05:00'], + name='test_name', dtype=object) + tm.assert_index_equal(result, expected) + + # test astype string with freqH and name + result = date_range('1/1/2011', periods=3, freq='H', + name='test_name').astype(str) + expected = Index(['2011-01-01 00:00:00', '2011-01-01 01:00:00', + '2011-01-01 02:00:00'], + name='test_name', dtype=object) + tm.assert_index_equal(result, expected) + + # test astype string with freqH and timezone + result = date_range('3/6/2012 00:00', periods=2, freq='H', + tz='Europe/London', name='test_name').astype(str) + expected = Index(['2012-03-06 00:00:00+00:00', + '2012-03-06 01:00:00+00:00'], + dtype=object, name='test_name') + tm.assert_index_equal(result, expected) + + def test_astype_datetime64(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, 
np.NaN]) + + result = idx.astype('datetime64[ns]') + tm.assert_index_equal(result, idx) + self.assertFalse(result is idx) + + result = idx.astype('datetime64[ns]', copy=False) + tm.assert_index_equal(result, idx) + self.assertTrue(result is idx) + + idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST') + result = idx_tz.astype('datetime64[ns]') + expected = DatetimeIndex(['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'], + dtype='datetime64[ns]') + tm.assert_index_equal(result, expected) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) + + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, 'timedelta64') + self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') + self.assertRaises(ValueError, idx.astype, 'datetime64') + self.assertRaises(ValueError, idx.astype, 'datetime64[D]') diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py new file mode 100644 index 0000000000000..ae4eb6ee397b6 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -0,0 +1,425 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas.tslib import OutOfBoundsDatetime +from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_construction_with_alt(self): + + i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') + i2 = DatetimeIndex(i, dtype=i.dtype) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) + self.assert_index_equal(i, i2) + + # localize into the provided tz + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') + expected = i.tz_localize(None).tz_localize('UTC') + self.assert_index_equal(i2, expected) + + # incompat tz/dtype + self.assertRaises(ValueError, lambda: DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) + + def test_construction_index_with_mixed_timezones(self): + # GH 11488 + # no tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01'), + Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01'), + Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') + ], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + 
self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in Index(dtype=object) + result = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + exp = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # length = 1 + result = Index([Timestamp('2011-01-01')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # length = 1 with tz + result = Index( + [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', + name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_index_with_mixed_timezones_with_NaT(self): + # GH 11488 + result = Index([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + pd.NaT, + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in Index(dtype=object) + result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], + name='idx') + exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') + exp = Index([pd.NaT, 
Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # all NaT + result = Index([pd.NaT, pd.NaT], name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # all NaT with tz + result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_dti_with_mixed_timezones(self): + # GH 11488 (not changed, added explicit tests) + + # no tz results in DatetimeIndex + result = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex (DST) + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + Timestamp('2011-08-01 10:00', + tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # different tz coerces tz-naive to tz-awareIndex(dtype=object) + result = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), + Timestamp('2011-01-02 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # tz mismatch affecting to tz-aware raises TypeError/ValueError + + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + + with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): + DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='US/Eastern', name='idx') + + with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): + # passing tz should results in DatetimeIndex, then mismatch raises + # TypeError + Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + + def test_construction_base_constructor(self): + arr = [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')] + tm.assert_index_equal(pd.Index(arr), 
pd.DatetimeIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.DatetimeIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Timestamp('2011-01-03')] + tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.DatetimeIndex(np.array(arr))) + + def test_construction_outofbounds(self): + # GH 13663 + dates = [datetime(3000, 1, 1), datetime(4000, 1, 1), + datetime(5000, 1, 1), datetime(6000, 1, 1)] + exp = Index(dates, dtype=object) + # coerces to object + tm.assert_index_equal(Index(dates), exp) + + with tm.assertRaises(OutOfBoundsDatetime): + # can't create DatetimeIndex + DatetimeIndex(dates) + + def test_construction_with_ndarray(self): + # GH 5152 + dates = [datetime(2013, 10, 7), + datetime(2013, 10, 8), + datetime(2013, 10, 9)] + data = DatetimeIndex(dates, freq=pd.tseries.frequencies.BDay()).values + result = DatetimeIndex(data, freq=pd.tseries.frequencies.BDay()) + expected = DatetimeIndex(['2013-10-07', + '2013-10-08', + '2013-10-09'], + freq='B') + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + rng = date_range('1/1/2000', periods=10.5) + exp = date_range('1/1/2000', periods=10) + tm.assert_index_equal(rng, exp) + + self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', + periods='foo', freq='D') + + self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', + end='1/10/2000') + + self.assertRaises(ValueError, DatetimeIndex, '1/1/2000') + + # generator expression + gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) + result = DatetimeIndex(gen) + expected = DatetimeIndex([datetime(2000, 1, 1) + timedelta(i) + for i in range(10)]) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(['2000-01-01', '2000-01-02', '2000-01-03']) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype('O')) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # string with NaT + strings = np.array(['2000-01-01', '2000-01-02', 'NaT']) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype('O')) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming + self.assertRaises(ValueError, DatetimeIndex, + ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') + + self.assertRaises(ValueError, DatetimeIndex, start='2011-01-01', + freq='b') + self.assertRaises(ValueError, DatetimeIndex, end='2011-01-01', + freq='B') + self.assertRaises(ValueError, DatetimeIndex, periods=10, freq='D') + + def test_constructor_datetime64_tzformat(self): + # GH 6572 + tm._skip_if_no_pytz() + import pytz + # ISO 8601 format results in pytz.FixedOffset + for freq in ['AS', 'W-SUN']: + idx = date_range('2013-01-01T00:00:00-05:00', + '2016-01-01T23:59:59-05:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='America/Lima') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013-01-01T00:00:00+09:00', + '2016-01-01T23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, 
expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + tm._skip_if_no_dateutil() + + # Non ISO 8601 format results in dateutil.tz.tzoffset + for freq in ['AS', 'W-SUN']: + idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', + freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(-300)) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='America/Lima') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range('2013/1/1 0:00:00+9:00', + '2016/1/1 23:59:59+09:00', freq=freq) + expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', + freq=freq, tz=pytz.FixedOffset(540)) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range('2013-01-01T00:00:00', + '2016-01-01T23:59:59', freq=freq, + tz='Asia/Tokyo') + self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + def test_constructor_dtype(self): + + # passing a dtype with a tz should localize + idx = DatetimeIndex(['2013-01-01', '2013-01-02'], + dtype='datetime64[ns, US/Eastern]') + expected = DatetimeIndex(['2013-01-01', '2013-01-02'] + ).tz_localize('US/Eastern') + tm.assert_index_equal(idx, expected) + + idx = DatetimeIndex(['2013-01-01', '2013-01-02'], + tz='US/Eastern') + tm.assert_index_equal(idx, expected) + + # if we already have a tz and its not the same, then raise + idx = DatetimeIndex(['2013-01-01', '2013-01-02'], + dtype='datetime64[ns, US/Eastern]') + + self.assertRaises(ValueError, + lambda: DatetimeIndex(idx, + dtype='datetime64[ns]')) + + # this is effectively trying to convert tz's + self.assertRaises(TypeError, + lambda: DatetimeIndex(idx, + dtype='datetime64[ns, CET]')) + self.assertRaises(ValueError, + lambda: DatetimeIndex( + idx, tz='CET', + dtype='datetime64[ns, US/Eastern]')) + result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]') + tm.assert_index_equal(idx, result) + + def test_constructor_name(self): + idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A', + name='TEST') + self.assertEqual(idx.name, 'TEST') + + def test_000constructor_resolution(self): + # 2252 + t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) + idx = DatetimeIndex([t1]) + + self.assertEqual(idx.nanosecond[0], t1.nanosecond) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py new file mode 100644 index 0000000000000..a69406804cd97 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -0,0 +1,836 @@ +import numpy as np +from datetime import date, timedelta, time + +import pandas as pd +import pandas.util.testing as tm +from pandas.compat import lrange +from pandas.compat.numpy import np_datetime64_compat +from pandas import (DatetimeIndex, Index, date_range, Series, DataFrame, + Timestamp, datetime, offsets, _np_version_under1p8) + +from pandas.util.testing import assert_series_equal, assert_almost_equal + +randn = np.random.randn + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_get_loc(self): + idx = pd.date_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1) + 
self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + if method is not None: + self.assertEqual(idx.get_loc(idx[1], method, + tolerance=pd.Timedelta('0 days')), + 1) + + self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1) + + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance='1 day'), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=pd.Timedelta('1D')), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=np.timedelta64(1, 'D')), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=timedelta(1)), 1) + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') + with tm.assertRaises(KeyError): + idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') + + self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) + self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) + + self.assertEqual(idx.get_loc('1999', method='nearest'), 0) + self.assertEqual(idx.get_loc('2001', method='nearest'), 2) + + with tm.assertRaises(KeyError): + idx.get_loc('1999', method='pad') + with tm.assertRaises(KeyError): + idx.get_loc('2001', method='backfill') + + with tm.assertRaises(KeyError): + idx.get_loc('foobar') + with tm.assertRaises(TypeError): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(['2000-01-01', '2000-01-04']) + self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0) + self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1) + self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2)) + + # time indexing + idx = pd.date_range('2000-01-01', periods=24, freq='H') + tm.assert_numpy_array_equal(idx.get_loc(time(12)), + np.array([12]), check_dtype=False) + tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), + np.array([]), check_dtype=False) + with tm.assertRaises(NotImplementedError): + idx.get_loc(time(12, 30), method='pad') + + def test_get_indexer(self): + idx = pd.date_range('2000-01-01', periods=3) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) + + target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', + '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')), + np.array([0, -1, 1], dtype=np.intp)) + with tm.assertRaises(ValueError): + idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') + + def test_roundtrip_pickle_with_tz(self): + + # GH 8367 + # round-trip of timezone + index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') + unpickled = self.round_trip_pickle(index) + self.assert_index_equal(index, unpickled) + + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): + # GH7774 + index = date_range('20130101', periods=3, tz='US/Eastern') + self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') + self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') + + def test_time_loc(self): # GH8667 + from datetime import time + from pandas.index import _SIZE_CUTOFF + + ns = 
_SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) + key = time(15, 11, 30) + start = key.hour * 3600 + key.minute * 60 + key.second + step = 24 * 3600 + + for n in ns: + idx = pd.date_range('2014-11-26', periods=n, freq='S') + ts = pd.Series(np.random.randn(n), index=idx) + i = np.arange(start, n, step) + + tm.assert_numpy_array_equal(ts.index.get_loc(key), i, + check_dtype=False) + tm.assert_series_equal(ts[key], ts.iloc[i]) + + left, right = ts.copy(), ts.copy() + left[key] *= -10 + right.iloc[i] *= -10 + tm.assert_series_equal(left, right) + + def test_time_overflow_for_32bit_machines(self): + # GH8943. On some machines NumPy defaults to np.int32 (for example, + # 32-bit Linux machines). In the function _generate_regular_range + # found in tseries/index.py, `periods` gets multiplied by `strides` + # (which has value 1e9) and since the max value for np.int32 is ~2e9, + # and since those machines won't promote np.int32 to np.int64, we get + # overflow. + periods = np.int_(1000) + + idx1 = pd.date_range(start='2000', periods=periods, freq='S') + self.assertEqual(len(idx1), periods) + + idx2 = pd.date_range(end='2000', periods=periods, freq='S') + self.assertEqual(len(idx2), periods) + + def test_nat(self): + self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) + + def test_ufunc_coercions(self): + idx = date_range('2011-01-01', periods=3, freq='2D', name='x') + + delta = np.timedelta64(1, 'D') + for result in [idx + delta, np.add(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = date_range('2011-01-02', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + for result in [idx - delta, np.subtract(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = date_range('2010-12-31', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), + np.timedelta64(3, 'D')]) + for result in [idx + delta, np.add(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], + freq='3D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '3D') + + for result in [idx - delta, np.subtract(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], + freq='D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'D') + + def test_week_of_month_frequency(self): + # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise + d1 = date(2002, 9, 1) + d2 = date(2013, 10, 27) + d3 = date(2012, 9, 30) + idx1 = DatetimeIndex([d1, d2]) + idx2 = DatetimeIndex([d3]) + result_append = idx1.append(idx2) + expected = DatetimeIndex([d1, d2, d3]) + tm.assert_index_equal(result_append, expected) + result_union = idx1.union(idx2) + expected = DatetimeIndex([d1, d3, d2]) + tm.assert_index_equal(result_union, expected) + + # GH 5115 + result = date_range("2013-1-1", periods=4, freq='WOM-1SAT') + dates = ['2013-01-05', '2013-02-02', '2013-03-02', '2013-04-06'] + expected = DatetimeIndex(dates, freq='WOM-1SAT') + tm.assert_index_equal(result, expected) + + def test_hash_error(self): + index = date_range('20010101', periods=10) + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_stringified_slice_with_tz(self): + # GH2658 + import datetime + start = datetime.datetime.now() + idx = 
DatetimeIndex(start=start, freq="1d", periods=10) + df = DataFrame(lrange(10), index=idx) + df["2013-01-14 23:44:34.437768-05:00":] # no exception here + + def test_append_join_nondatetimeindex(self): + rng = date_range('1/1/2000', periods=10) + idx = Index(['a', 'b', 'c', 'd']) + + result = rng.append(idx) + tm.assertIsInstance(result[0], Timestamp) + + # it works + rng.join(idx, how='outer') + + def test_to_period_nofreq(self): + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) + self.assertRaises(ValueError, idx.to_period) + + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], + freq='infer') + self.assertEqual(idx.freqstr, 'D') + expected = pd.PeriodIndex(['2000-01-01', '2000-01-02', + '2000-01-03'], freq='D') + tm.assert_index_equal(idx.to_period(), expected) + + # GH 7606 + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) + self.assertEqual(idx.freqstr, None) + tm.assert_index_equal(idx.to_period(), expected) + + def test_comparisons_coverage(self): + rng = date_range('1/1/2000', periods=10) + + # raise TypeError for now + self.assertRaises(TypeError, rng.__lt__, rng[3].value) + + result = rng == list(rng) + exp = rng == rng + self.assert_numpy_array_equal(result, exp) + + def test_comparisons_nat(self): + + fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) + fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) + + didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, + '2014-05-01', '2014-07-01']) + didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT, + '2014-06-01', '2014-07-01']) + darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), + np_datetime64_compat('2014-03-01 00:00Z'), + np_datetime64_compat('nat'), np.datetime64('nat'), + np_datetime64_compat('2014-06-01 00:00Z'), + np_datetime64_compat('2014-07-01 00:00Z')]) + + if _np_version_under1p8: + # cannot test array because np.datetime('nat') returns today's date + cases = [(fidx1, fidx2), (didx1, didx2)] + else: + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + self.assert_numpy_array_equal(result, expected) + + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: + result = idx1 < val + expected = np.array([False, False, False, False, False, False]) + self.assert_numpy_array_equal(result, expected) + result = idx1 > val + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + self.assert_numpy_array_equal(result, expected) + result = idx1 >= val + self.assert_numpy_array_equal(result, expected) + + result = idx1 == val + self.assert_numpy_array_equal(result, expected) + + result = 
idx1 != val + expected = np.array([True, True, True, True, True, True]) + self.assert_numpy_array_equal(result, expected) + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: + result = idx1 < val + expected = np.array([True, False, False, False, False, False]) + self.assert_numpy_array_equal(result, expected) + result = idx1 > val + expected = np.array([False, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + expected = np.array([True, False, True, False, False, False]) + self.assert_numpy_array_equal(result, expected) + result = idx1 >= val + expected = np.array([False, False, True, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 == val + expected = np.array([False, False, True, False, False, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, False, True, True, True]) + self.assert_numpy_array_equal(result, expected) + + def test_map(self): + rng = date_range('1/1/2000', periods=10) + + f = lambda x: x.strftime('%Y%m%d') + result = rng.map(f) + exp = Index([f(x) for x in rng], dtype='= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_fill_value_with_timezone(self): + idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', tz='US/Eastern') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_map_bug_1677(self): + index = DatetimeIndex(['2012-04-25 09:30:00.393000']) + f = index.asof + + result = index.map(f) + expected = Index([f(index[0])]) + tm.assert_index_equal(result, expected) + + def test_groupby_function_tuple_1677(self): + df = DataFrame(np.random.rand(100), + index=date_range("1/1/2000", periods=100)) + monthly_group = df.groupby(lambda x: (x.year, x.month)) + + result = monthly_group.mean() + tm.assertIsInstance(result.index[0], tuple) + + def test_append_numpy_bug_1681(self): + # another datetime64 bug + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + a = DataFrame() + c = DataFrame({'A': 'foo', 'B': dr}, index=dr) + + result = a.append(c) + self.assertTrue((result['B'] == dr).all()) + + def test_isin(self): + index = tm.makeDateIndex(4) + result = index.isin(index) + 
self.assertTrue(result.all()) + + result = index.isin(list(index)) + self.assertTrue(result.all()) + + assert_almost_equal(index.isin([index[2], 5]), + np.array([False, False, True, False])) + + def test_time(self): + rng = pd.date_range('1/1/2000', freq='12min', periods=10) + result = pd.Index(rng).time + expected = [t.time() for t in rng] + self.assertTrue((result == expected).all()) + + def test_date(self): + rng = pd.date_range('1/1/2000', freq='12H', periods=10) + result = pd.Index(rng).date + expected = [t.date() for t in rng] + self.assertTrue((result == expected).all()) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe(10, 10, + data_gen_f=lambda *args, **kwargs: randn(), + r_idx_type='i', c_idx_type='dt') + cols = df.columns.join(df.index, how='outer') + joined = cols.join(df.columns) + self.assertEqual(cols.dtype, np.dtype('O')) + self.assertEqual(cols.dtype, joined.dtype) + tm.assert_numpy_array_equal(cols.values, joined.values) + + def test_slice_keeps_name(self): + # GH4226 + st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles') + et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles') + dr = pd.date_range(st, et, freq='H', name='timebucket') + self.assertEqual(dr[1:].name, dr.name) + + def test_join_self(self): + index = date_range('1/1/2000', periods=10) + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = index.join(index, how=kind) + self.assertIs(index, joined) + + def assert_index_parameters(self, index): + assert index.freq == '40960N' + assert index.inferred_freq == '40960N' + + def test_ns_index(self): + nsamples = 400 + ns = int(1e9 / 24414) + dtstart = np.datetime64('2012-09-20T00:00:00') + + dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, 'ns') + freq = ns * offsets.Nano() + index = pd.DatetimeIndex(dt, freq=freq, name='time') + self.assert_index_parameters(index) + + new_index = pd.DatetimeIndex(start=index[0], end=index[-1], + freq=index.freq) + self.assert_index_parameters(new_index) + + def test_join_with_period_index(self): + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=lambda *args: np.random.randint(2), + c_idx_type='p', r_idx_type='dt') + s = df.iloc[:5, 0] + joins = 'left', 'right', 'inner', 'outer' + + for join in joins: + with tm.assertRaisesRegexp(ValueError, 'can only call with other ' + 'PeriodIndex-ed objects'): + df.columns.join(s.index, how=join) + + def test_factorize(self): + idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', + '2014-03', '2014-03']) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + # tz must be preserved + idx1 = idx1.tz_localize('Asia/Tokyo') + exp_idx = exp_idx.tz_localize('Asia/Tokyo') + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01', + '2014-03', '2014-01']) + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) + arr, idx = idx2.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = 
DatetimeIndex(['2014-03', '2014-02', '2014-01']) + arr, idx = idx2.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + # freq must be preserved + idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo') + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + + def test_factorize_tz(self): + # GH 13750 + for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: + base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) + idx = base.repeat(5) + + exp_arr = np.arange(100, dtype=np.intp).repeat(5) + + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(res, base) + + def test_factorize_dst(self): + # GH 13750 + idx = pd.date_range('2016-11-06', freq='H', periods=12, + tz='US/Eastern') + + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() + self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + + idx = pd.date_range('2016-06-13', freq='H', periods=12, + tz='US/Eastern') + + for obj in [idx, pd.Series(idx)]: + arr, res = obj.factorize() + self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), + date_range('2014-01-01', periods=20, freq='MS')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]) + assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1]) + + assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]) + assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1]) + + assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp( + '2014-10-01'):-1], SLC[13:8:-1]) + assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1], + SLC[13:8:-1]) + + assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), + date_range('2014-01-01', periods=20, freq='MS')) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + + def test_slice_bounds_empty(self): + # GH 14354 + empty_idx = DatetimeIndex(freq='1H', periods=0, end='2015') + + right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc') + exp = Timestamp('2015-01-02 23:59:59.999999999') + self.assertEqual(right, exp) + + left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') + exp = Timestamp('2015-01-02 00:00:00') + self.assertEqual(left, exp) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py new file mode 100644 index 0000000000000..5b6bcffe71856 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -0,0 +1,244 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm 
+import pandas.compat as compat +from pandas import notnull, Index, DatetimeIndex, datetime, date_range + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_where_other(self): + + # other is ndarray or Index + i = pd.date_range('20130101', periods=3, tz='US/Eastern') + + for arr in [np.nan, pd.NaT]: + result = i.where(notnull(i), other=np.nan) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notnull(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notnull(i2), i2.values) + tm.assert_index_equal(result, i2) + + def test_where_tz(self): + i = pd.date_range('20130101', periods=3, tz='US/Eastern') + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) + result = i.where(notnull(i2)) + expected = i2 + tm.assert_index_equal(result, expected) + + def test_insert(self): + idx = DatetimeIndex( + ['2000-01-04', '2000-01-01', '2000-01-02'], name='idx') + + result = idx.insert(2, datetime(2000, 1, 5)) + exp = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-05', + '2000-01-02'], name='idx') + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, 'inserted') + expected = Index([datetime(2000, 1, 4), 'inserted', + datetime(2000, 1, 1), + datetime(2000, 1, 2)], name='idx') + self.assertNotIsInstance(result, DatetimeIndex) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + + idx = date_range('1/1/2000', periods=3, freq='M', name='idx') + + # preserve freq + expected_0 = DatetimeIndex(['1999-12-31', '2000-01-31', '2000-02-29', + '2000-03-31'], name='idx', freq='M') + expected_3 = DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', + '2000-04-30'], name='idx', freq='M') + + # reset freq to None + expected_1_nofreq = DatetimeIndex(['2000-01-31', '2000-01-31', + '2000-02-29', + '2000-03-31'], name='idx', + freq=None) + expected_3_nofreq = DatetimeIndex(['2000-01-31', '2000-02-29', + '2000-03-31', + '2000-01-02'], name='idx', + freq=None) + + cases = [(0, datetime(1999, 12, 31), expected_0), + (-3, datetime(1999, 12, 31), expected_0), + (3, datetime(2000, 4, 30), expected_3), + (1, datetime(2000, 1, 31), expected_1_nofreq), + (3, datetime(2000, 1, 2), expected_3_nofreq)] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + # reset freq to None + result = idx.insert(3, datetime(2000, 1, 2)) + expected = DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', + '2000-01-02'], name='idx', freq=None) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertTrue(result.freq is None) + + # GH 7299 + tm._skip_if_no_pytz() + import pytz + + idx = date_range('1/1/2000', periods=3, freq='D', tz='Asia/Tokyo', + name='idx') + with tm.assertRaises(ValueError): + result = idx.insert(3, pd.Timestamp('2000-01-04')) + with tm.assertRaises(ValueError): + result = idx.insert(3, datetime(2000, 1, 4)) + with tm.assertRaises(ValueError): + result = idx.insert(3, pd.Timestamp('2000-01-04', tz='US/Eastern')) + with tm.assertRaises(ValueError): + result = idx.insert(3, + datetime(2000, 1, 4, + 
                                         tzinfo=pytz.timezone('US/Eastern')))
+
+        for tz in ['US/Pacific', 'Asia/Singapore']:
+            idx = date_range('1/1/2000 09:00', periods=6, freq='H', tz=tz,
+                             name='idx')
+            # preserve freq
+            expected = date_range('1/1/2000 09:00', periods=7, freq='H', tz=tz,
+                                  name='idx')
+            for d in [pd.Timestamp('2000-01-01 15:00', tz=tz),
+                      pytz.timezone(tz).localize(datetime(2000, 1, 1, 15))]:
+
+                result = idx.insert(6, d)
+                tm.assert_index_equal(result, expected)
+                self.assertEqual(result.name, expected.name)
+                self.assertEqual(result.freq, expected.freq)
+                self.assertEqual(result.tz, expected.tz)
+
+            expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 10:00',
+                                      '2000-01-01 11:00',
+                                      '2000-01-01 12:00', '2000-01-01 13:00',
+                                      '2000-01-01 14:00',
+                                      '2000-01-01 10:00'], name='idx',
+                                     tz=tz, freq=None)
+            # reset freq to None
+            for d in [pd.Timestamp('2000-01-01 10:00', tz=tz),
+                      pytz.timezone(tz).localize(datetime(2000, 1, 1, 10))]:
+                result = idx.insert(6, d)
+                tm.assert_index_equal(result, expected)
+                self.assertEqual(result.name, expected.name)
+                self.assertTrue(result.freq is None)
+                self.assertEqual(result.tz, expected.tz)
+
+    def test_delete(self):
+        idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')
+
+        # preserve freq
+        expected_0 = date_range(start='2000-02-01', periods=4, freq='M',
+                                name='idx')
+        expected_4 = date_range(start='2000-01-01', periods=4, freq='M',
+                                name='idx')
+
+        # reset freq to None
+        expected_1 = DatetimeIndex(['2000-01-31', '2000-03-31', '2000-04-30',
+                                    '2000-05-31'], freq=None, name='idx')
+
+        cases = {0: expected_0,
+                 -5: expected_0,
+                 -1: expected_4,
+                 4: expected_4,
+                 1: expected_1}
+        for n, expected in compat.iteritems(cases):
+            result = idx.delete(n)
+            tm.assert_index_equal(result, expected)
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freq, expected.freq)
+
+        with tm.assertRaises((IndexError, ValueError)):
+            # either depending on numpy version
+            result = idx.delete(5)
+
+        for tz in [None, 'Asia/Tokyo', 'US/Pacific']:
+            idx = date_range(start='2000-01-01 09:00', periods=10, freq='H',
+                             name='idx', tz=tz)
+
+            expected = date_range(start='2000-01-01 10:00', periods=9,
+                                  freq='H', name='idx', tz=tz)
+            result = idx.delete(0)
+            tm.assert_index_equal(result, expected)
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freqstr, 'H')
+            self.assertEqual(result.tz, expected.tz)
+
+            expected = date_range(start='2000-01-01 09:00', periods=9,
+                                  freq='H', name='idx', tz=tz)
+            result = idx.delete(-1)
+            tm.assert_index_equal(result, expected)
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freqstr, 'H')
+            self.assertEqual(result.tz, expected.tz)
+
+    def test_delete_slice(self):
+        idx = date_range(start='2000-01-01', periods=10, freq='D', name='idx')
+
+        # preserve freq
+        expected_0_2 = date_range(start='2000-01-04', periods=7, freq='D',
+                                  name='idx')
+        expected_7_9 = date_range(start='2000-01-01', periods=7, freq='D',
+                                  name='idx')
+
+        # reset freq to None
+        expected_3_5 = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03',
+                                      '2000-01-07', '2000-01-08', '2000-01-09',
+                                      '2000-01-10'], freq=None, name='idx')
+
+        cases = {(0, 1, 2): expected_0_2,
+                 (7, 8, 9): expected_7_9,
+                 (3, 4, 5): expected_3_5}
+        for n, expected in compat.iteritems(cases):
+            result = idx.delete(n)
+            tm.assert_index_equal(result, expected)
+            self.assertEqual(result.name, expected.name)
+            self.assertEqual(result.freq, expected.freq)
+
+            result = idx.delete(slice(n[0], n[-1] + 1))
+            tm.assert_index_equal(result, expected)
+
self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + for tz in [None, 'Asia/Tokyo', 'US/Pacific']: + ts = pd.Series(1, index=pd.date_range( + '2000-01-01 09:00', periods=10, freq='H', name='idx', tz=tz)) + # preserve freq + result = ts.drop(ts.index[:5]).index + expected = pd.date_range('2000-01-01 14:00', periods=5, freq='H', + name='idx', tz=tz) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.tz, expected.tz) + + # reset freq to None + result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index + expected = DatetimeIndex(['2000-01-01 09:00', '2000-01-01 11:00', + '2000-01-01 13:00', + '2000-01-01 15:00', '2000-01-01 17:00'], + freq=None, name='idx', tz=tz) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.tz, expected.tz) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py new file mode 100644 index 0000000000000..3dfe95fa77b85 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -0,0 +1,333 @@ +import numpy as np + +import pandas.lib as lib +import pandas.util.testing as tm +from pandas import Float64Index, date_range, Timestamp +from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, + Series, DataFrame) + + +class TestDateTimeIndexToJulianDate(tm.TestCase): + + def test_1700(self): + r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, + 2345901.5]) + r2 = date_range(start=Timestamp('1710-10-01'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, + 2451605.5]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + r1 = Float64Index( + [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, + 2451601.625, 2451601.6666666666666666]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='H').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + r1 = Float64Index( + [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, + 2451601.5020833333333333, 2451601.5027777777777777]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='T').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + r1 = Float64Index( + [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, + 2451601.5000347222222222, 2451601.5000462962962962]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='S').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_pass_datetimeindex_to_index(self): + # Bugs in #1396 + rng = date_range('1/1/2000', '3/1/2000') + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pydatetime(), dtype=object) + + self.assert_numpy_array_equal(idx.values, expected.values) + + +class TestDatetime64(tm.TestCase): + + def test_datetimeindex_accessors(self): + dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) + + 
self.assertEqual(dti.year[0], 1998) + self.assertEqual(dti.month[0], 1) + self.assertEqual(dti.day[0], 1) + self.assertEqual(dti.hour[0], 0) + self.assertEqual(dti.minute[0], 0) + self.assertEqual(dti.second[0], 0) + self.assertEqual(dti.microsecond[0], 0) + self.assertEqual(dti.dayofweek[0], 3) + + self.assertEqual(dti.dayofyear[0], 1) + self.assertEqual(dti.dayofyear[120], 121) + + self.assertEqual(dti.weekofyear[0], 1) + self.assertEqual(dti.weekofyear[120], 18) + + self.assertEqual(dti.quarter[0], 1) + self.assertEqual(dti.quarter[120], 2) + + self.assertEqual(dti.days_in_month[0], 31) + self.assertEqual(dti.days_in_month[90], 30) + + self.assertEqual(dti.is_month_start[0], True) + self.assertEqual(dti.is_month_start[1], False) + self.assertEqual(dti.is_month_start[31], True) + self.assertEqual(dti.is_quarter_start[0], True) + self.assertEqual(dti.is_quarter_start[90], True) + self.assertEqual(dti.is_year_start[0], True) + self.assertEqual(dti.is_year_start[364], False) + self.assertEqual(dti.is_month_end[0], False) + self.assertEqual(dti.is_month_end[30], True) + self.assertEqual(dti.is_month_end[31], False) + self.assertEqual(dti.is_month_end[364], True) + self.assertEqual(dti.is_quarter_end[0], False) + self.assertEqual(dti.is_quarter_end[30], False) + self.assertEqual(dti.is_quarter_end[89], True) + self.assertEqual(dti.is_quarter_end[364], True) + self.assertEqual(dti.is_year_end[0], False) + self.assertEqual(dti.is_year_end[364], True) + + # GH 11128 + self.assertEqual(dti.weekday_name[4], u'Monday') + self.assertEqual(dti.weekday_name[5], u'Tuesday') + self.assertEqual(dti.weekday_name[6], u'Wednesday') + self.assertEqual(dti.weekday_name[7], u'Thursday') + self.assertEqual(dti.weekday_name[8], u'Friday') + self.assertEqual(dti.weekday_name[9], u'Saturday') + self.assertEqual(dti.weekday_name[10], u'Sunday') + + self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') + self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') + self.assertEqual(Timestamp('2016-04-06').weekday_name, u'Wednesday') + self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') + self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') + self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') + self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') + + self.assertEqual(len(dti.year), 365) + self.assertEqual(len(dti.month), 365) + self.assertEqual(len(dti.day), 365) + self.assertEqual(len(dti.hour), 365) + self.assertEqual(len(dti.minute), 365) + self.assertEqual(len(dti.second), 365) + self.assertEqual(len(dti.microsecond), 365) + self.assertEqual(len(dti.dayofweek), 365) + self.assertEqual(len(dti.dayofyear), 365) + self.assertEqual(len(dti.weekofyear), 365) + self.assertEqual(len(dti.quarter), 365) + self.assertEqual(len(dti.is_month_start), 365) + self.assertEqual(len(dti.is_month_end), 365) + self.assertEqual(len(dti.is_quarter_start), 365) + self.assertEqual(len(dti.is_quarter_end), 365) + self.assertEqual(len(dti.is_year_start), 365) + self.assertEqual(len(dti.is_year_end), 365) + self.assertEqual(len(dti.weekday_name), 365) + + dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), + periods=4) + + self.assertEqual(sum(dti.is_quarter_start), 0) + self.assertEqual(sum(dti.is_quarter_end), 4) + self.assertEqual(sum(dti.is_year_start), 0) + self.assertEqual(sum(dti.is_year_end), 1) + + # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, + # CBD requires np >= 1.7 + bday_egypt = 
offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') + dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) + self.assertRaises(ValueError, lambda: dti.is_month_start) + + dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) + + self.assertEqual(dti.is_month_start[0], 1) + + tests = [ + (Timestamp('2013-06-01', freq='M').is_month_start, 1), + (Timestamp('2013-06-01', freq='BM').is_month_start, 0), + (Timestamp('2013-06-03', freq='M').is_month_start, 0), + (Timestamp('2013-06-03', freq='BM').is_month_start, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1), + (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1), + (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0), + (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0), + (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1), + (Timestamp('2013-06-30', freq='BQ').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQ').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1), + (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1), + (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1), + (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1), + (Timestamp('2012-02-01').days_in_month, 29), + (Timestamp('2013-02-01').days_in_month, 28)] + + for ts, value in tests: + self.assertEqual(ts, value) + + def test_datetimeindex_diff(self): + dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=100) + dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=98) + self.assertEqual(len(dti1.difference(dti2)), 2) + + def test_nanosecond_field(self): + dti = DatetimeIndex(np.arange(10)) + + self.assert_numpy_array_equal(dti.nanosecond, + np.arange(10, dtype=np.int32)) + + def test_datetimeindex_constructor(self): + arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] + self.assertRaises(Exception, DatetimeIndex, arr) + + arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] + idx1 = DatetimeIndex(arr) + + arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] + idx2 = DatetimeIndex(arr) + + arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', + '2005-01-04'] + idx3 = DatetimeIndex(arr) + + arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', + '2005-01-04'], dtype='O') + idx4 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) + idx5 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' + ]) + idx6 = DatetimeIndex(arr) + + idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) + idx8 = 
DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, + yearfirst=True) + tm.assert_index_equal(idx7, idx8) + + for other in [idx2, idx3, idx4, idx5, idx6]: + self.assertTrue((idx1.values == other.values).all()) + + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = DatetimeIndex(start=sdate, freq='1B', periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) + self.assertEqual(idx.freq, 'B') + + idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[-1], edate) + self.assertEqual(idx.freq, '5D') + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.Week(weekday=6)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.QuarterBegin(startingMonth=1)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.BQuarterEnd(startingMonth=12)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + def test_dayfirst(self): + # GH 5917 + arr = ['10/02/2014', '11/02/2014', '12/02/2014'] + expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), + datetime(2014, 2, 12)]) + idx1 = DatetimeIndex(arr, dayfirst=True) + idx2 = DatetimeIndex(np.array(arr), dayfirst=True) + idx3 = to_datetime(arr, dayfirst=True) + idx4 = to_datetime(np.array(arr), dayfirst=True) + idx5 = DatetimeIndex(Index(arr), dayfirst=True) + idx6 = DatetimeIndex(Series(arr), dayfirst=True) + tm.assert_index_equal(expected, idx1) + tm.assert_index_equal(expected, idx2) + tm.assert_index_equal(expected, idx3) + tm.assert_index_equal(expected, idx4) + tm.assert_index_equal(expected, idx5) + tm.assert_index_equal(expected, idx6) + + def test_dti_set_index_reindex(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern') + idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo') + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + # 11314 + # with tz + index = date_range(datetime(2015, 10, 1), + datetime(2015, 10, 1, 23), + freq='H', tz='US/Eastern') + df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index) + new_index = date_range(datetime(2015, 10, 2), + datetime(2015, 10, 2, 23), + freq='H', tz='US/Eastern') + + # TODO: unused? 
+ result = df.set_index(new_index) # noqa + + self.assertEqual(new_index.freq, index.freq) + + def test_datetimeindex_union_join_empty(self): + dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') + empty = Index([]) + + result = dti.union(empty) + tm.assertIsInstance(result, DatetimeIndex) + self.assertIs(result, result) + + result = dti.join(empty) + tm.assertIsInstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py new file mode 100644 index 0000000000000..5c408d5300cdc --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -0,0 +1,51 @@ +import pandas as pd +import pandas.util.testing as tm + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_fillna_datetime64(self): + # GH 11343 + for tz in ['US/Eastern', 'Asia/Tokyo']: + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00']) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00']) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # tz mismatch + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), + pd.Timestamp('2011-01-01 10:00', tz=tz), + pd.Timestamp('2011-01-01 11:00')], dtype=object) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', + pd.Timestamp('2011-01-01 11:00')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00'], tz=tz) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], tz=tz) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + pd.Timestamp('2011-01-01 10:00'), + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + 'x', + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py new file mode 100644 index 0000000000000..c25cd6a3fa90e --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -0,0 +1,1073 @@ +import warnings +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.tslib as tslib +import pandas.util.testing as tm +from pandas.core.common import PerformanceWarning +from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, + date_range, TimedeltaIndex, _np_version_under1p10, Index, + datetime, Float64Index) + +from pandas.tests.test_base import Ops + + +class TestDatetimeIndexOps(Ops): + tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', + 'dateutil/US/Pacific'] + + def setUp(self): + super(TestDatetimeIndexOps, self).setUp() + mask = lambda x: (isinstance(x, DatetimeIndex) or + isinstance(x, PeriodIndex)) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [o for o in self.objs if not mask(o)] + + def test_ops_properties(self): + self.check_ops_properties( + ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', + 'week', 'dayofweek', 'dayofyear', 'quarter']) + self.check_ops_properties(['date', 'time', 'microsecond', 
'nanosecond', + 'is_month_start', 'is_month_end', + 'is_quarter_start', + 'is_quarter_end', 'is_year_start', + 'is_year_end', 'weekday_name'], + lambda x: isinstance(x, DatetimeIndex)) + + def test_ops_properties_basic(self): + + # sanity check that the behavior didn't change + # GH7206 + for op in ['year', 'day', 'second', 'weekday']: + self.assertRaises(TypeError, lambda x: getattr(self.dt_series, op)) + + # attribute access should still work! + s = Series(dict(year=2000, month=1, day=10)) + self.assertEqual(s.year, 2000) + self.assertEqual(s.month, 1) + self.assertEqual(s.day, 10) + self.assertRaises(AttributeError, lambda: s.weekday) + + def test_asobject_tolist(self): + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', + name='idx') + expected_list = [Timestamp('2013-01-31'), + Timestamp('2013-02-28'), + Timestamp('2013-03-31'), + Timestamp('2013-04-30')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = pd.date_range(start='2013-01-01', periods=4, freq='M', + name='idx', tz='Asia/Tokyo') + expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), + Timestamp('2013-02-28', tz='Asia/Tokyo'), + Timestamp('2013-03-31', tz='Asia/Tokyo'), + Timestamp('2013-04-30', tz='Asia/Tokyo')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), + pd.NaT, datetime(2013, 1, 4)], name='idx') + expected_list = [Timestamp('2013-01-01'), + Timestamp('2013-01-02'), pd.NaT, + Timestamp('2013-01-04')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + def test_minmax(self): + for tz in self.tz: + # monotonic + idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], tz=tz) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', + '2011-01-02', pd.NaT], tz=tz) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz)) + self.assertEqual(idx.max(), Timestamp('2011-01-03', tz=tz)) + self.assertEqual(idx.argmin(), 0) + self.assertEqual(idx.argmax(), 2) + + for op in ['min', 'max']: + # Return NaT + obj = DatetimeIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + def test_numpy_minmax(self): + dr = pd.date_range(start='2016-01-15', end='2016-01-20') + + self.assertEqual(np.min(dr), + Timestamp('2016-01-15 00:00:00', freq='D')) + self.assertEqual(np.max(dr), + Timestamp('2016-01-20 00:00:00', freq='D')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) + 
tm.assertRaisesRegexp(ValueError, errmsg, np.max, dr, out=0) + + self.assertEqual(np.argmin(dr), 0) + self.assertEqual(np.argmax(dr), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, dr, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, dr, out=0) + + def test_round(self): + for tz in self.tz: + rng = pd.date_range(start='2016-01-01', periods=5, + freq='30Min', tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(rng.round(freq='H'), expected_rng) + self.assertEqual(elt.round(freq='H'), expected_elt) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with tm.assertRaisesRegexp(ValueError, msg): + rng.round(freq='foo') + with tm.assertRaisesRegexp(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + + def test_repeat_range(self): + rng = date_range('1/1/2000', '1/1/2001') + + result = rng.repeat(5) + self.assertIsNone(result.freq) + self.assertEqual(len(result), 5 * len(rng)) + + for tz in self.tz: + index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-02', '2001-01-02'], tz=tz) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', + '2001-01-03', '2001-01-03'], tz=tz) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], + tz=tz) + exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', + 'NaT', 'NaT', 'NaT', + '2003-01-01', '2003-01-01', '2003-01-01'], + tz=tz) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + def test_repeat(self): + reps = 2 + msg = "the 'axis' parameter is not supported" + + for tz in self.tz: + rng = pd.date_range(start='2016-01-01', periods=2, + freq='30Min', tz=tz) + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + ]) + + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + self.assertIsNone(res.freq) + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + tm.assertRaisesRegexp(ValueError, msg, np.repeat, + rng, reps, axis=1) + + def test_representation(self): + + idx = [] + idx.append(DatetimeIndex([], freq='D')) + idx.append(DatetimeIndex(['2011-01-01'], freq='D')) + idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) + idx.append(DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')) + idx.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' + ], freq='H', tz='Asia/Tokyo')) + idx.append(DatetimeIndex( + ['2011-01-01 
09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern')) + idx.append(DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) + + exp = [] + exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") + exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " + "freq='D')") + exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " + "dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " + "dtype='datetime64[ns]', freq='D')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " + "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" + ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") + exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " + "'2011-01-01 10:00:00-05:00', 'NaT'], " + "dtype='datetime64[ns, US/Eastern]', freq=None)") + exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " + "'2011-01-01 10:00:00+00:00', 'NaT'], " + "dtype='datetime64[ns, UTC]', freq=None)""") + + with pd.option_context('display.width', 300): + for indx, expected in zip(idx, exp): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(indx, func)() + self.assertEqual(result, expected) + + def test_representation_to_series(self): + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) + + exp1 = """Series([], dtype: datetime64[ns])""" + + exp2 = """0 2011-01-01 +dtype: datetime64[ns]""" + + exp3 = """0 2011-01-01 +1 2011-01-02 +dtype: datetime64[ns]""" + + exp4 = """0 2011-01-01 +1 2011-01-02 +2 2011-01-03 +dtype: datetime64[ns]""" + + exp5 = """0 2011-01-01 09:00:00+09:00 +1 2011-01-01 10:00:00+09:00 +2 2011-01-01 11:00:00+09:00 +dtype: datetime64[ns, Asia/Tokyo]""" + + exp6 = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 NaT +dtype: datetime64[ns, US/Eastern]""" + + exp7 = """0 2011-01-01 09:00:00 +1 2011-01-02 10:15:00 +dtype: datetime64[ns]""" + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, + idx5, idx6, idx7], + [exp1, exp2, exp3, exp4, + exp5, exp6, exp7]): + result = repr(Series(idx)) + self.assertEqual(result, expected) + + def test_summary(self): + # GH9116 + idx1 = DatetimeIndex([], freq='D') + idx2 = DatetimeIndex(['2011-01-01'], freq='D') + idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') + idx4 = DatetimeIndex( + ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], + freq='H', tz='Asia/Tokyo') + idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], + tz='US/Eastern') + + exp1 = """DatetimeIndex: 0 entries +Freq: D""" + + exp2 = """DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01 +Freq: D""" + + exp3 = """DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02 +Freq: D""" + + exp4 = """DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03 +Freq: D""" + + exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " + "to 2011-01-01 11:00:00+09:00\n" + "Freq: H") + + exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" + + for idx, 
expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], + [exp1, exp2, exp3, exp4, exp5, exp6]): + result = idx.summary() + self.assertEqual(result, expected) + + def test_resolution(self): + for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', + 'S', 'L', 'U'], + ['day', 'day', 'day', 'day', 'hour', + 'minute', 'second', 'millisecond', + 'microsecond']): + for tz in self.tz: + idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, + tz=tz) + self.assertEqual(idx.resolution, expected) + + def test_union(self): + for tz in self.tz: + # union + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3)]: + + result_union = rng.union(other) + tm.assert_index_equal(result_union, expected) + + def test_add_iadd(self): + for tz in self.tz: + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + result = rng + delta + expected = pd.date_range('2000-01-01 02:00', + '2000-02-01 02:00', tz=tz) + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, + tz=tz) + result = rng + 1 + expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, + tz=tz) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + idx = DatetimeIndex(['2011-01-01', '2011-01-02']) + msg = "cannot add a datelike to a DatetimeIndex" + with tm.assertRaisesRegexp(TypeError, msg): + idx + Timestamp('2011-01-01') + + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp('2011-01-01') + idx + + def test_add_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now raises + # TypeError (GH14164) + + dti = date_range('20130101', periods=3) + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + + with tm.assertRaises(TypeError): + dti + dti + + with tm.assertRaises(TypeError): + dti_tz + dti_tz + + with tm.assertRaises(TypeError): + dti_tz + dti + + with tm.assertRaises(TypeError): + dti + dti_tz + + def test_difference(self): + for tz in self.tz: + # diff + rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) + expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) + expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) + + rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3)]: + result_diff = rng.difference(other) + 
tm.assert_index_equal(result_diff, expected) + + def test_sub_isub(self): + for tz in self.tz: + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + expected = pd.date_range('1999-12-31 22:00', + '2000-01-31 22:00', tz=tz) + + result = rng - delta + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + # int + rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, + tz=tz) + result = rng - 1 + expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, + tz=tz) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + + def test_sub_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now changed to + # return subtraction -> TimeDeltaIndex (GH ...) + + dti = date_range('20130101', periods=3) + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC') + expected = TimedeltaIndex([0, 0, 0]) + + result = dti - dti + tm.assert_index_equal(result, expected) + + result = dti_tz - dti_tz + tm.assert_index_equal(result, expected) + + with tm.assertRaises(TypeError): + dti_tz - dti + + with tm.assertRaises(TypeError): + dti - dti_tz + + with tm.assertRaises(TypeError): + dti_tz - dti_tz2 + + # isub + dti -= dti + tm.assert_index_equal(dti, expected) + + # different length raises ValueError + dti1 = date_range('20130101', periods=3) + dti2 = date_range('20130101', periods=4) + with tm.assertRaises(ValueError): + dti1 - dti2 + + # NaN propagation + dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03']) + dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan]) + expected = TimedeltaIndex(['1 days', np.nan, np.nan]) + result = dti2 - dti1 + tm.assert_index_equal(result, expected) + + def test_sub_period(self): + # GH 13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + + for freq in [None, 'D']: + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) + + with tm.assertRaises(TypeError): + idx - p + + with tm.assertRaises(TypeError): + p - idx + + def test_comp_nat(self): + left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, + pd.Timestamp('2011-01-03')]) + right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) + + for l, r in [(left, right), (left.asobject, right.asobject)]: + result = l == r + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = l != r + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == r, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(l != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != l, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > l, expected) + + def test_value_counts_unique(self): + # GH 7735 + for tz in self.tz: + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), + tz=tz) + + exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, + tz=tz) + expected = 
Series(range(10, 0, -1), index=exp_idx, dtype='int64') + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, + tz=tz) + tm.assert_index_equal(idx.unique(), expected) + + idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', + '2013-01-01 09:00', '2013-01-01 08:00', + '2013-01-01 08:00', pd.NaT], tz=tz) + + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], + tz=tz) + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', + pd.NaT], tz=tz) + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), + expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_nonunique_contains(self): + # GH 9512 + for idx in map(DatetimeIndex, + ([0, 1, 0], [0, 0, -1], [0, -1, -1], + ['2015', '2015', '2016'], ['2015', '2015', '2014'])): + tm.assertIn(idx[0], idx) + + def test_order(self): + # with freq + idx1 = DatetimeIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], freq='D', name='idx') + idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H', + tz='Asia/Tokyo', name='tzidx') + + for idx in [idx1, idx2]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, + np.array([0, 1, 2]), + check_dtype=False) + self.assertEqual(ordered.freq, idx.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, + np.array([2, 1, 0]), + check_dtype=False) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + # without freq + for tz in self.tz: + idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], + tz=tz, name='idx1') + exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], + tz=tz, name='idx1') + + idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], + tz=tz, name='idx2') + + exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], + tz=tz, name='idx2') + + idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05', + '2011-01-02', pd.NaT], tz=tz, name='idx3') + exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', + '2011-01-05'], tz=tz, name='idx3') + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, expected) + self.assertIsNone(ordered.freq) + + ordered = idx.sort_values(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) 
+ + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + def test_getitem(self): + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx[0] + self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) + + result = idx[0:5] + expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[0:10:2] + expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[-20:-5:3] + expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[4::-1] + expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', + '2011-01-02', '2011-01-01'], + freq='-1D', tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + result = idx.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertEqual(idx.freq, result.freq) + + idx_dup = idx.append(idx) + self.assertIsNone(idx_dup.freq) # freq is reset + result = idx_dup.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertIsNone(result.freq) + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep='last') + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep='last') + tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + def test_take(self): + # GH 10295 + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', + tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx.take([0]) + self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) + + result = idx.take([0, 1, 2]) + expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + 
self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = DatetimeIndex(['2011-01-04', '2011-01-03', + '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + result = idx.take([-3, 2, 5]) + expected = DatetimeIndex(['2011-01-29', '2011-01-03', + '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + def test_take_invalid_kwargs(self): + idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + + def test_infer_freq(self): + # GH 11018 + for freq in ['A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', + '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', + '-3S']: + idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) + result = pd.DatetimeIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + self.assertEqual(result.freq, freq) + + def test_nat_new(self): + idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') + result = idx._nat_new() + exp = pd.DatetimeIndex([pd.NaT] * 5, name='x') + tm.assert_index_equal(result, exp) + + result = idx._nat_new(box=False) + exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + tm.assert_numpy_array_equal(result, exp) + + def test_shift(self): + # GH 9903 + for tz in self.tz: + idx = pd.DatetimeIndex([], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' + '2011-01-01 12:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' + '2011-01-01 15:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' + '2011-01-01 09:00'], name='xxx', tz=tz) + tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + def test_nat(self): + self.assertIs(pd.DatetimeIndex._na_value, pd.NaT) + self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT) + + for tz in [None, 'US/Eastern', 'UTC']: + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + self.assertFalse(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([], dtype=np.intp)) + + idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + self.assertTrue(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([1], dtype=np.intp)) + + def test_equals(self): + # GH 13107 + for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: + idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.asobject)) + self.assertTrue(idx.asobject.equals(idx)) + self.assertTrue(idx.asobject.equals(idx.asobject)) + 
self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(pd.Series(idx))) + + idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], + tz='US/Pacific') + self.assertFalse(idx.equals(idx2)) + self.assertFalse(idx.equals(idx2.copy())) + self.assertFalse(idx.equals(idx2.asobject)) + self.assertFalse(idx.asobject.equals(idx2)) + self.assertFalse(idx.equals(list(idx2))) + self.assertFalse(idx.equals(pd.Series(idx2))) + + # same internal, different tz + idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + self.assertFalse(idx.equals(idx3)) + self.assertFalse(idx.equals(idx3.copy())) + self.assertFalse(idx.equals(idx3.asobject)) + self.assertFalse(idx.asobject.equals(idx3)) + self.assertFalse(idx.equals(list(idx3))) + self.assertFalse(idx.equals(pd.Series(idx3))) + + +class TestDateTimeIndexToJulianDate(tm.TestCase): + + def test_1700(self): + r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, + 2345901.5]) + r2 = date_range(start=Timestamp('1710-10-01'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, + 2451605.5]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='D').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + r1 = Float64Index( + [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, + 2451601.625, 2451601.6666666666666666]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='H').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + r1 = Float64Index( + [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, + 2451601.5020833333333333, 2451601.5027777777777777]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='T').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + r1 = Float64Index( + [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, + 2451601.5000347222222222, 2451601.5000462962962962]) + r2 = date_range(start=Timestamp('2000-02-27'), periods=5, + freq='S').to_julian_date() + self.assertIsInstance(r2, Float64Index) + tm.assert_index_equal(r1, r2) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + # GH 10699 + def test_datetime64_with_DateOffset(self): + for klass, assert_func in zip([Series, DatetimeIndex], + [self.assert_series_equal, + tm.assert_index_equal]): + s = klass(date_range('2000-01-01', '2000-01-31'), name='a') + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') + assert_func(result, exp) + assert_func(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') + assert_func(result, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), + Timestamp('2000-02-16', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result 
= s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + # array of offsets - valid for Series only + if klass is Series: + with tm.assert_produces_warning(PerformanceWarning): + s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') + ]) + assert_func(result, exp) + + # same offset + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) + assert_func(result, exp) + + s = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + # DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), ('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + op = pd.DateOffset(**dict([kwd])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + + # assert these are equal on a piecewise basis + offsets = ['YearBegin', ('YearBegin', {'month': 5}), 'YearEnd', + ('YearEnd', {'month': 5}), 'MonthBegin', 'MonthEnd', + 'SemiMonthEnd', 'SemiMonthBegin', + 'Week', ('Week', { + 'weekday': 3 + }), 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', + 'CustomBusinessDay', 'CDay', 'CBMonthEnd', + 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', + 'BusinessHour', 'BYearBegin', 'BYearEnd', + 'BQuarterBegin', ('LastWeekOfMonth', { + 'weekday': 2 + }), ('FY5253Quarter', {'qtr_with_extra_week': 1, + 'startingMonth': 1, + 'weekday': 2, + 'variation': 'nearest'}), + ('FY5253', {'weekday': 0, + 'startingMonth': 2, + 'variation': + 'nearest'}), ('WeekOfMonth', {'weekday': 2, + 'week': 2}), + 'Easter', ('DateOffset', {'day': 4}), + ('DateOffset', {'month': 5})] + + with warnings.catch_warnings(record=True): + for normalize in (True, False): + for do in offsets: + if isinstance(do, tuple): + do, kwargs = do + else: + do = do + kwargs = {} + + for n in [0, 5]: + if (do in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253'] and n == 0): + continue + op = getattr(pd.offsets, do)(n, + normalize=normalize, + **kwargs) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + assert_func(klass([op + x for x in s]), op + s) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py new file mode 100644 index 0000000000000..ba6beb03c7f24 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -0,0 +1,168 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, + Int64Index) + + +class TestDatetimeIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_union(self): + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = Int64Index(np.arange(10, 30, 2)) + result = i1.union(i2) + expected = Int64Index(np.arange(0, 30, 2)) + 
tm.assert_index_equal(result, expected) + + def test_union_coverage(self): + idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) + ordered = DatetimeIndex(idx.sort_values(), freq='infer') + result = ordered.union(idx) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered) + tm.assert_index_equal(result, ordered) + self.assertEqual(result.freq, ordered.freq) + + def test_union_bug_1730(self): + rng_a = date_range('1/1/2012', periods=4, freq='3H') + rng_b = date_range('1/1/2012', periods=4, freq='4H') + + result = rng_a.union(rng_b) + exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) + tm.assert_index_equal(result, exp) + + def test_union_bug_1745(self): + left = DatetimeIndex(['2012-05-11 15:19:49.695000']) + right = DatetimeIndex(['2012-05-29 13:04:21.322000', + '2012-05-11 15:27:24.873000', + '2012-05-11 15:31:05.350000']) + + result = left.union(right) + exp = DatetimeIndex(sorted(set(list(left)) | set(list(right)))) + tm.assert_index_equal(result, exp) + + def test_union_bug_4564(self): + from pandas import DateOffset + left = date_range("2013-01-01", "2013-02-01") + right = left + DateOffset(minutes=15) + + result = left.union(right) + exp = DatetimeIndex(sorted(set(list(left)) | set(list(right)))) + tm.assert_index_equal(result, exp) + + def test_union_freq_both_none(self): + # GH11086 + expected = bdate_range('20150101', periods=10) + expected.freq = None + + result = expected.union(expected) + tm.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + def test_union_dataframe_index(self): + rng1 = date_range('1/1/1999', '1/1/2012', freq='MS') + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = date_range('1/1/1980', '12/1/2001', freq='MS') + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({'s1': s1, 's2': s2}) + + exp = pd.date_range('1/1/1980', '1/1/2012', freq='MS') + tm.assert_index_equal(df.index, exp) + + def test_union_with_DatetimeIndex(self): + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = DatetimeIndex(start='2012-01-03 00:00:00', periods=10, freq='D') + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + + def test_intersection(self): + # GH 4690 (with tz) + for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: + base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx') + + # if target has the same name, it is preserved + rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx') + expected2 = date_range('6/1/2000', '6/20/2000', freq='D', + name='idx') + + # if target name is different, it will be reset + rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other') + expected3 = date_range('6/1/2000', '6/20/2000', freq='D', + name=None) + + rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx') + expected4 = DatetimeIndex([], name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.tz, expected.tz) + + # non-monotonic + base = DatetimeIndex(['2011-01-05', '2011-01-04', + '2011-01-02', '2011-01-03'], + tz=tz, name='idx') + + rng2 = DatetimeIndex(['2011-01-04', '2011-01-02', + '2011-02-02', '2011-02-03'], + tz=tz, name='idx') + expected2 = DatetimeIndex( + ['2011-01-04', '2011-01-02'], tz=tz, name='idx') + + rng3 = DatetimeIndex(['2011-01-04', '2011-01-02', + 
'2011-02-02', '2011-02-03'], + tz=tz, name='other') + expected3 = DatetimeIndex( + ['2011-01-04', '2011-01-02'], tz=tz, name=None) + + # GH 7880 + rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz, + name='idx') + expected4 = DatetimeIndex([], tz=tz, name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertIsNone(result.freq) + self.assertEqual(result.tz, expected.tz) + + # empty same freq GH2129 + rng = date_range('6/1/2000', '6/15/2000', freq='T') + result = rng[0:0].intersection(rng) + self.assertEqual(len(result), 0) + + result = rng.intersection(rng[0:0]) + self.assertEqual(len(result), 0) + + def test_intersection_bug_1708(self): + from pandas import DateOffset + index_1 = date_range('1/1/2012', periods=4, freq='12H') + index_2 = index_1 + DateOffset(hours=1) + + result = index_1 & index_2 + self.assertEqual(len(result), 0) + + def test_difference_freq(self): + # GH14323: difference of DatetimeIndex should not preserve frequency + + index = date_range("20160920", "20160925", freq="D") + other = date_range("20160921", "20160924", freq="D") + expected = DatetimeIndex(["20160920", "20160925"], freq=None) + idx_diff = index.difference(other) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = date_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other) + expected = DatetimeIndex(["20160920", "20160921"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 2cd73ec8d254a..32e4029a57fe9 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,18 +1,15 @@ # -*- coding: utf-8 -*- -from datetime import datetime, timedelta, time, date - import numpy as np +from datetime import timedelta +import pandas as pd +import pandas.util.testing as tm from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, NaT, Period, PeriodIndex, Series, Timedelta, TimedeltaIndex, date_range, period_range, timedelta_range, notnull) -import pandas.util.testing as tm - -import pandas as pd -from pandas.tslib import Timestamp, OutOfBoundsDatetime from .common import Base @@ -88,553 +85,9 @@ def test_shift(self): '2013-01-11'], freq='D') self.assert_index_equal(result, expected) - def test_construction_with_alt(self): - - i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') - i2 = DatetimeIndex(i, dtype=i.dtype) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) - self.assert_index_equal(i, i2) - - # localize into the provided tz - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') - expected = i.tz_localize(None).tz_localize('UTC') - self.assert_index_equal(i2, expected) - - # incompat tz/dtype - self.assertRaises(ValueError, lambda: DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) - def test_pickle_compat_construction(self): pass - def test_construction_index_with_mixed_timezones(self): - # GH 11488 - # no tz results in DatetimeIndex - result = 
Index([Timestamp('2011-01-01'), - Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01'), - Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # same tz results in DatetimeIndex - result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') - ], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # same tz results in DatetimeIndex (DST) - result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # different tz results in Index(dtype=object) - result = Index([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - exp = Index([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - # length = 1 - result = Index([Timestamp('2011-01-01')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # length = 1 with tz - result = Index( - [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', - name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - def test_construction_index_with_mixed_timezones_with_NaT(self): - # GH 11488 - result = Index([pd.NaT, Timestamp('2011-01-01'), - pd.NaT, Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), - pd.NaT, Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # same tz results in DatetimeIndex - result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00')], - tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - 
self.assertEqual(result.tz, exp.tz) - - # same tz results in DatetimeIndex (DST) - result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - pd.NaT, - Timestamp('2011-08-01 10:00', tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # different tz results in Index(dtype=object) - result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='US/Eastern')], - name='idx') - exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='US/Eastern')], name='idx') - exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - # all NaT - result = Index([pd.NaT, pd.NaT], name='idx') - exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # all NaT with tz - result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - def test_construction_dti_with_mixed_timezones(self): - # GH 11488 (not changed, added explicit tests) - - # no tz results in DatetimeIndex - result = DatetimeIndex( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # same tz results in DatetimeIndex - result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', - tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00')], - tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # same tz results in DatetimeIndex (DST) - result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', - tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # different tz coerces tz-naive to tz-awareIndex(dtype=object) - result = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', - tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), - Timestamp('2011-01-02 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - 
self.assertTrue(isinstance(result, DatetimeIndex)) - - # tz mismatch affecting to tz-aware raises TypeError/ValueError - - with tm.assertRaises(ValueError): - DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - - with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): - DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - - with tm.assertRaises(ValueError): - DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='US/Eastern', name='idx') - - with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'): - # passing tz should results in DatetimeIndex, then mismatch raises - # TypeError - Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - - def test_construction_base_constructor(self): - arr = [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')] - tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.DatetimeIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Timestamp('2011-01-03')] - tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.DatetimeIndex(np.array(arr))) - - def test_construction_outofbounds(self): - # GH 13663 - dates = [datetime(3000, 1, 1), datetime(4000, 1, 1), - datetime(5000, 1, 1), datetime(6000, 1, 1)] - exp = Index(dates, dtype=object) - # coerces to object - tm.assert_index_equal(Index(dates), exp) - - with tm.assertRaises(OutOfBoundsDatetime): - # can't create DatetimeIndex - DatetimeIndex(dates) - - def test_construction_with_ndarray(self): - # GH 5152 - dates = [datetime(2013, 10, 7), - datetime(2013, 10, 8), - datetime(2013, 10, 9)] - data = DatetimeIndex(dates, freq=pd.tseries.frequencies.BDay()).values - result = DatetimeIndex(data, freq=pd.tseries.frequencies.BDay()) - expected = DatetimeIndex(['2013-10-07', - '2013-10-08', - '2013-10-09'], - freq='B') - tm.assert_index_equal(result, expected) - - def test_astype(self): - # GH 13149, GH 13209 - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - result = idx.astype(object) - expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object) - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([1463356800000000000] + - [-9223372036854775808] * 3, dtype=np.int64) - tm.assert_index_equal(result, expected) - - rng = date_range('1/1/2000', periods=10) - result = rng.astype('i8') - self.assert_index_equal(result, Index(rng.asi8)) - self.assert_numpy_array_equal(result.values, rng.asi8) - - def test_astype_with_tz(self): - - # with tz - rng = date_range('1/1/2000', periods=10, tz='US/Eastern') - result = rng.astype('datetime64[ns]') - expected = (date_range('1/1/2000', periods=10, - tz='US/Eastern') - .tz_convert('UTC').tz_localize(None)) - tm.assert_index_equal(result, expected) - - # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex - result = pd.Series(pd.date_range('2012-01-01', periods=3)).astype(str) - expected = pd.Series( - ['2012-01-01', '2012-01-02', '2012-01-03'], dtype=object) - tm.assert_series_equal(result, expected) - - result = Series(pd.date_range('2012-01-01', periods=3, - tz='US/Eastern')).astype(str) - expected = Series(['2012-01-01 00:00:00-05:00', - '2012-01-02 00:00:00-05:00', - 
'2012-01-03 00:00:00-05:00'], - dtype=object) - tm.assert_series_equal(result, expected) - - def test_astype_str_compat(self): - # GH 13149, GH 13209 - # verify that we are returing NaT as a string (and not unicode) - - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - result = idx.astype(str) - expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object) - tm.assert_index_equal(result, expected) - - def test_astype_str(self): - # test astype string - #10442 - result = date_range('2012-01-01', periods=4, - name='test_name').astype(str) - expected = Index(['2012-01-01', '2012-01-02', '2012-01-03', - '2012-01-04'], name='test_name', dtype=object) - tm.assert_index_equal(result, expected) - - # test astype string with tz and name - result = date_range('2012-01-01', periods=3, name='test_name', - tz='US/Eastern').astype(str) - expected = Index(['2012-01-01 00:00:00-05:00', - '2012-01-02 00:00:00-05:00', - '2012-01-03 00:00:00-05:00'], - name='test_name', dtype=object) - tm.assert_index_equal(result, expected) - - # test astype string with freqH and name - result = date_range('1/1/2011', periods=3, freq='H', - name='test_name').astype(str) - expected = Index(['2011-01-01 00:00:00', '2011-01-01 01:00:00', - '2011-01-01 02:00:00'], - name='test_name', dtype=object) - tm.assert_index_equal(result, expected) - - # test astype string with freqH and timezone - result = date_range('3/6/2012 00:00', periods=2, freq='H', - tz='Europe/London', name='test_name').astype(str) - expected = Index(['2012-03-06 00:00:00+00:00', - '2012-03-06 01:00:00+00:00'], - dtype=object, name='test_name') - tm.assert_index_equal(result, expected) - - def test_astype_datetime64(self): - # GH 13149, GH 13209 - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - result = idx.astype('datetime64[ns]') - tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) - - result = idx.astype('datetime64[ns]', copy=False) - tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) - - idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST') - result = idx_tz.astype('datetime64[ns]') - expected = DatetimeIndex(['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'], - dtype='datetime64[ns]') - tm.assert_index_equal(result, expected) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[D]') - - def test_where_other(self): - - # other is ndarray or Index - i = pd.date_range('20130101', periods=3, tz='US/Eastern') - - for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2) - tm.assert_index_equal(result, i2) - - i2 = i.copy() - i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2.values) - tm.assert_index_equal(result, i2) - - def test_where_tz(self): - i = pd.date_range('20130101', periods=3, tz='US/Eastern') - result = i.where(notnull(i)) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - 
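
# A minimal standalone sketch of the Index.where / notnull behaviour exercised
# by the removed test_where_other / test_where_tz cases above. Illustrative
# only, not taken from the patch; assumes pandas >= 0.19, where Index.where
# accepts an array-like condition.
import pandas as pd
from pandas import notnull

i = pd.date_range('20130101', periods=3, tz='US/Eastern')
i2 = pd.Index([pd.NaT, pd.NaT] + i[2:].tolist())

# Positions where the condition is False are filled with NaT, so the result
# matches i2 rather than the original index.
result = i.where(notnull(i2))
print(result)  # [NaT, NaT, '2013-01-03 00:00:00-05:00'], tz='US/Eastern'
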
- def test_get_loc(self): - idx = pd.date_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - if method is not None: - self.assertEqual(idx.get_loc(idx[1], method, - tolerance=pd.Timedelta('0 days')), - 1) - - self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1) - - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') - - self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) - - self.assertEqual(idx.get_loc('1999', method='nearest'), 0) - self.assertEqual(idx.get_loc('2001', method='nearest'), 2) - - with tm.assertRaises(KeyError): - idx.get_loc('1999', method='pad') - with tm.assertRaises(KeyError): - idx.get_loc('2001', method='backfill') - - with tm.assertRaises(KeyError): - idx.get_loc('foobar') - with tm.assertRaises(TypeError): - idx.get_loc(slice(2)) - - idx = pd.to_datetime(['2000-01-01', '2000-01-04']) - self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2)) - - # time indexing - idx = pd.date_range('2000-01-01', periods=24, freq='H') - tm.assert_numpy_array_equal(idx.get_loc(time(12)), - np.array([12]), check_dtype=False) - tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), - np.array([]), check_dtype=False) - with tm.assertRaises(NotImplementedError): - idx.get_loc(time(12, 30), method='pad') - - def test_get_indexer(self): - idx = pd.date_range('2000-01-01', periods=3) - exp = np.array([0, 1, 2], dtype=np.intp) - tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) - - target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', - '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')), - np.array([0, -1, 1], dtype=np.intp)) - with tm.assertRaises(ValueError): - idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') - - def test_roundtrip_pickle_with_tz(self): - - # GH 8367 - # round-trip of timezone - index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') - unpickled = self.round_trip_pickle(index) - self.assert_index_equal(index, unpickled) - - def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): - # GH7774 - index = date_range('20130101', periods=3, tz='US/Eastern') - 
self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') - self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') - - def test_time_loc(self): # GH8667 - from datetime import time - from pandas.index import _SIZE_CUTOFF - - ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) - key = time(15, 11, 30) - start = key.hour * 3600 + key.minute * 60 + key.second - step = 24 * 3600 - - for n in ns: - idx = pd.date_range('2014-11-26', periods=n, freq='S') - ts = pd.Series(np.random.randn(n), index=idx) - i = np.arange(start, n, step) - - tm.assert_numpy_array_equal(ts.index.get_loc(key), i, - check_dtype=False) - tm.assert_series_equal(ts[key], ts.iloc[i]) - - left, right = ts.copy(), ts.copy() - left[key] *= -10 - right.iloc[i] *= -10 - tm.assert_series_equal(left, right) - - def test_time_overflow_for_32bit_machines(self): - # GH8943. On some machines NumPy defaults to np.int32 (for example, - # 32-bit Linux machines). In the function _generate_regular_range - # found in tseries/index.py, `periods` gets multiplied by `strides` - # (which has value 1e9) and since the max value for np.int32 is ~2e9, - # and since those machines won't promote np.int32 to np.int64, we get - # overflow. - periods = np.int_(1000) - - idx1 = pd.date_range(start='2000', periods=periods, freq='S') - self.assertEqual(len(idx1), periods) - - idx2 = pd.date_range(end='2000', periods=periods, freq='S') - self.assertEqual(len(idx2), periods) - def test_intersection(self): first = self.index second = self.index[5:] @@ -665,122 +118,6 @@ def test_union(self): result = first.union(case) self.assertTrue(tm.equalContents(result, everything)) - def test_nat(self): - self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) - - def test_ufunc_coercions(self): - idx = date_range('2011-01-01', periods=3, freq='2D', name='x') - - delta = np.timedelta64(1, 'D') - for result in [idx + delta, np.add(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2011-01-02', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2010-12-31', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), - np.timedelta64(3, 'D')]) - for result in [idx + delta, np.add(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], - freq='3D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '3D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], - freq='D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'D') - - def test_fillna_datetime64(self): - # GH 11343 - for tz in ['US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00']) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00']) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # tz mismatch - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), - pd.Timestamp('2011-01-01 10:00', tz=tz), - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), 
exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00'], tz=tz) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], tz=tz) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - pd.Timestamp('2011-01-01 10:00'), - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - 'x', - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - def test_difference_freq(self): - # GH14323: difference of DatetimeIndex should not preserve frequency - - index = date_range("20160920", "20160925", freq="D") - other = date_range("20160921", "20160924", freq="D") - expected = DatetimeIndex(["20160920", "20160925"], freq=None) - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = date_range("20160922", "20160925", freq="D") - idx_diff = index.difference(other) - expected = DatetimeIndex(["20160920", "20160921"], freq=None) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - def test_week_of_month_frequency(self): - # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise - d1 = date(2002, 9, 1) - d2 = date(2013, 10, 27) - d3 = date(2012, 9, 30) - idx1 = DatetimeIndex([d1, d2]) - idx2 = DatetimeIndex([d3]) - result_append = idx1.append(idx2) - expected = DatetimeIndex([d1, d2, d3]) - tm.assert_index_equal(result_append, expected) - result_union = idx1.union(idx2) - expected = DatetimeIndex([d1, d3, d2]) - tm.assert_index_equal(result_union, expected) - - # GH 5115 - result = date_range("2013-1-1", periods=4, freq='WOM-1SAT') - dates = ['2013-01-05', '2013-02-02', '2013-03-02', '2013-04-06'] - expected = DatetimeIndex(dates, freq='WOM-1SAT') - tm.assert_index_equal(result, expected) - class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index bca50237081e1..4f2ac3ff0d87e 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -1,5 +1,5 @@ from __future__ import print_function -from datetime import datetime, timedelta +from datetime import timedelta import numpy as np import pandas as pd from pandas import (Series, Index, Int64Index, Timestamp, Period, @@ -14,901 +14,6 @@ from pandas.tests.test_base import Ops -class TestDatetimeIndexOps(Ops): - tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', - 'dateutil/US/Pacific'] - - def setUp(self): - super(TestDatetimeIndexOps, self).setUp() - mask = lambda x: (isinstance(x, DatetimeIndex) or - isinstance(x, PeriodIndex)) - self.is_valid_objs = [o for o in self.objs if mask(o)] - self.not_valid_objs = [o for o in self.objs if not mask(o)] - - def test_ops_properties(self): - self.check_ops_properties( - ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', - 'week', 'dayofweek', 'dayofyear', 'quarter']) - self.check_ops_properties(['date', 'time', 'microsecond', 'nanosecond', - 'is_month_start', 'is_month_end', - 
'is_quarter_start', - 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'], - lambda x: isinstance(x, DatetimeIndex)) - - def test_ops_properties_basic(self): - - # sanity check that the behavior didn't change - # GH7206 - for op in ['year', 'day', 'second', 'weekday']: - self.assertRaises(TypeError, lambda x: getattr(self.dt_series, op)) - - # attribute access should still work! - s = Series(dict(year=2000, month=1, day=10)) - self.assertEqual(s.year, 2000) - self.assertEqual(s.month, 1) - self.assertEqual(s.day, 10) - self.assertRaises(AttributeError, lambda: s.weekday) - - def test_asobject_tolist(self): - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx') - expected_list = [Timestamp('2013-01-31'), - Timestamp('2013-02-28'), - Timestamp('2013-03-31'), - Timestamp('2013-04-30')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = pd.date_range(start='2013-01-01', periods=4, freq='M', - name='idx', tz='Asia/Tokyo') - expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), - Timestamp('2013-02-28', tz='Asia/Tokyo'), - Timestamp('2013-03-31', tz='Asia/Tokyo'), - Timestamp('2013-04-30', tz='Asia/Tokyo')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), - pd.NaT, datetime(2013, 1, 4)], name='idx') - expected_list = [Timestamp('2013-01-01'), - Timestamp('2013-01-02'), pd.NaT, - Timestamp('2013-01-04')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - def test_minmax(self): - for tz in self.tz: - # monotonic - idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', - '2011-01-03'], tz=tz) - self.assertTrue(idx1.is_monotonic) - - # non-monotonic - idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03', - '2011-01-02', pd.NaT], tz=tz) - self.assertFalse(idx2.is_monotonic) - - for idx in [idx1, idx2]: - self.assertEqual(idx.min(), Timestamp('2011-01-01', tz=tz)) - self.assertEqual(idx.max(), Timestamp('2011-01-03', tz=tz)) - self.assertEqual(idx.argmin(), 0) - self.assertEqual(idx.argmax(), 2) - - for op in ['min', 'max']: - # Return NaT - obj = DatetimeIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = DatetimeIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - def test_numpy_minmax(self): - dr = pd.date_range(start='2016-01-15', end='2016-01-20') - - self.assertEqual(np.min(dr), - Timestamp('2016-01-15 00:00:00', freq='D')) - self.assertEqual(np.max(dr), - Timestamp('2016-01-20 00:00:00', freq='D')) - - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, 
dr, out=0) - - self.assertEqual(np.argmin(dr), 0) - self.assertEqual(np.argmax(dr), 5) - - if not _np_version_under1p10: - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, dr, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, dr, out=0) - - def test_round(self): - for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=5, - freq='30Min', tz=tz) - elt = rng[1] - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(rng.round(freq='H'), expected_rng) - self.assertEqual(elt.round(freq='H'), expected_elt) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with tm.assertRaisesRegexp(ValueError, msg): - rng.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - - def test_repeat_range(self): - rng = date_range('1/1/2000', '1/1/2001') - - result = rng.repeat(5) - self.assertIsNone(result.freq) - self.assertEqual(len(result), 5 * len(rng)) - - for tz in self.tz: - index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-02', '2001-01-02'], tz=tz) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', - '2001-01-03', '2001-01-03'], tz=tz) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'], - tz=tz) - exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01', - 'NaT', 'NaT', 'NaT', - '2003-01-01', '2003-01-01', '2003-01-01'], - tz=tz) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - def test_repeat(self): - reps = 2 - msg = "the 'axis' parameter is not supported" - - for tz in self.tz: - rng = pd.date_range(start='2016-01-01', periods=2, - freq='30Min', tz=tz) - - expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), - ]) - - res = rng.repeat(reps) - tm.assert_index_equal(res, expected_rng) - self.assertIsNone(res.freq) - - tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - tm.assertRaisesRegexp(ValueError, msg, np.repeat, - rng, reps, axis=1) - - def test_representation(self): - - idx = [] - idx.append(DatetimeIndex([], freq='D')) - idx.append(DatetimeIndex(['2011-01-01'], freq='D')) - idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' - ], freq='H', tz='Asia/Tokyo')) - idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern')) 
- idx.append(DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC')) - - exp = [] - exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""") - exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', " - "freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], " - "dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='datetime64[ns]', freq='D')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', " - "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" - ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')") - exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', " - "'2011-01-01 10:00:00-05:00', 'NaT'], " - "dtype='datetime64[ns, US/Eastern]', freq=None)") - exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', " - "'2011-01-01 10:00:00+00:00', 'NaT'], " - "dtype='datetime64[ns, UTC]', freq=None)""") - - with pd.option_context('display.width', 300): - for indx, expected in zip(idx, exp): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(indx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - idx7 = DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']) - - exp1 = """Series([], dtype: datetime64[ns])""" - - exp2 = """0 2011-01-01 -dtype: datetime64[ns]""" - - exp3 = """0 2011-01-01 -1 2011-01-02 -dtype: datetime64[ns]""" - - exp4 = """0 2011-01-01 -1 2011-01-02 -2 2011-01-03 -dtype: datetime64[ns]""" - - exp5 = """0 2011-01-01 09:00:00+09:00 -1 2011-01-01 10:00:00+09:00 -2 2011-01-01 11:00:00+09:00 -dtype: datetime64[ns, Asia/Tokyo]""" - - exp6 = """0 2011-01-01 09:00:00-05:00 -1 2011-01-01 10:00:00-05:00 -2 NaT -dtype: datetime64[ns, US/Eastern]""" - - exp7 = """0 2011-01-01 09:00:00 -1 2011-01-02 10:15:00 -dtype: datetime64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, - idx5, idx6, idx7], - [exp1, exp2, exp3, exp4, - exp5, exp6, exp7]): - result = repr(Series(idx)) - self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = DatetimeIndex([], freq='D') - idx2 = DatetimeIndex(['2011-01-01'], freq='D') - idx3 = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = DatetimeIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') - idx5 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], - freq='H', tz='Asia/Tokyo') - idx6 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], - tz='US/Eastern') - - exp1 = """DatetimeIndex: 0 entries -Freq: D""" - - exp2 = """DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01 -Freq: D""" - - exp3 = """DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02 -Freq: D""" - - exp4 = """DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03 -Freq: D""" - - exp5 = ("DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " - "to 2011-01-01 11:00:00+09:00\n" - "Freq: H") - - exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6], 
- [exp1, exp2, exp3, exp4, exp5, exp6]): - result = idx.summary() - self.assertEqual(result, expected) - - def test_resolution(self): - for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', - 'S', 'L', 'U'], - ['day', 'day', 'day', 'day', 'hour', - 'minute', 'second', 'millisecond', - 'microsecond']): - for tz in self.tz: - idx = pd.date_range(start='2013-04-01', periods=30, freq=freq, - tz=tz) - self.assertEqual(idx.resolution, expected) - - def test_union(self): - for tz in self.tz: - # union - rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) - expected1 = pd.date_range('1/1/2000', freq='D', periods=10, tz=tz) - - rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) - expected2 = pd.date_range('1/1/2000', freq='D', periods=8, tz=tz) - - rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other3 = pd.DatetimeIndex([], tz=tz) - expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3)]: - - result_union = rng.union(other) - tm.assert_index_equal(result_union, expected) - - def test_add_iadd(self): - for tz in self.tz: - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - result = rng + delta - expected = pd.date_range('2000-01-01 02:00', - '2000-02-01 02:00', tz=tz) - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - # int - rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, - tz=tz) - result = rng + 1 - expected = pd.date_range('2000-01-01 10:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - idx = DatetimeIndex(['2011-01-01', '2011-01-02']) - msg = "cannot add a datelike to a DatetimeIndex" - with tm.assertRaisesRegexp(TypeError, msg): - idx + Timestamp('2011-01-01') - - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp('2011-01-01') + idx - - def test_add_dti_dti(self): - # previously performed setop (deprecated in 0.16.0), now raises - # TypeError (GH14164) - - dti = date_range('20130101', periods=3) - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - - with tm.assertRaises(TypeError): - dti + dti - - with tm.assertRaises(TypeError): - dti_tz + dti_tz - - with tm.assertRaises(TypeError): - dti_tz + dti - - with tm.assertRaises(TypeError): - dti + dti_tz - - def test_difference(self): - for tz in self.tz: - # diff - rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) - expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) - expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) - - rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - other3 = pd.DatetimeIndex([], tz=tz) - expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3)]: - result_diff = rng.difference(other) - tm.assert_index_equal(result_diff, expected) - - def test_sub_isub(self): - 
for tz in self.tz: - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) - expected = pd.date_range('1999-12-31 22:00', - '2000-01-31 22:00', tz=tz) - - result = rng - delta - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - # int - rng = pd.date_range('2000-01-01 09:00', freq='H', periods=10, - tz=tz) - result = rng - 1 - expected = pd.date_range('2000-01-01 08:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - def test_sub_dti_dti(self): - # previously performed setop (deprecated in 0.16.0), now changed to - # return subtraction -> TimeDeltaIndex (GH ...) - - dti = date_range('20130101', periods=3) - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC') - expected = TimedeltaIndex([0, 0, 0]) - - result = dti - dti - tm.assert_index_equal(result, expected) - - result = dti_tz - dti_tz - tm.assert_index_equal(result, expected) - - with tm.assertRaises(TypeError): - dti_tz - dti - - with tm.assertRaises(TypeError): - dti - dti_tz - - with tm.assertRaises(TypeError): - dti_tz - dti_tz2 - - # isub - dti -= dti - tm.assert_index_equal(dti, expected) - - # different length raises ValueError - dti1 = date_range('20130101', periods=3) - dti2 = date_range('20130101', periods=4) - with tm.assertRaises(ValueError): - dti1 - dti2 - - # NaN propagation - dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03']) - dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan]) - expected = TimedeltaIndex(['1 days', np.nan, np.nan]) - result = dti2 - dti1 - tm.assert_index_equal(result, expected) - - def test_sub_period(self): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - for freq in [None, 'D']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq) - - with tm.assertRaises(TypeError): - idx - p - - with tm.assertRaises(TypeError): - p - idx - - def test_comp_nat(self): - left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')]) - right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) - - def test_value_counts_unique(self): - # GH 7735 - for tz in self.tz: - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), - tz=tz) - - exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10, - tz=tz) - expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') - - for obj in [idx, 
Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10, - tz=tz) - tm.assert_index_equal(idx.unique(), expected) - - idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00', - '2013-01-01 09:00', '2013-01-01 08:00', - '2013-01-01 08:00', pd.NaT], tz=tz) - - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'], - tz=tz) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00', - pd.NaT], tz=tz) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), - expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - - def test_nonunique_contains(self): - # GH 9512 - for idx in map(DatetimeIndex, - ([0, 1, 0], [0, 0, -1], [0, -1, -1], - ['2015', '2015', '2016'], ['2015', '2015', '2014'])): - tm.assertIn(idx[0], idx) - - def test_order(self): - # with freq - idx1 = DatetimeIndex(['2011-01-01', '2011-01-02', - '2011-01-03'], freq='D', name='idx') - idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H', - tz='Asia/Tokyo', name='tzidx') - - for idx in [idx1, idx2]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0]), - check_dtype=False) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - # without freq - for tz in self.tz: - idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', - '2011-01-02', '2011-01-01'], - tz=tz, name='idx1') - exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', - '2011-01-03', '2011-01-05'], - tz=tz, name='idx1') - - idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', - '2011-01-02', '2011-01-01'], - tz=tz, name='idx2') - - exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', - '2011-01-03', '2011-01-05'], - tz=tz, name='idx2') - - idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05', - '2011-01-02', pd.NaT], tz=tz, name='idx3') - exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', - '2011-01-05'], tz=tz, name='idx3') - - for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) - - ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - 
ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - def test_getitem(self): - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = idx[0] - self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) - - result = idx[0:5] - expected = pd.date_range('2011-01-01', '2011-01-05', freq='D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[0:10:2] - expected = pd.date_range('2011-01-01', '2011-01-09', freq='2D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[-20:-5:3] - expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[4::-1] - expected = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-03', - '2011-01-02', '2011-01-01'], - freq='-1D', tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - def test_drop_duplicates_metadata(self): - # GH 10115 - idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) - - idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset - result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertIsNone(result.freq) - - def test_drop_duplicates(self): - # to check Index/Series compat - base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep='last') - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep='last') - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - - def test_take(self): - # GH 10295 - idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', - tz='Asia/Tokyo', name='idx') - - for idx in [idx1, idx2]: - result = idx.take([0]) - self.assertEqual(result, Timestamp('2011-01-01', tz=idx.tz)) - - result = idx.take([0, 1, 2]) - expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([7, 4, 1]) - expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', - tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([3, 2, 
5]) - expected = DatetimeIndex(['2011-01-04', '2011-01-03', - '2011-01-06'], - freq=None, tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - result = idx.take([-3, 2, 5]) - expected = DatetimeIndex(['2011-01-29', '2011-01-03', - '2011-01-06'], - freq=None, tz=idx.tz, name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - def test_take_invalid_kwargs(self): - idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') - - def test_infer_freq(self): - # GH 11018 - for freq in ['A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D', - '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S', - '-3S']: - idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10) - result = pd.DatetimeIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - self.assertEqual(result.freq, freq) - - def test_nat_new(self): - idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') - result = idx._nat_new() - exp = pd.DatetimeIndex([pd.NaT] * 5, name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - - def test_shift(self): - # GH 9903 - for tz in self.tz: - idx = pd.DatetimeIndex([], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01 11:00' - '2011-01-01 12:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.DatetimeIndex(['2011-01-01 13:00', '2011-01-01 14:00' - '2011-01-01 15:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.DatetimeIndex(['2011-01-01 07:00', '2011-01-01 08:00' - '2011-01-01 09:00'], name='xxx', tz=tz) - tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - def test_nat(self): - self.assertIs(pd.DatetimeIndex._na_value, pd.NaT) - self.assertIs(pd.DatetimeIndex([])._na_value, pd.NaT) - - for tz in [None, 'US/Eastern', 'UTC']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([], dtype=np.intp)) - - idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([1], dtype=np.intp)) - - def test_equals(self): - # GH 13107 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) - - idx2 = 
pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'], - tz='US/Pacific') - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) - - # same internal, different tz - idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific') - tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) - self.assertFalse(idx.equals(idx3)) - self.assertFalse(idx.equals(idx3.copy())) - self.assertFalse(idx.equals(idx3.asobject)) - self.assertFalse(idx.asobject.equals(idx3)) - self.assertFalse(idx.equals(list(idx3))) - self.assertFalse(idx.equals(pd.Series(idx3))) - - class TestTimedeltaIndexOps(Ops): def setUp(self): super(TestTimedeltaIndexOps, self).setUp() diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index b5daf1ac0ec68..ff6cc4bb9853c 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3,7 +3,6 @@ import calendar import operator import sys -import warnings from datetime import datetime, time, timedelta from numpy.random import rand @@ -23,11 +22,9 @@ import pandas.util.testing as tm from pandas import ( Index, Series, DataFrame, isnull, date_range, Timestamp, Period, - DatetimeIndex, Int64Index, to_datetime, bdate_range, Float64Index, - NaT, timedelta_range, Timedelta, _np_version_under1p8, concat) + DatetimeIndex, to_datetime, bdate_range, Float64Index, + NaT, timedelta_range, Timedelta, concat) from pandas.compat import range, long, StringIO, lrange, lmap, zip, product -from pandas.compat.numpy import np_datetime64_compat -from pandas.core.common import PerformanceWarning from pandas.tslib import iNaT from pandas.util.testing import ( assert_frame_equal, assert_series_equal, assert_almost_equal, @@ -323,15 +320,6 @@ def test_dti_slicing(self): # don't carry freq through irregular slicing self.assertIsNone(dti2.freq) - def test_pass_datetimeindex_to_index(self): - # Bugs in #1396 - rng = date_range('1/1/2000', '3/1/2000') - idx = Index(rng, dtype=object) - - expected = Index(rng.to_pydatetime(), dtype=object) - - self.assert_numpy_array_equal(idx.values, expected.values) - def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') @@ -2718,1247 +2706,6 @@ def test_dataframe_dtypes(self): to_datetime(df) -class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True - - def test_hash_error(self): - index = date_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): - hash(index) - - def test_stringified_slice_with_tz(self): - # GH2658 - import datetime - start = datetime.datetime.now() - idx = DatetimeIndex(start=start, freq="1d", periods=10) - df = DataFrame(lrange(10), index=idx) - df["2013-01-14 23:44:34.437768-05:00":] # no exception here - - def test_append_join_nondatetimeindex(self): - rng = date_range('1/1/2000', periods=10) - idx = Index(['a', 'b', 'c', 'd']) - - result = rng.append(idx) - tm.assertIsInstance(result[0], Timestamp) - - # it works - rng.join(idx, how='outer') - - def test_to_period_nofreq(self): - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.to_period) - - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], - freq='infer') - self.assertEqual(idx.freqstr, 'D') - expected = pd.PeriodIndex(['2000-01-01', 
'2000-01-02', - '2000-01-03'], freq='D') - tm.assert_index_equal(idx.to_period(), expected) - - # GH 7606 - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) - self.assertEqual(idx.freqstr, None) - tm.assert_index_equal(idx.to_period(), expected) - - def test_000constructor_resolution(self): - # 2252 - t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) - idx = DatetimeIndex([t1]) - - self.assertEqual(idx.nanosecond[0], t1.nanosecond) - - def test_constructor_coverage(self): - rng = date_range('1/1/2000', periods=10.5) - exp = date_range('1/1/2000', periods=10) - tm.assert_index_equal(rng, exp) - - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') - - self.assertRaises(ValueError, DatetimeIndex, start='1/1/2000', - end='1/10/2000') - - self.assertRaises(ValueError, DatetimeIndex, '1/1/2000') - - # generator expression - gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) - result = DatetimeIndex(gen) - expected = DatetimeIndex([datetime(2000, 1, 1) + timedelta(i) - for i in range(10)]) - tm.assert_index_equal(result, expected) - - # NumPy string array - strings = np.array(['2000-01-01', '2000-01-02', '2000-01-03']) - result = DatetimeIndex(strings) - expected = DatetimeIndex(strings.astype('O')) - tm.assert_index_equal(result, expected) - - from_ints = DatetimeIndex(expected.asi8) - tm.assert_index_equal(from_ints, expected) - - # string with NaT - strings = np.array(['2000-01-01', '2000-01-02', 'NaT']) - result = DatetimeIndex(strings) - expected = DatetimeIndex(strings.astype('O')) - tm.assert_index_equal(result, expected) - - from_ints = DatetimeIndex(expected.asi8) - tm.assert_index_equal(from_ints, expected) - - # non-conforming - self.assertRaises(ValueError, DatetimeIndex, - ['2000-01-01', '2000-01-02', '2000-01-04'], freq='D') - - self.assertRaises(ValueError, DatetimeIndex, start='2011-01-01', - freq='b') - self.assertRaises(ValueError, DatetimeIndex, end='2011-01-01', - freq='B') - self.assertRaises(ValueError, DatetimeIndex, periods=10, freq='D') - - def test_constructor_datetime64_tzformat(self): - # GH 6572 - tm._skip_if_no_pytz() - import pytz - # ISO 8601 format results in pytz.FixedOffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013-01-01T00:00:00-05:00', - '2016-01-01T23:59:59-05:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013-01-01T00:00:00+09:00', - '2016-01-01T23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - tm._skip_if_no_dateutil() - - # Non ISO 8601 format results in dateutil.tz.tzoffset - for freq in ['AS', 'W-SUN']: - idx = date_range('2013/1/1 0:00:00-5:00', '2016/1/1 23:59:59-5:00', - freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(-300)) - tm.assert_index_equal(idx, expected) - # Unable to use `US/Eastern` because of DST - expected_i8 = date_range('2013-01-01T00:00:00', - 
'2016-01-01T23:59:59', freq=freq, - tz='America/Lima') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - idx = date_range('2013/1/1 0:00:00+9:00', - '2016/1/1 23:59:59+09:00', freq=freq) - expected = date_range('2013-01-01T00:00:00', '2016-01-01T23:59:59', - freq=freq, tz=pytz.FixedOffset(540)) - tm.assert_index_equal(idx, expected) - expected_i8 = date_range('2013-01-01T00:00:00', - '2016-01-01T23:59:59', freq=freq, - tz='Asia/Tokyo') - self.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) - - def test_constructor_dtype(self): - - # passing a dtype with a tz should localize - idx = DatetimeIndex(['2013-01-01', '2013-01-02'], - dtype='datetime64[ns, US/Eastern]') - expected = DatetimeIndex(['2013-01-01', '2013-01-02'] - ).tz_localize('US/Eastern') - tm.assert_index_equal(idx, expected) - - idx = DatetimeIndex(['2013-01-01', '2013-01-02'], - tz='US/Eastern') - tm.assert_index_equal(idx, expected) - - # if we already have a tz and its not the same, then raise - idx = DatetimeIndex(['2013-01-01', '2013-01-02'], - dtype='datetime64[ns, US/Eastern]') - - self.assertRaises(ValueError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns]')) - - # this is effectively trying to convert tz's - self.assertRaises(TypeError, - lambda: DatetimeIndex(idx, - dtype='datetime64[ns, CET]')) - self.assertRaises(ValueError, - lambda: DatetimeIndex( - idx, tz='CET', - dtype='datetime64[ns, US/Eastern]')) - result = DatetimeIndex(idx, dtype='datetime64[ns, US/Eastern]') - tm.assert_index_equal(idx, result) - - def test_constructor_name(self): - idx = DatetimeIndex(start='2000-01-01', periods=1, freq='A', - name='TEST') - self.assertEqual(idx.name, 'TEST') - - def test_comparisons_coverage(self): - rng = date_range('1/1/2000', periods=10) - - # raise TypeError for now - self.assertRaises(TypeError, rng.__lt__, rng[3].value) - - result = rng == list(rng) - exp = rng == rng - self.assert_numpy_array_equal(result, exp) - - def test_comparisons_nat(self): - - fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) - fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) - - didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT, - '2014-05-01', '2014-07-01']) - didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT, - '2014-06-01', '2014-07-01']) - darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'), - np_datetime64_compat('2014-03-01 00:00Z'), - np_datetime64_compat('nat'), np.datetime64('nat'), - np_datetime64_compat('2014-06-01 00:00Z'), - np_datetime64_compat('2014-07-01 00:00Z')]) - - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(fidx1, fidx2), (didx1, didx2)] - else: - cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] - - # Check pd.NaT is handles as the same as np.nan - with tm.assert_produces_warning(None): - for idx1, idx2 in cases: - - result = idx1 < idx2 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 > idx1 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - 
self.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - self.assert_numpy_array_equal(result, expected) - - with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: - result = idx1 < val - expected = np.array([False, False, False, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 > val - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - self.assert_numpy_array_equal(result, expected) - result = idx1 >= val - self.assert_numpy_array_equal(result, expected) - - result = idx1 == val - self.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, True, True, True, True]) - self.assert_numpy_array_equal(result, expected) - - # Check pd.NaT is handles as the same as np.nan - with tm.assert_produces_warning(None): - for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: - result = idx1 < val - expected = np.array([True, False, False, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 > val - expected = np.array([False, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= val - expected = np.array([True, False, True, False, False, False]) - self.assert_numpy_array_equal(result, expected) - result = idx1 >= val - expected = np.array([False, False, True, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == val - expected = np.array([False, False, True, False, False, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != val - expected = np.array([True, True, False, True, True, True]) - self.assert_numpy_array_equal(result, expected) - - def test_map(self): - rng = date_range('1/1/2000', periods=10) - - f = lambda x: x.strftime('%Y%m%d') - result = rng.map(f) - exp = Index([f(x) for x in rng], dtype='= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_fill_value_with_timezone(self): - idx = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.DatetimeIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', tz='US/Eastern') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_map_bug_1677(self): - index = DatetimeIndex(['2012-04-25 09:30:00.393000']) - f = index.asof - - result = 
index.map(f) - expected = Index([f(index[0])]) - tm.assert_index_equal(result, expected) - - def test_groupby_function_tuple_1677(self): - df = DataFrame(np.random.rand(100), - index=date_range("1/1/2000", periods=100)) - monthly_group = df.groupby(lambda x: (x.year, x.month)) - - result = monthly_group.mean() - tm.assertIsInstance(result.index[0], tuple) - - def test_append_numpy_bug_1681(self): - # another datetime64 bug - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - a = DataFrame() - c = DataFrame({'A': 'foo', 'B': dr}, index=dr) - - result = a.append(c) - self.assertTrue((result['B'] == dr).all()) - - def test_isin(self): - index = tm.makeDateIndex(4) - result = index.isin(index) - self.assertTrue(result.all()) - - result = index.isin(list(index)) - self.assertTrue(result.all()) - - assert_almost_equal(index.isin([index[2], 5]), - np.array([False, False, True, False])) - - def test_union(self): - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = Int64Index(np.arange(10, 30, 2)) - result = i1.union(i2) - expected = Int64Index(np.arange(0, 30, 2)) - tm.assert_index_equal(result, expected) - - def test_union_with_DatetimeIndex(self): - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = DatetimeIndex(start='2012-01-03 00:00:00', periods=10, freq='D') - i1.union(i2) # Works - i2.union(i1) # Fails with "AttributeError: can't set attribute" - - def test_time(self): - rng = pd.date_range('1/1/2000', freq='12min', periods=10) - result = pd.Index(rng).time - expected = [t.time() for t in rng] - self.assertTrue((result == expected).all()) - - def test_date(self): - rng = pd.date_range('1/1/2000', freq='12H', periods=10) - result = pd.Index(rng).date - expected = [t.date() for t in rng] - self.assertTrue((result == expected).all()) - - def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe(10, 10, - data_gen_f=lambda *args, **kwargs: randn(), - r_idx_type='i', c_idx_type='dt') - cols = df.columns.join(df.index, how='outer') - joined = cols.join(df.columns) - self.assertEqual(cols.dtype, np.dtype('O')) - self.assertEqual(cols.dtype, joined.dtype) - tm.assert_numpy_array_equal(cols.values, joined.values) - - def test_slice_keeps_name(self): - # GH4226 - st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles') - et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles') - dr = pd.date_range(st, et, freq='H', name='timebucket') - self.assertEqual(dr[1:].name, dr.name) - - def test_join_self(self): - index = date_range('1/1/2000', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - self.assertIs(index, joined) - - def assert_index_parameters(self, index): - assert index.freq == '40960N' - assert index.inferred_freq == '40960N' - - def test_ns_index(self): - nsamples = 400 - ns = int(1e9 / 24414) - dtstart = np.datetime64('2012-09-20T00:00:00') - - dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, 'ns') - freq = ns * offsets.Nano() - index = pd.DatetimeIndex(dt, freq=freq, name='time') - self.assert_index_parameters(index) - - new_index = pd.DatetimeIndex(start=index[0], end=index[-1], - freq=index.freq) - self.assert_index_parameters(new_index) - - def test_join_with_period_index(self): - df = tm.makeCustomDataframe( - 10, 10, data_gen_f=lambda *args: np.random.randint(2), - c_idx_type='p', r_idx_type='dt') - s = df.iloc[:5, 0] - joins = 'left', 'right', 'inner', 'outer' - - for join in joins: - with tm.assertRaisesRegexp(ValueError, 'can only call with other ' - 'PeriodIndex-ed objects'): - 
df.columns.join(s.index, how=join) - - def test_factorize(self): - idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', - '2014-03', '2014-03']) - - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) - exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - # tz must be preserved - idx1 = idx1.tz_localize('Asia/Tokyo') - exp_idx = exp_idx.tz_localize('Asia/Tokyo') - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01', - '2014-03', '2014-01']) - - exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) - exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03']) - arr, idx = idx2.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) - exp_idx = DatetimeIndex(['2014-03', '2014-02', '2014-01']) - arr, idx = idx2.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - # freq must be preserved - idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo') - exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) - arr, idx = idx3.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, idx3) - - def test_factorize_tz(self): - # GH 13750 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz) - idx = base.repeat(5) - - exp_arr = np.arange(100, dtype=np.intp).repeat(5) - - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(res, base) - - def test_factorize_dst(self): - # GH 13750 - idx = pd.date_range('2016-11-06', freq='H', periods=12, - tz='US/Eastern') - - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) - tm.assert_index_equal(res, idx) - - idx = pd.date_range('2016-06-13', freq='H', periods=12, - tz='US/Eastern') - - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - self.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) - tm.assert_index_equal(res, idx) - - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), - date_range('2014-01-01', periods=20, freq='MS')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]) - assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1]) - - assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]) - assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1]) - - assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp( - '2014-10-01'):-1], SLC[13:8:-1]) - assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1], - SLC[13:8:-1]) - - 
assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), - date_range('2014-01-01', periods=20, freq='MS')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_slice_bounds_empty(self): - # GH 14354 - empty_idx = DatetimeIndex(freq='1H', periods=0, end='2015') - - right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc') - exp = Timestamp('2015-01-02 23:59:59.999999999') - self.assertEqual(right, exp) - - left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') - exp = Timestamp('2015-01-02 00:00:00') - self.assertEqual(left, exp) - - class TestDatetime64(tm.TestCase): """ Also test support for datetime64[ns] in Series / DataFrame @@ -3969,152 +2716,6 @@ def setUp(self): end=datetime(2005, 1, 10), freq='Min') self.series = Series(rand(len(dti)), dti) - def test_datetimeindex_accessors(self): - dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) - - self.assertEqual(dti.year[0], 1998) - self.assertEqual(dti.month[0], 1) - self.assertEqual(dti.day[0], 1) - self.assertEqual(dti.hour[0], 0) - self.assertEqual(dti.minute[0], 0) - self.assertEqual(dti.second[0], 0) - self.assertEqual(dti.microsecond[0], 0) - self.assertEqual(dti.dayofweek[0], 3) - - self.assertEqual(dti.dayofyear[0], 1) - self.assertEqual(dti.dayofyear[120], 121) - - self.assertEqual(dti.weekofyear[0], 1) - self.assertEqual(dti.weekofyear[120], 18) - - self.assertEqual(dti.quarter[0], 1) - self.assertEqual(dti.quarter[120], 2) - - self.assertEqual(dti.days_in_month[0], 31) - self.assertEqual(dti.days_in_month[90], 30) - - self.assertEqual(dti.is_month_start[0], True) - self.assertEqual(dti.is_month_start[1], False) - self.assertEqual(dti.is_month_start[31], True) - self.assertEqual(dti.is_quarter_start[0], True) - self.assertEqual(dti.is_quarter_start[90], True) - self.assertEqual(dti.is_year_start[0], True) - self.assertEqual(dti.is_year_start[364], False) - self.assertEqual(dti.is_month_end[0], False) - self.assertEqual(dti.is_month_end[30], True) - self.assertEqual(dti.is_month_end[31], False) - self.assertEqual(dti.is_month_end[364], True) - self.assertEqual(dti.is_quarter_end[0], False) - self.assertEqual(dti.is_quarter_end[30], False) - self.assertEqual(dti.is_quarter_end[89], True) - self.assertEqual(dti.is_quarter_end[364], True) - self.assertEqual(dti.is_year_end[0], False) - self.assertEqual(dti.is_year_end[364], True) - - # GH 11128 - self.assertEqual(dti.weekday_name[4], u'Monday') - self.assertEqual(dti.weekday_name[5], u'Tuesday') - self.assertEqual(dti.weekday_name[6], u'Wednesday') - self.assertEqual(dti.weekday_name[7], u'Thursday') - self.assertEqual(dti.weekday_name[8], u'Friday') - self.assertEqual(dti.weekday_name[9], u'Saturday') - self.assertEqual(dti.weekday_name[10], u'Sunday') - - self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') - self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') - self.assertEqual(Timestamp('2016-04-06').weekday_name, u'Wednesday') - self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') - self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') - self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') - self.assertEqual(Timestamp('2016-04-10').weekday_name, 
u'Sunday') - - self.assertEqual(len(dti.year), 365) - self.assertEqual(len(dti.month), 365) - self.assertEqual(len(dti.day), 365) - self.assertEqual(len(dti.hour), 365) - self.assertEqual(len(dti.minute), 365) - self.assertEqual(len(dti.second), 365) - self.assertEqual(len(dti.microsecond), 365) - self.assertEqual(len(dti.dayofweek), 365) - self.assertEqual(len(dti.dayofyear), 365) - self.assertEqual(len(dti.weekofyear), 365) - self.assertEqual(len(dti.quarter), 365) - self.assertEqual(len(dti.is_month_start), 365) - self.assertEqual(len(dti.is_month_end), 365) - self.assertEqual(len(dti.is_quarter_start), 365) - self.assertEqual(len(dti.is_quarter_end), 365) - self.assertEqual(len(dti.is_year_start), 365) - self.assertEqual(len(dti.is_year_end), 365) - self.assertEqual(len(dti.weekday_name), 365) - - dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), - periods=4) - - self.assertEqual(sum(dti.is_quarter_start), 0) - self.assertEqual(sum(dti.is_quarter_end), 4) - self.assertEqual(sum(dti.is_year_start), 0) - self.assertEqual(sum(dti.is_year_end), 1) - - # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, - # CBD requires np >= 1.7 - bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') - dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) - self.assertRaises(ValueError, lambda: dti.is_month_start) - - dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) - - self.assertEqual(dti.is_month_start[0], 1) - - tests = [ - (Timestamp('2013-06-01', freq='M').is_month_start, 1), - (Timestamp('2013-06-01', freq='BM').is_month_start, 0), - (Timestamp('2013-06-03', freq='M').is_month_start, 0), - (Timestamp('2013-06-03', freq='BM').is_month_start, 1), - (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1), - (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1), - (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1), - (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1), - (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1), - (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1), - (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1), - (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0), - (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0), - (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1), - (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1), - (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1), - (Timestamp('2013-06-30', freq='BQ').is_month_end, 0), - (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0), - (Timestamp('2013-06-30', freq='BQ').is_year_end, 0), - (Timestamp('2013-06-28', freq='BQ').is_month_end, 1), - (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1), - (Timestamp('2013-06-28', freq='BQ').is_year_end, 0), - (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0), - (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0), - (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0), - (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1), - (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1), - (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1), - (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1), - (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1), - (Timestamp('2012-02-01').days_in_month, 29), - (Timestamp('2013-02-01').days_in_month, 28)] - - for ts, value in tests: - self.assertEqual(ts, value) - - def test_nanosecond_field(self): - dti = DatetimeIndex(np.arange(10)) - - 
self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) - - def test_datetimeindex_diff(self): - dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=100) - dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=98) - self.assertEqual(len(dti1.difference(dti2)), 2) - def test_fancy_getitem(self): dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)) @@ -4143,87 +2744,6 @@ def test_fancy_setitem(self): s['1/2/2009':'2009-06-05'] = -3 self.assertTrue((s[48:54] == -3).all()) - def test_datetimeindex_constructor(self): - arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] - self.assertRaises(Exception, DatetimeIndex, arr) - - arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] - idx1 = DatetimeIndex(arr) - - arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] - idx2 = DatetimeIndex(arr) - - arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', - '2005-01-04'] - idx3 = DatetimeIndex(arr) - - arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', - '2005-01-04'], dtype='O') - idx4 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) - idx5 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' - ]) - idx6 = DatetimeIndex(arr) - - idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) - idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, - yearfirst=True) - tm.assert_index_equal(idx7, idx8) - - for other in [idx2, idx3, idx4, idx5, idx6]: - self.assertTrue((idx1.values == other.values).all()) - - sdate = datetime(1999, 12, 25) - edate = datetime(2000, 1, 1) - idx = DatetimeIndex(start=sdate, freq='1B', periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) - self.assertEqual(idx.freq, 'B') - - idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[-1], edate) - self.assertEqual(idx.freq, '5D') - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.Week(weekday=6)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.QuarterBegin(startingMonth=1)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.BQuarterEnd(startingMonth=12)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - def test_dayfirst(self): - # GH 5917 - arr = ['10/02/2014', '11/02/2014', '12/02/2014'] - expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), - datetime(2014, 2, 12)]) - idx1 = DatetimeIndex(arr, dayfirst=True) - idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) - idx5 = DatetimeIndex(Index(arr), dayfirst=True) - idx6 = DatetimeIndex(Series(arr), dayfirst=True) - tm.assert_index_equal(expected, idx1) - tm.assert_index_equal(expected, idx2) - tm.assert_index_equal(expected, idx3) - tm.assert_index_equal(expected, idx4) - tm.assert_index_equal(expected, idx5) - tm.assert_index_equal(expected, idx6) - def test_dti_snap(self): dti = 
DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') @@ -4255,43 +2775,6 @@ def test_dti_reset_index_round_trip(self): self.assertEqual(df.index[0], stamp) self.assertEqual(df.reset_index()['Date'][0], stamp) - def test_dti_set_index_reindex(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern') - idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo') - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.reindex(idx2) - tm.assert_index_equal(df.index, idx2) - - # 11314 - # with tz - index = date_range(datetime(2015, 10, 1), - datetime(2015, 10, 1, 23), - freq='H', tz='US/Eastern') - df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index) - new_index = date_range(datetime(2015, 10, 2), - datetime(2015, 10, 2, 23), - freq='H', tz='US/Eastern') - - # TODO: unused? - result = df.set_index(new_index) # noqa - - self.assertEqual(new_index.freq, index.freq) - - def test_datetimeindex_union_join_empty(self): - dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') - empty = Index([]) - - result = dti.union(empty) - tm.assertIsInstance(result, DatetimeIndex) - self.assertIs(result, result) - - result = dti.join(empty) - tm.assertIsInstance(result, DatetimeIndex) - def test_series_set_value(self): # #1561 diff --git a/setup.py b/setup.py index 0c4dd33a70482..2ba4331aa1561 100755 --- a/setup.py +++ b/setup.py @@ -640,6 +640,7 @@ def pxd(name): 'pandas.tests', 'pandas.tests.frame', 'pandas.tests.indexes', + 'pandas.tests.indexes.datetimes', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', From df9fc4f17d342cc75b596918b1da1ecaaf0ae54f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 2 Feb 2017 20:18:13 -0500 Subject: [PATCH 009/353] TST: create the pandas/tests/scalar directory structure --- pandas/tests/scalar/__init__.py | 0 pandas/tests/scalar/test_timedelta.py | 1 + pandas/tests/scalar/test_timestamp.py | 1 + setup.py | 1 + 4 files changed, 3 insertions(+) create mode 100644 pandas/tests/scalar/__init__.py create mode 100644 pandas/tests/scalar/test_timedelta.py create mode 100644 pandas/tests/scalar/test_timestamp.py diff --git a/pandas/tests/scalar/__init__.py b/pandas/tests/scalar/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py new file mode 100644 index 0000000000000..fab790c4bf948 --- /dev/null +++ b/pandas/tests/scalar/test_timedelta.py @@ -0,0 +1 @@ +""" test the scalar Timedelta """ diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py new file mode 100644 index 0000000000000..2159c59de72ce --- /dev/null +++ b/pandas/tests/scalar/test_timestamp.py @@ -0,0 +1 @@ +""" test the scalar Timestamp """ diff --git a/setup.py b/setup.py index 2ba4331aa1561..93a044bc3cc7d 100755 --- a/setup.py +++ b/setup.py @@ -644,6 +644,7 @@ def pxd(name): 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', + 'pandas.tests.scalar', 'pandas.tests.types', 'pandas.tests.test_msgpack', 'pandas.tests.plotting', From 9ddba8dc811184c49d9bc4df5dc97d505345ec23 Mon Sep 17 00:00:00 2001 From: Kacawi Date: Fri, 3 Feb 2017 14:50:25 +0100 Subject: [PATCH 010/353] Added a tutorial for pandas dataframes (#15295) --- doc/source/tutorials.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 
c1c1c81915c46..2489b787560d0 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -177,3 +177,4 @@ Various Tutorials - `Intro to pandas data structures, by Greg Reda `_ - `Pandas and Python: Top 10, by Manish Amde `_ - `Pandas Tutorial, by Mikhail Semeniuk `_ +- `Pandas DataFrames Tutorial, by Karlijn Willems `_ From 69a9b05b23819dc5b6ccc79a41be80e4697b6eea Mon Sep 17 00:00:00 2001 From: TrigonaMinima Date: Sat, 4 Feb 2017 11:02:30 -0500 Subject: [PATCH 011/353] TST: Timestamp and Timeseries tests reorg (gh14854) xref partial on #14854 Author: TrigonaMinima Closes #15301 from TrigonaMinima/gh14854-timestamp and squashes the following commits: d8e3f4d [TrigonaMinima] splitting test_timeseries.py further 4072d93 [TrigonaMinima] TST: tseries/tests/test_timeseries.py tests moved to appropriate places. dbfd2ba [TrigonaMinima] TST: Timestamp tests compiled (gh14854) --- .../indexes/datetimes/test_construction.py | 69 +- .../indexes/datetimes/test_date_range.py | 112 + .../tests/indexes/datetimes/test_datetime.py | 68 - pandas/tests/indexes/datetimes/test_misc.py | 214 +- pandas/tests/indexes/test_timedelta.py | 43 + pandas/tests/scalar/test_timestamp.py | 1577 +++++++ pandas/tests/series/test_missing.py | 103 + pandas/tests/series/test_timeseries.py | 3147 ++++++++++++- pandas/tseries/tests/test_timeseries.py | 4184 ----------------- pandas/tseries/tests/test_tslib.py | 869 +--- 10 files changed, 5183 insertions(+), 5203 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_date_range.py create mode 100644 pandas/tests/indexes/test_timedelta.py delete mode 100644 pandas/tseries/tests/test_timeseries.py diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index ae4eb6ee397b6..f8eca0f0d91d0 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -4,7 +4,8 @@ import pandas as pd import pandas.util.testing as tm from pandas.tslib import OutOfBoundsDatetime -from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range) +from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, + to_datetime) class TestDatetimeIndex(tm.TestCase): @@ -423,3 +424,69 @@ def test_000constructor_resolution(self): idx = DatetimeIndex([t1]) self.assertEqual(idx.nanosecond[0], t1.nanosecond) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_dti_constructor_preserve_dti_freq(self): + rng = date_range('1/1/2000', '1/2/2000', freq='5min') + + rng2 = DatetimeIndex(rng) + self.assertEqual(rng.freq, rng2.freq) + + def test_dti_constructor_years_only(self): + # GH 6961 + for tz in [None, 'UTC', 'Asia/Tokyo', 'dateutil/US/Pacific']: + rng1 = date_range('2014', '2015', freq='M', tz=tz) + expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) + + rng2 = date_range('2014', '2015', freq='MS', tz=tz) + expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', + tz=tz) + + rng3 = date_range('2014', '2020', freq='A', tz=tz) + expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) + + rng4 = date_range('2014', '2020', freq='AS', tz=tz) + expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', + tz=tz) + + for rng, expected in [(rng1, expected1), (rng2, expected2), + (rng3, expected3), (rng4, expected4)]: + tm.assert_index_equal(rng, expected) + + def test_dti_constructor_small_int(self): + # GH 13721 + exp = DatetimeIndex(['1970-01-01 00:00:00.00000000', + '1970-01-01 
00:00:00.00000001', + '1970-01-01 00:00:00.00000002']) + + for dtype in [np.int64, np.int32, np.int16, np.int8]: + arr = np.array([0, 10, 20], dtype=dtype) + tm.assert_index_equal(DatetimeIndex(arr), exp) + + def test_dti_constructor_numpy_timeunits(self): + # GH 9114 + base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) + + for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', + 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: + values = base.values.astype(dtype) + + tm.assert_index_equal(DatetimeIndex(values), base) + tm.assert_index_equal(to_datetime(values), base) + + def test_constructor_int64_nocopy(self): + # #1624 + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr) + + arr[50:100] = -1 + self.assertTrue((index.asi8[50:100] == -1).all()) + + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr, copy=True) + + arr[50:100] = -1 + self.assertTrue((index.asi8[50:100] != -1).all()) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py new file mode 100644 index 0000000000000..b3d6c41573ab8 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -0,0 +1,112 @@ +from datetime import datetime, timedelta, time + +import pandas as pd +import pandas.util.testing as tm +from pandas import date_range, offsets, DatetimeIndex, Timestamp + +from pandas.tests.series.common import TestData + + +class TestTimeSeries(TestData, tm.TestCase): + _multiprocess_can_split_ = True + + def test_date_range_gen_error(self): + rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') + self.assertEqual(len(rng), 4) + + def test_date_range_negative_freq(self): + # GH 11018 + rng = date_range('2011-12-31', freq='-2A', periods=3) + exp = pd.DatetimeIndex(['2011-12-31', '2009-12-31', + '2007-12-31'], freq='-2A') + tm.assert_index_equal(rng, exp) + self.assertEqual(rng.freq, '-2A') + + rng = date_range('2011-01-31', freq='-2M', periods=3) + exp = pd.DatetimeIndex(['2011-01-31', '2010-11-30', + '2010-09-30'], freq='-2M') + tm.assert_index_equal(rng, exp) + self.assertEqual(rng.freq, '-2M') + + def test_date_range_bms_bug(self): + # #1645 + rng = date_range('1/1/2000', periods=10, freq='BMS') + + ex_first = Timestamp('2000-01-03') + self.assertEqual(rng[0], ex_first) + + def test_date_range_normalize(self): + snap = datetime.today() + n = 50 + + rng = date_range(snap, periods=n, normalize=False, freq='2D') + + offset = timedelta(2) + values = DatetimeIndex([snap + i * offset for i in range(n)]) + + tm.assert_index_equal(rng, values) + + rng = date_range('1/1/2000 08:15', periods=n, normalize=False, + freq='B') + the_time = time(8, 15) + for val in rng: + self.assertEqual(val.time(), the_time) + + def test_date_range_fy5252(self): + dr = date_range(start="2013-01-01", periods=2, freq=offsets.FY5253( + startingMonth=1, weekday=3, variation="nearest")) + self.assertEqual(dr[0], Timestamp('2013-01-31')) + self.assertEqual(dr[1], Timestamp('2014-01-30')) + + def test_date_range_ambiguous_arguments(self): + # #2538 + start = datetime(2011, 1, 1, 5, 3, 40) + end = datetime(2011, 1, 1, 8, 9, 40) + + self.assertRaises(ValueError, date_range, start, end, freq='s', + periods=10) + + def test_date_range_businesshour(self): + idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', + '2014-07-04 11:00', + '2014-07-04 12:00', '2014-07-04 13:00', + '2014-07-04 14:00', + '2014-07-04 15:00', '2014-07-04 16:00'], + freq='BH') + rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', 
freq='BH') + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex( + ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH') + rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH') + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', + '2014-07-04 11:00', + '2014-07-04 12:00', '2014-07-04 13:00', + '2014-07-04 14:00', + '2014-07-04 15:00', '2014-07-04 16:00', + '2014-07-07 09:00', '2014-07-07 10:00', + '2014-07-07 11:00', + '2014-07-07 12:00', '2014-07-07 13:00', + '2014-07-07 14:00', + '2014-07-07 15:00', '2014-07-07 16:00', + '2014-07-08 09:00', '2014-07-08 10:00', + '2014-07-08 11:00', + '2014-07-08 12:00', '2014-07-08 13:00', + '2014-07-08 14:00', + '2014-07-08 15:00', '2014-07-08 16:00'], + freq='BH') + rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH') + tm.assert_index_equal(idx, rng) + + def test_range_misspecified(self): + # GH #1095 + + self.assertRaises(ValueError, date_range, '1/1/2000') + self.assertRaises(ValueError, date_range, end='1/1/2000') + self.assertRaises(ValueError, date_range, periods=10) + + self.assertRaises(ValueError, date_range, '1/1/2000', freq='H') + self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H') + self.assertRaises(ValueError, date_range, periods=10, freq='H') diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index a69406804cd97..f92fca6ecfa14 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -457,74 +457,6 @@ def test_sort_values(self): self.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) - def test_round(self): - - # round - dt = Timestamp('20130101 09:10:11') - result = dt.round('D') - expected = Timestamp('20130101') - self.assertEqual(result, expected) - - dt = Timestamp('20130101 19:10:11') - result = dt.round('D') - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - dt = Timestamp('20130201 12:00:00') - result = dt.round('D') - expected = Timestamp('20130202') - self.assertEqual(result, expected) - - dt = Timestamp('20130104 12:00:00') - result = dt.round('D') - expected = Timestamp('20130105') - self.assertEqual(result, expected) - - dt = Timestamp('20130104 12:32:00') - result = dt.round('30Min') - expected = Timestamp('20130104 12:30:00') - self.assertEqual(result, expected) - - dti = date_range('20130101 09:10:11', periods=5) - result = dti.round('D') - expected = date_range('20130101', periods=5) - tm.assert_index_equal(result, expected) - - # floor - dt = Timestamp('20130101 09:10:11') - result = dt.floor('D') - expected = Timestamp('20130101') - self.assertEqual(result, expected) - - # ceil - dt = Timestamp('20130101 09:10:11') - result = dt.ceil('D') - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - # round with tz - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result = dt.round('D') - expected = Timestamp('20130101', tz='US/Eastern') - self.assertEqual(result, expected) - - dt = Timestamp('20130101 09:10:11', tz='US/Eastern') - result = dt.round('s') - self.assertEqual(result, dt) - - dti = date_range('20130101 09:10:11', - periods=5).tz_localize('UTC').tz_convert('US/Eastern') - result = dti.round('D') - expected = date_range('20130101', periods=5).tz_localize('US/Eastern') - tm.assert_index_equal(result, expected) - - result = dti.round('s') - tm.assert_index_equal(result, dti) - - # invalid - for freq in ['Y', 'M', 'foobar']: - 
self.assertRaises(ValueError, lambda: dti.round(freq)) - def test_take(self): dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15), datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)] diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 3dfe95fa77b85..4685df580190b 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,10 +1,12 @@ import numpy as np +import pandas as pd import pandas.lib as lib import pandas.util.testing as tm -from pandas import Float64Index, date_range, Timestamp from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, - Series, DataFrame) + Series, DataFrame, Float64Index, date_range, Timestamp) + +from pandas.util.testing import assert_series_equal class TestDateTimeIndexToJulianDate(tm.TestCase): @@ -65,6 +67,196 @@ def test_pass_datetimeindex_to_index(self): self.assert_numpy_array_equal(idx.values, expected.values) + def test_range_edges(self): + # GH 13672 + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'), + end=Timestamp('1970-01-01 00:00:00.000000004'), + freq='N') + exp = DatetimeIndex(['1970-01-01 00:00:00.000000001', + '1970-01-01 00:00:00.000000002', + '1970-01-01 00:00:00.000000003', + '1970-01-01 00:00:00.000000004']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000004'), + end=Timestamp('1970-01-01 00:00:00.000000001'), + freq='N') + exp = DatetimeIndex([]) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'), + end=Timestamp('1970-01-01 00:00:00.000000001'), + freq='N') + exp = DatetimeIndex(['1970-01-01 00:00:00.000000001']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000001'), + end=Timestamp('1970-01-01 00:00:00.000004'), + freq='U') + exp = DatetimeIndex(['1970-01-01 00:00:00.000001', + '1970-01-01 00:00:00.000002', + '1970-01-01 00:00:00.000003', + '1970-01-01 00:00:00.000004']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.001'), + end=Timestamp('1970-01-01 00:00:00.004'), + freq='L') + exp = DatetimeIndex(['1970-01-01 00:00:00.001', + '1970-01-01 00:00:00.002', + '1970-01-01 00:00:00.003', + '1970-01-01 00:00:00.004']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:01'), + end=Timestamp('1970-01-01 00:00:04'), freq='S') + exp = DatetimeIndex(['1970-01-01 00:00:01', '1970-01-01 00:00:02', + '1970-01-01 00:00:03', '1970-01-01 00:00:04']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 00:01'), + end=Timestamp('1970-01-01 00:04'), freq='T') + exp = DatetimeIndex(['1970-01-01 00:01', '1970-01-01 00:02', + '1970-01-01 00:03', '1970-01-01 00:04']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01 01:00'), + end=Timestamp('1970-01-01 04:00'), freq='H') + exp = DatetimeIndex(['1970-01-01 01:00', '1970-01-01 02:00', + '1970-01-01 03:00', '1970-01-01 04:00']) + tm.assert_index_equal(idx, exp) + + idx = DatetimeIndex(start=Timestamp('1970-01-01'), + end=Timestamp('1970-01-04'), freq='D') + exp = DatetimeIndex(['1970-01-01', '1970-01-02', + '1970-01-03', '1970-01-04']) + tm.assert_index_equal(idx, exp) + + def test_datetimeindex_integers_shift(self): + rng = date_range('1/1/2000', periods=20) + + result = rng + 5 + expected = rng.shift(5) + tm.assert_index_equal(result, expected) + + 
result = rng - 5 + expected = rng.shift(-5) + tm.assert_index_equal(result, expected) + + def test_datetimeindex_repr_short(self): + dr = date_range(start='1/1/2012', periods=1) + repr(dr) + + dr = date_range(start='1/1/2012', periods=2) + repr(dr) + + dr = date_range(start='1/1/2012', periods=3) + repr(dr) + + def test_getitem_setitem_datetimeindex(self): + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # repeat all the above with naive datetimes + result = ts[datetime(1990, 1, 1, 4)] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4)] = 0 + result[datetime(1990, 1, 1, 4)] = ts[4] + assert_series_equal(result, ts) + + result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] + assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected = ts[24:48] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = ts[24:48] + assert_series_equal(result, ts) + + def test_normalize(self): + rng = date_range('1/1/2000 9:30', periods=10, freq='D') + + result = rng.normalize() + expected = date_range('1/1/2000', periods=10, freq='D') + tm.assert_index_equal(result, expected) + + rng_ns = pd.DatetimeIndex(np.array([1380585623454345752, + 1380585612343234312]).astype( + "datetime64[ns]")) + rng_ns_normalized = rng_ns.normalize() + expected = pd.DatetimeIndex(np.array([1380585600000000000, + 1380585600000000000]).astype( + "datetime64[ns]")) + tm.assert_index_equal(rng_ns_normalized, expected) + + self.assertTrue(result.is_normalized) + self.assertFalse(rng.is_normalized) + + def test_series_ctor_plus_datetimeindex(self): + rng = date_range('20090415', '20090519', freq='B') + data = dict((k, 1) for k in rng) + + result = Series(data, index=rng) + self.assertIs(result.index, rng) + class TestDatetime64(tm.TestCase): @@ -331,3 +523,21 @@ def test_datetimeindex_union_join_empty(self): result = dti.join(empty) tm.assertIsInstance(result, DatetimeIndex) + + +class 
TestTimeSeriesDuplicates(tm.TestCase): + _multiprocess_can_split_ = True + + def test_recreate_from_data(self): + freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', + 'C'] + + for f in freqs: + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) + idx = DatetimeIndex(org, freq=f) + tm.assert_index_equal(idx, org) + + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, + tz='US/Pacific', periods=1) + idx = DatetimeIndex(org, freq=f, tz='US/Pacific') + tm.assert_index_equal(idx, org) diff --git a/pandas/tests/indexes/test_timedelta.py b/pandas/tests/indexes/test_timedelta.py new file mode 100644 index 0000000000000..be01ad03a0660 --- /dev/null +++ b/pandas/tests/indexes/test_timedelta.py @@ -0,0 +1,43 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import (timedelta_range, date_range, Series, Timedelta, + DatetimeIndex) + + +class TestSlicing(tm.TestCase): + + def test_timedelta(self): + # this is valid too + index = date_range('1/1/2000', periods=50, freq='B') + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + self.assertTrue(tm.equalContents(index, back)) + self.assertEqual(shifted.freq, index.freq) + self.assertEqual(shifted.freq, back.freq) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH4134, buggy with timedeltas + rng = date_range('2013', '2014') + s = Series(rng) + result1 = rng - pd.offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - pd.offsets.Hour(1)) + tm.assert_index_equal(result1, result4) + tm.assert_index_equal(result2, result3) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_series_box_timedelta(self): + rng = timedelta_range('1 day 1 s', periods=5, freq='h') + s = Series(rng) + tm.assertIsInstance(s[1], Timedelta) + tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2159c59de72ce..94369ebbd0a19 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1 +1,1578 @@ """ test the scalar Timestamp """ + +import sys +import operator +import calendar +import numpy as np +from datetime import datetime, timedelta +from distutils.version import LooseVersion + +import pandas as pd +import pandas.util.testing as tm +import pandas._period as period +from pandas.tseries import offsets, frequencies +from pandas.tslib import get_timezone, iNaT +from pandas.compat import lrange, long +from pandas.util.testing import assert_series_equal +from pandas.compat.numpy import np_datetime64_compat +from pandas import (Timestamp, date_range, Period, Timedelta, tslib, compat, + Series, NaT, isnull, DataFrame, DatetimeIndex) +from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, + RESO_MS, RESO_SEC) + +randn = np.random.randn + + +class TestTimestamp(tm.TestCase): + + def test_constructor(self): + base_str = '2014-07-01 09:00' + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1404205200000000000 + + # confirm base representation is correct + import calendar + self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, + base_expected) + + tests = [(base_str, base_dt, base_expected), + ('2014-07-01 10:00', datetime(2014, 7, 1, 10), + base_expected + 3600 * 1000000000), + ('2014-07-01 09:00:00.000008000', + 
datetime(2014, 7, 1, 9, 0, 0, 8), + base_expected + 8000), + ('2014-07-01 09:00:00.000000005', + Timestamp('2014-07-01 09:00:00.000000005'), + base_expected + 5)] + + tm._skip_if_no_pytz() + tm._skip_if_no_dateutil() + import pytz + import dateutil + timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), + ('US/Eastern', -4), ('dateutil/US/Pacific', -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5)] + + for date_str, date, expected in tests: + for result in [Timestamp(date_str), Timestamp(date)]: + # only with timestring + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # with timezone + for tz, offset in timezones: + for result in [Timestamp(date_str, tz=tz), Timestamp(date, + tz=tz)]: + expected_tz = expected - offset * 3600 * 1000000000 + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should preserve tz + result = Timestamp(result) + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should convert to UTC + result = Timestamp(result, tz='UTC') + expected_utc = expected - offset * 3600 * 1000000000 + self.assertEqual(result.value, expected_utc) + self.assertEqual(tslib.pydt_to_i8(result), expected_utc) + + def test_constructor_with_stringoffset(self): + # GH 7833 + base_str = '2014-07-01 11:00:00+02:00' + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1404205200000000000 + + # confirm base representation is correct + import calendar + self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, + base_expected) + + tests = [(base_str, base_expected), + ('2014-07-01 12:00:00+02:00', + base_expected + 3600 * 1000000000), + ('2014-07-01 11:00:00.000008000+02:00', base_expected + 8000), + ('2014-07-01 11:00:00.000000005+02:00', base_expected + 5)] + + tm._skip_if_no_pytz() + tm._skip_if_no_dateutil() + import pytz + import dateutil + timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), + ('US/Eastern', -4), ('dateutil/US/Pacific', -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5)] + + for date_str, expected in tests: + for result in [Timestamp(date_str)]: + # only with timestring + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + self.assertEqual(result.value, expected) + self.assertEqual(tslib.pydt_to_i8(result), expected) + + # with timezone + for tz, offset in timezones: + result = Timestamp(date_str, tz=tz) + expected_tz = expected + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should preserve tz + result = Timestamp(result) + self.assertEqual(result.value, expected_tz) + self.assertEqual(tslib.pydt_to_i8(result), expected_tz) + + # should convert to UTC + result = Timestamp(result, tz='UTC') + expected_utc = expected + self.assertEqual(result.value, expected_utc) + self.assertEqual(tslib.pydt_to_i8(result), expected_utc) + + # This should be 2013-11-01 05:00 in UTC + # converted to Chicago tz + result = Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago') + self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) + expected = 
"Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + # This should be 2013-11-01 05:00 in UTC + # converted to Tokyo tz (+09:00) + result = Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo') + self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) + expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + # GH11708 + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Katmandu + result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") + self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) + expected = "Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Kolkata + result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") + self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) + expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" + self.assertEqual(repr(result), expected) + self.assertEqual(result, eval(repr(result))) + + def test_constructor_invalid(self): + with tm.assertRaisesRegexp(TypeError, 'Cannot convert input'): + Timestamp(slice(2)) + with tm.assertRaisesRegexp(ValueError, 'Cannot convert Period'): + Timestamp(Period('1000-01-01')) + + def test_constructor_positional(self): + # GH 10758 + with tm.assertRaises(TypeError): + Timestamp(2000, 1) + with tm.assertRaises(ValueError): + Timestamp(2000, 0, 1) + with tm.assertRaises(ValueError): + Timestamp(2000, 13, 1) + with tm.assertRaises(ValueError): + Timestamp(2000, 1, 0) + with tm.assertRaises(ValueError): + Timestamp(2000, 1, 32) + + # GH 11630 + self.assertEqual( + repr(Timestamp(2015, 11, 12)), + repr(Timestamp('20151112'))) + + self.assertEqual( + repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), + repr(Timestamp('2015-11-12 01:02:03.999999'))) + + self.assertIs(Timestamp(None), pd.NaT) + + def test_constructor_keyword(self): + # GH 10758 + with tm.assertRaises(TypeError): + Timestamp(year=2000, month=1) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=0, day=1) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=13, day=1) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=1, day=0) + with tm.assertRaises(ValueError): + Timestamp(year=2000, month=1, day=32) + + self.assertEqual( + repr(Timestamp(year=2015, month=11, day=12)), + repr(Timestamp('20151112'))) + + self.assertEqual( + repr(Timestamp(year=2015, month=11, day=12, + hour=1, minute=2, second=3, microsecond=999999)), + repr(Timestamp('2015-11-12 01:02:03.999999'))) + + def test_constructor_fromordinal(self): + base = datetime(2000, 1, 1) + + ts = Timestamp.fromordinal(base.toordinal(), freq='D') + self.assertEqual(base, ts) + self.assertEqual(ts.freq, 'D') + self.assertEqual(base.toordinal(), ts.toordinal()) + + ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') + self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) + self.assertEqual(base.toordinal(), ts.toordinal()) + + def test_constructor_offset_depr(self): + # GH 12160 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = Timestamp('2011-01-01', offset='D') + self.assertEqual(ts.freq, 'D') + + with tm.assert_produces_warning(FutureWarning, + 
check_stacklevel=False): + self.assertEqual(ts.offset, 'D') + + msg = "Can only specify freq or offset, not both" + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp('2011-01-01', offset='D', freq='D') + + def test_constructor_offset_depr_fromordinal(self): + # GH 12160 + base = datetime(2000, 1, 1) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = Timestamp.fromordinal(base.toordinal(), offset='D') + self.assertEqual(pd.Timestamp('2000-01-01'), ts) + self.assertEqual(ts.freq, 'D') + self.assertEqual(base.toordinal(), ts.toordinal()) + + msg = "Can only specify freq or offset, not both" + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp.fromordinal(base.toordinal(), offset='D', freq='D') + + def test_conversion(self): + # GH 9255 + ts = Timestamp('2000-01-01') + + result = ts.to_pydatetime() + expected = datetime(2000, 1, 1) + self.assertEqual(result, expected) + self.assertEqual(type(result), type(expected)) + + result = ts.to_datetime64() + expected = np.datetime64(ts.value, 'ns') + self.assertEqual(result, expected) + self.assertEqual(type(result), type(expected)) + self.assertEqual(result.dtype, expected.dtype) + + def test_repr(self): + tm._skip_if_no_pytz() + tm._skip_if_no_dateutil() + + dates = ['2014-03-07', '2014-01-01 09:00', + '2014-01-01 00:00:00.000000001'] + + # dateutil zone change (only matters for repr) + import dateutil + if (dateutil.__version__ >= LooseVersion('2.3') and + (dateutil.__version__ <= LooseVersion('2.4.0') or + dateutil.__version__ >= LooseVersion('2.6.0'))): + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Pacific'] + else: + timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/America/Los_Angeles'] + + freqs = ['D', 'M', 'S', 'N'] + + for date in dates: + for tz in timezones: + for freq in freqs: + + # avoid to match with timezone name + freq_repr = "'{0}'".format(freq) + if tz.startswith('dateutil'): + tz_repr = tz.replace('dateutil', '') + else: + tz_repr = tz + + date_only = Timestamp(date) + self.assertIn(date, repr(date_only)) + self.assertNotIn(tz_repr, repr(date_only)) + self.assertNotIn(freq_repr, repr(date_only)) + self.assertEqual(date_only, eval(repr(date_only))) + + date_tz = Timestamp(date, tz=tz) + self.assertIn(date, repr(date_tz)) + self.assertIn(tz_repr, repr(date_tz)) + self.assertNotIn(freq_repr, repr(date_tz)) + self.assertEqual(date_tz, eval(repr(date_tz))) + + date_freq = Timestamp(date, freq=freq) + self.assertIn(date, repr(date_freq)) + self.assertNotIn(tz_repr, repr(date_freq)) + self.assertIn(freq_repr, repr(date_freq)) + self.assertEqual(date_freq, eval(repr(date_freq))) + + date_tz_freq = Timestamp(date, tz=tz, freq=freq) + self.assertIn(date, repr(date_tz_freq)) + self.assertIn(tz_repr, repr(date_tz_freq)) + self.assertIn(freq_repr, repr(date_tz_freq)) + self.assertEqual(date_tz_freq, eval(repr(date_tz_freq))) + + # this can cause the tz field to be populated, but it's redundant to + # information in the datestring + tm._skip_if_no_pytz() + import pytz # noqa + date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) + self.assertIn('2014-03-13 00:00:00-0400', repr(date_with_utc_offset)) + self.assertNotIn('tzoffset', repr(date_with_utc_offset)) + self.assertIn('pytz.FixedOffset(-240)', repr(date_with_utc_offset)) + expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", + 'pytz.FixedOffset(-240)') + self.assertEqual(date_with_utc_offset, eval(expr)) + + def test_bounds_with_different_units(self): + out_of_bounds_dates = 
('1677-09-21', '2262-04-12', ) + + time_units = ('D', 'h', 'm', 's', 'ms', 'us') + + for date_string in out_of_bounds_dates: + for unit in time_units: + self.assertRaises(ValueError, Timestamp, np.datetime64( + date_string, dtype='M8[%s]' % unit)) + + in_bounds_dates = ('1677-09-23', '2262-04-11', ) + + for date_string in in_bounds_dates: + for unit in time_units: + Timestamp(np.datetime64(date_string, dtype='M8[%s]' % unit)) + + def test_tz(self): + t = '2014-02-01 09:00' + ts = Timestamp(t) + local = ts.tz_localize('Asia/Tokyo') + self.assertEqual(local.hour, 9) + self.assertEqual(local, Timestamp(t, tz='Asia/Tokyo')) + conv = local.tz_convert('US/Eastern') + self.assertEqual(conv, Timestamp('2014-01-31 19:00', tz='US/Eastern')) + self.assertEqual(conv.hour, 19) + + # preserves nanosecond + ts = Timestamp(t) + offsets.Nano(5) + local = ts.tz_localize('Asia/Tokyo') + self.assertEqual(local.hour, 9) + self.assertEqual(local.nanosecond, 5) + conv = local.tz_convert('US/Eastern') + self.assertEqual(conv.nanosecond, 5) + self.assertEqual(conv.hour, 19) + + def test_tz_localize_ambiguous(self): + + ts = Timestamp('2014-11-02 01:00') + ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) + ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) + + rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') + self.assertEqual(rng[1], ts_dst) + self.assertEqual(rng[2], ts_no_dst) + self.assertRaises(ValueError, ts.tz_localize, 'US/Eastern', + ambiguous='infer') + + # GH 8025 + with tm.assertRaisesRegexp(TypeError, + 'Cannot localize tz-aware Timestamp, use ' + 'tz_convert for conversions'): + Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') + + with tm.assertRaisesRegexp(TypeError, + 'Cannot convert tz-naive Timestamp, use ' + 'tz_localize to localize'): + Timestamp('2011-01-01').tz_convert('Asia/Tokyo') + + def test_tz_localize_nonexistent(self): + # See issue 13057 + from pytz.exceptions import NonExistentTimeError + times = ['2015-03-08 02:00', '2015-03-08 02:30', + '2015-03-29 02:00', '2015-03-29 02:30'] + timezones = ['US/Eastern', 'US/Pacific', + 'Europe/Paris', 'Europe/Belgrade'] + for t, tz in zip(times, timezones): + ts = Timestamp(t) + self.assertRaises(NonExistentTimeError, ts.tz_localize, + tz) + self.assertRaises(NonExistentTimeError, ts.tz_localize, + tz, errors='raise') + self.assertIs(ts.tz_localize(tz, errors='coerce'), + pd.NaT) + + def test_tz_localize_errors_ambiguous(self): + # See issue 13057 + from pytz.exceptions import AmbiguousTimeError + ts = pd.Timestamp('2015-11-1 01:00') + self.assertRaises(AmbiguousTimeError, + ts.tz_localize, 'US/Pacific', errors='coerce') + + def test_tz_localize_roundtrip(self): + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: + for t in ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']: + ts = Timestamp(t) + localized = ts.tz_localize(tz) + self.assertEqual(localized, Timestamp(t, tz=tz)) + + with tm.assertRaises(TypeError): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + self.assertEqual(reset, ts) + self.assertTrue(reset.tzinfo is None) + + def test_tz_convert_roundtrip(self): + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: + for t in ['2014-02-01 09:00', '2014-07-08 09:00', + '2014-11-01 17:00', '2014-11-05 00:00']: + ts = Timestamp(t, tz='UTC') + converted = ts.tz_convert(tz) + + reset = converted.tz_convert(None) + self.assertEqual(reset, Timestamp(t)) + self.assertTrue(reset.tzinfo is None) + 
self.assertEqual(reset, + converted.tz_convert('UTC').tz_localize(None)) + + def test_barely_oob_dts(self): + one_us = np.timedelta64(1).astype('timedelta64[us]') + + # By definition we can't go out of bounds in [ns], so we + # convert the datetime64s to [us] so we can go out of bounds + min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]') + max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') + + # No error for the min/max datetimes + Timestamp(min_ts_us) + Timestamp(max_ts_us) + + # One us less than the minimum is an error + self.assertRaises(ValueError, Timestamp, min_ts_us - one_us) + + # One us more than the maximum is an error + self.assertRaises(ValueError, Timestamp, max_ts_us + one_us) + + def test_utc_z_designator(self): + self.assertEqual(get_timezone( + Timestamp('2014-11-02 01:00Z').tzinfo), 'UTC') + + def test_now(self): + # #9000 + ts_from_string = Timestamp('now') + ts_from_method = Timestamp.now() + ts_datetime = datetime.now() + + ts_from_string_tz = Timestamp('now', tz='US/Eastern') + ts_from_method_tz = Timestamp.now(tz='US/Eastern') + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + self.assertTrue(abs(ts_from_method - ts_from_string) < delta) + self.assertTrue(abs(ts_datetime - ts_from_method) < delta) + self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) + self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) + + def test_today(self): + + ts_from_string = Timestamp('today') + ts_from_method = Timestamp.today() + ts_datetime = datetime.today() + + ts_from_string_tz = Timestamp('today', tz='US/Eastern') + ts_from_method_tz = Timestamp.today(tz='US/Eastern') + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + self.assertTrue(abs(ts_from_method - ts_from_string) < delta) + self.assertTrue(abs(ts_datetime - ts_from_method) < delta) + self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) + self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - + ts_from_method_tz.tz_localize(None)) < delta) + + def test_asm8(self): + np.random.seed(7960929) + ns = [Timestamp.min.value, Timestamp.max.value, 1000, ] + for n in ns: + self.assertEqual(Timestamp(n).asm8.view('i8'), + np.datetime64(n, 'ns').view('i8'), n) + self.assertEqual(Timestamp('nat').asm8.view('i8'), + np.datetime64('nat', 'ns').view('i8')) + + def test_fields(self): + def check(value, equal): + # that we are int/long like + self.assertTrue(isinstance(value, (int, compat.long))) + self.assertEqual(value, equal) + + # GH 10050 + ts = Timestamp('2015-05-10 09:06:03.000100001') + check(ts.year, 2015) + check(ts.month, 5) + check(ts.day, 10) + check(ts.hour, 9) + check(ts.minute, 6) + check(ts.second, 3) + self.assertRaises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 100) + check(ts.nanosecond, 1) + check(ts.dayofweek, 6) + check(ts.quarter, 2) + check(ts.dayofyear, 130) + check(ts.week, 19) + check(ts.daysinmonth, 31) + check(ts.daysinmonth, 31) + + def test_nat_fields(self): + # GH 10050 + ts = Timestamp('NaT') + self.assertTrue(np.isnan(ts.year)) + self.assertTrue(np.isnan(ts.month)) + self.assertTrue(np.isnan(ts.day)) + self.assertTrue(np.isnan(ts.hour)) + self.assertTrue(np.isnan(ts.minute)) + self.assertTrue(np.isnan(ts.second)) + self.assertTrue(np.isnan(ts.microsecond)) + self.assertTrue(np.isnan(ts.nanosecond)) + self.assertTrue(np.isnan(ts.dayofweek)) + 
self.assertTrue(np.isnan(ts.quarter)) + self.assertTrue(np.isnan(ts.dayofyear)) + self.assertTrue(np.isnan(ts.week)) + self.assertTrue(np.isnan(ts.daysinmonth)) + self.assertTrue(np.isnan(ts.days_in_month)) + + def test_pprint(self): + # GH12622 + import pprint + nested_obj = {'foo': 1, + 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10} + result = pprint.pformat(nested_obj, width=50) + expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], + 'foo': 1}""" + self.assertEqual(result, expected) + + def to_datetime_depr(self): + # see gh-8254 + ts = Timestamp('2011-01-01') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = datetime(2011, 1, 1) + result = ts.to_datetime() + self.assertEqual(result, expected) + + def to_pydatetime_nonzero_nano(self): + ts = Timestamp('2011-01-01 9:00:00.123456789') + + # Warn the user of data loss (nanoseconds). + with tm.assert_produces_warning(UserWarning, + check_stacklevel=False): + expected = datetime(2011, 1, 1, 9, 0, 0, 123456) + result = ts.to_pydatetime() + self.assertEqual(result, expected) + + def test_round(self): + + # round + dt = Timestamp('20130101 09:10:11') + result = dt.round('D') + expected = Timestamp('20130101') + self.assertEqual(result, expected) + + dt = Timestamp('20130101 19:10:11') + result = dt.round('D') + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + dt = Timestamp('20130201 12:00:00') + result = dt.round('D') + expected = Timestamp('20130202') + self.assertEqual(result, expected) + + dt = Timestamp('20130104 12:00:00') + result = dt.round('D') + expected = Timestamp('20130105') + self.assertEqual(result, expected) + + dt = Timestamp('20130104 12:32:00') + result = dt.round('30Min') + expected = Timestamp('20130104 12:30:00') + self.assertEqual(result, expected) + + dti = date_range('20130101 09:10:11', periods=5) + result = dti.round('D') + expected = date_range('20130101', periods=5) + tm.assert_index_equal(result, expected) + + # floor + dt = Timestamp('20130101 09:10:11') + result = dt.floor('D') + expected = Timestamp('20130101') + self.assertEqual(result, expected) + + # ceil + dt = Timestamp('20130101 09:10:11') + result = dt.ceil('D') + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + # round with tz + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('D') + expected = Timestamp('20130101', tz='US/Eastern') + self.assertEqual(result, expected) + + dt = Timestamp('20130101 09:10:11', tz='US/Eastern') + result = dt.round('s') + self.assertEqual(result, dt) + + dti = date_range('20130101 09:10:11', + periods=5).tz_localize('UTC').tz_convert('US/Eastern') + result = dti.round('D') + expected = date_range('20130101', periods=5).tz_localize('US/Eastern') + tm.assert_index_equal(result, expected) + + result = dti.round('s') + tm.assert_index_equal(result, dti) + + # invalid + for freq in ['Y', 'M', 'foobar']: + self.assertRaises(ValueError, lambda: dti.round(freq)) + + def test_class_ops_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone + + 
def compare(x, y): + self.assertEqual(int(Timestamp(x).value / 1e9), + int(Timestamp(y).value / 1e9)) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + compare(Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time)) + compare(Timestamp.fromtimestamp(current_time), + datetime.fromtimestamp(current_time)) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare(Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component)) + + def test_class_ops_dateutil(self): + tm._skip_if_no_dateutil() + from dateutil.tz import tzutc + + def compare(x, y): + self.assertEqual(int(np.round(Timestamp(x).value / 1e9)), + int(np.round(Timestamp(y).value / 1e9))) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now('UTC'), datetime.now(tzutc())) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + compare(Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time)) + compare(Timestamp.fromtimestamp(current_time), + datetime.fromtimestamp(current_time)) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare(Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component)) + + def test_basics_nanos(self): + val = np.int64(946684800000000000).view('M8[ns]') + stamp = Timestamp(val.view('i8') + 500) + self.assertEqual(stamp.year, 2000) + self.assertEqual(stamp.month, 1) + self.assertEqual(stamp.microsecond, 0) + self.assertEqual(stamp.nanosecond, 500) + + # GH 14415 + val = np.iinfo(np.int64).min + 80000000000000 + stamp = Timestamp(val) + self.assertEqual(stamp.year, 1677) + self.assertEqual(stamp.month, 9) + self.assertEqual(stamp.day, 21) + self.assertEqual(stamp.microsecond, 145224) + self.assertEqual(stamp.nanosecond, 192) + + def test_unit(self): + + def check(val, unit=None, h=1, s=1, us=0): + stamp = Timestamp(val, unit=unit) + self.assertEqual(stamp.year, 2000) + self.assertEqual(stamp.month, 1) + self.assertEqual(stamp.day, 1) + self.assertEqual(stamp.hour, h) + if unit != 'D': + self.assertEqual(stamp.minute, 1) + self.assertEqual(stamp.second, s) + self.assertEqual(stamp.microsecond, us) + else: + self.assertEqual(stamp.minute, 0) + self.assertEqual(stamp.second, 0) + self.assertEqual(stamp.microsecond, 0) + self.assertEqual(stamp.nanosecond, 0) + + ts = Timestamp('20000101 01:01:01') + val = ts.value + days = (ts - Timestamp('1970-01-01')).days + + check(val) + check(val / long(1000), unit='us') + check(val / long(1000000), unit='ms') + check(val / long(1000000000), unit='s') + check(days, unit='D', h=0) + + # using truediv, so these are like floats + if compat.PY3: + check((val + 500000) / long(1000000000), unit='s', us=500) + check((val + 500000000) / long(1000000000), unit='s', us=500000) + check((val + 500000) / long(1000000), unit='ms', us=500) + + # get chopped in py2 + else: + check((val + 500000) / long(1000000000), unit='s') + check((val + 500000000) / long(1000000000), unit='s') + check((val + 500000) / long(1000000), unit='ms') + + # ok + check((val + 500000) / long(1000), unit='us', us=500) + check((val + 
500000000) / long(1000000), unit='ms', us=500000) + + # floats + check(val / 1000.0 + 5, unit='us', us=5) + check(val / 1000.0 + 5000, unit='us', us=5000) + check(val / 1000000.0 + 0.5, unit='ms', us=500) + check(val / 1000000.0 + 0.005, unit='ms', us=5) + check(val / 1000000000.0 + 0.5, unit='s', us=500000) + check(days + 0.5, unit='D', h=12) + + # nan + result = Timestamp(np.nan) + self.assertIs(result, NaT) + + result = Timestamp(None) + self.assertIs(result, NaT) + + result = Timestamp(iNaT) + self.assertIs(result, NaT) + + result = Timestamp(NaT) + self.assertIs(result, NaT) + + result = Timestamp('NaT') + self.assertIs(result, NaT) + + self.assertTrue(isnull(Timestamp('nat'))) + + def test_roundtrip(self): + + # test value to string and back conversions + # further test accessors + base = Timestamp('20140101 00:00:00') + + result = Timestamp(base.value + pd.Timedelta('5ms').value) + self.assertEqual(result, Timestamp(str(base) + ".005000")) + self.assertEqual(result.microsecond, 5000) + + result = Timestamp(base.value + pd.Timedelta('5us').value) + self.assertEqual(result, Timestamp(str(base) + ".000005")) + self.assertEqual(result.microsecond, 5) + + result = Timestamp(base.value + pd.Timedelta('5ns').value) + self.assertEqual(result, Timestamp(str(base) + ".000000005")) + self.assertEqual(result.nanosecond, 5) + self.assertEqual(result.microsecond, 0) + + result = Timestamp(base.value + pd.Timedelta('6ms 5us').value) + self.assertEqual(result, Timestamp(str(base) + ".006005")) + self.assertEqual(result.microsecond, 5 + 6 * 1000) + + result = Timestamp(base.value + pd.Timedelta('200ms 5us').value) + self.assertEqual(result, Timestamp(str(base) + ".200005")) + self.assertEqual(result.microsecond, 5 + 200 * 1000) + + def test_comparison(self): + # 5-18-2012 00:00:00.000 + stamp = long(1337299200000000000) + + val = Timestamp(stamp) + + self.assertEqual(val, val) + self.assertFalse(val != val) + self.assertFalse(val < val) + self.assertTrue(val <= val) + self.assertFalse(val > val) + self.assertTrue(val >= val) + + other = datetime(2012, 5, 18) + self.assertEqual(val, other) + self.assertFalse(val != other) + self.assertFalse(val < other) + self.assertTrue(val <= other) + self.assertFalse(val > other) + self.assertTrue(val >= other) + + other = Timestamp(stamp + 100) + + self.assertNotEqual(val, other) + self.assertNotEqual(val, other) + self.assertTrue(val < other) + self.assertTrue(val <= other) + self.assertTrue(other > val) + self.assertTrue(other >= val) + + def test_compare_invalid(self): + + # GH 8058 + val = Timestamp('20130101 12:01:02') + self.assertFalse(val == 'foo') + self.assertFalse(val == 10.0) + self.assertFalse(val == 1) + self.assertFalse(val == long(1)) + self.assertFalse(val == []) + self.assertFalse(val == {'foo': 1}) + self.assertFalse(val == np.float64(1)) + self.assertFalse(val == np.int64(1)) + + self.assertTrue(val != 'foo') + self.assertTrue(val != 10.0) + self.assertTrue(val != 1) + self.assertTrue(val != long(1)) + self.assertTrue(val != []) + self.assertTrue(val != {'foo': 1}) + self.assertTrue(val != np.float64(1)) + self.assertTrue(val != np.int64(1)) + + # ops testing + df = DataFrame(randn(5, 2)) + a = df[0] + b = Series(randn(5)) + b.name = Timestamp('2000-01-01') + tm.assert_series_equal(a / b, 1 / (b / a)) + + def test_cant_compare_tz_naive_w_aware(self): + tm._skip_if_no_pytz() + # #1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz='utc') + + self.assertRaises(Exception, a.__eq__, b) + self.assertRaises(Exception, a.__ne__, b) + 
self.assertRaises(Exception, a.__lt__, b) + self.assertRaises(Exception, a.__gt__, b) + self.assertRaises(Exception, b.__eq__, a) + self.assertRaises(Exception, b.__ne__, a) + self.assertRaises(Exception, b.__lt__, a) + self.assertRaises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) + self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + else: + self.assertFalse(a == b.to_pydatetime()) + self.assertFalse(a.to_pydatetime() == b) + + def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): + tm._skip_if_no_pytz() + from pytz import utc + # #1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=utc) + + self.assertRaises(Exception, a.__eq__, b) + self.assertRaises(Exception, a.__ne__, b) + self.assertRaises(Exception, a.__lt__, b) + self.assertRaises(Exception, a.__gt__, b) + self.assertRaises(Exception, b.__eq__, a) + self.assertRaises(Exception, b.__ne__, a) + self.assertRaises(Exception, b.__lt__, a) + self.assertRaises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) + self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + else: + self.assertFalse(a == b.to_pydatetime()) + self.assertFalse(a.to_pydatetime() == b) + + def test_cant_compare_tz_naive_w_aware_dateutil(self): + tm._skip_if_no_dateutil() + from dateutil.tz import tzutc + utc = tzutc() + # #1404 + a = Timestamp('3/12/2012') + b = Timestamp('3/12/2012', tz=utc) + + self.assertRaises(Exception, a.__eq__, b) + self.assertRaises(Exception, a.__ne__, b) + self.assertRaises(Exception, a.__lt__, b) + self.assertRaises(Exception, a.__gt__, b) + self.assertRaises(Exception, b.__eq__, a) + self.assertRaises(Exception, b.__ne__, a) + self.assertRaises(Exception, b.__lt__, a) + self.assertRaises(Exception, b.__gt__, a) + + if sys.version_info < (3, 3): + self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) + self.assertRaises(Exception, a.to_pydatetime().__eq__, b) + else: + self.assertFalse(a == b.to_pydatetime()) + self.assertFalse(a.to_pydatetime() == b) + + def test_delta_preserve_nanos(self): + val = Timestamp(long(1337299200000000123)) + result = val + timedelta(1) + self.assertEqual(result.nanosecond, val.nanosecond) + + def test_frequency_misc(self): + self.assertEqual(frequencies.get_freq_group('T'), + frequencies.FreqGroup.FR_MIN) + + code, stride = frequencies.get_freq_code(offsets.Hour()) + self.assertEqual(code, frequencies.FreqGroup.FR_HR) + + code, stride = frequencies.get_freq_code((5, 'T')) + self.assertEqual(code, frequencies.FreqGroup.FR_MIN) + self.assertEqual(stride, 5) + + offset = offsets.Hour() + result = frequencies.to_offset(offset) + self.assertEqual(result, offset) + + result = frequencies.to_offset((5, 'T')) + expected = offsets.Minute(5) + self.assertEqual(result, expected) + + self.assertRaises(ValueError, frequencies.get_freq_code, (5, 'baz')) + + self.assertRaises(ValueError, frequencies.to_offset, '100foo') + + self.assertRaises(ValueError, frequencies.to_offset, ('', '')) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = frequencies.get_standard_freq(offsets.Hour()) + self.assertEqual(result, 'H') + + def test_hash_equivalent(self): + d = {datetime(2011, 1, 1): 5} + stamp = Timestamp(datetime(2011, 1, 1)) + self.assertEqual(d[stamp], 5) + + def test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp('now') + nat = Timestamp('nat') + 
+ ops = {'gt': 'lt', + 'lt': 'gt', + 'ge': 'le', + 'le': 'ge', + 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + self.assertEqual(result, expected) + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + self.assertEqual(result, expected) + + def test_timestamp_compare_series(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH4982 + s = Series(date_range('20010101', periods=10), name='dates') + s_nat = s.copy(deep=True) + + s[0] = pd.Timestamp('nat') + s[3] = pd.Timestamp('nat') + + ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(s, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s) + tm.assert_series_equal(result, expected) + + # nats + expected = left_f(s, Timestamp('nat')) + result = right_f(Timestamp('nat'), s) + tm.assert_series_equal(result, expected) + + # compare to timestamp with series containing nats + expected = left_f(s_nat, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s_nat) + tm.assert_series_equal(result, expected) + + # compare to nat with series containing nats + expected = left_f(s_nat, Timestamp('nat')) + result = right_f(Timestamp('nat'), s_nat) + tm.assert_series_equal(result, expected) + + def test_is_leap_year(self): + # GH 13727 + for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: + dt = Timestamp('2000-01-01 00:00:00', tz=tz) + self.assertTrue(dt.is_leap_year) + self.assertIsInstance(dt.is_leap_year, bool) + + dt = Timestamp('1999-01-01 00:00:00', tz=tz) + self.assertFalse(dt.is_leap_year) + + dt = Timestamp('2004-01-01 00:00:00', tz=tz) + self.assertTrue(dt.is_leap_year) + + dt = Timestamp('2100-01-01 00:00:00', tz=tz) + self.assertFalse(dt.is_leap_year) + + self.assertFalse(pd.NaT.is_leap_year) + self.assertIsInstance(pd.NaT.is_leap_year, bool) + + def test_round_nat(self): + # GH14940 + ts = Timestamp('nat') + print(dir(ts)) + for method in ["round", "floor", "ceil"]: + round_method = getattr(ts, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + self.assertIs(round_method(freq), ts) + + +class TestTimestampNsOperations(tm.TestCase): + def setUp(self): + self.timestamp = Timestamp(datetime.utcnow()) + + def assert_ns_timedelta(self, modified_timestamp, expected_value): + value = self.timestamp.value + modified_value = modified_timestamp.value + + self.assertEqual(modified_value - value, expected_value) + + def test_timedelta_ns_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'ns'), + -123) + + def test_timedelta_ns_based_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64( + 1234567898, 'ns'), 1234567898) + + def test_timedelta_us_arithmetic(self): + self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'us'), + -123000) + + def test_timedelta_ms_arithmetic(self): + time = self.timestamp + np.timedelta64(-123, 'ms') + self.assert_ns_timedelta(time, -123000000) + + def test_nanosecond_string_parsing(self): + ts = Timestamp('2013-05-01 07:15:45.123456789') + # GH 7878 + expected_repr = '2013-05-01 07:15:45.123456789' + expected_value = 1367392545123456789 + self.assertEqual(ts.value, expected_value) + self.assertIn(expected_repr, repr(ts)) + + ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') 
+ self.assertEqual(ts.value, expected_value - 9 * 3600 * 1000000000) + self.assertIn(expected_repr, repr(ts)) + + ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') + self.assertEqual(ts.value, expected_value) + self.assertIn(expected_repr, repr(ts)) + + ts = Timestamp('2013-05-01 07:15:45.123456789', tz='US/Eastern') + self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000) + self.assertIn(expected_repr, repr(ts)) + + # GH 10041 + ts = Timestamp('20130501T071545.123456789') + self.assertEqual(ts.value, expected_value) + self.assertIn(expected_repr, repr(ts)) + + def test_nanosecond_timestamp(self): + # GH 7610 + expected = 1293840000000000005 + t = Timestamp('2011-01-01') + offsets.Nano(5) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 5) + + t = Timestamp(t) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 5) + + t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000005Z')) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 5) + + expected = 1293840000000000010 + t = t + offsets.Nano(5) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 10) + + t = Timestamp(t) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 10) + + t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000010Z')) + self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") + self.assertEqual(t.value, expected) + self.assertEqual(t.nanosecond, 10) + + def test_nat_arithmetic(self): + # GH 6873 + i = 2 + f = 1.5 + + for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: + self.assertIs(left / right, pd.NaT) + self.assertIs(left * right, pd.NaT) + self.assertIs(right * left, pd.NaT) + with tm.assertRaises(TypeError): + right / left + + # Timestamp / datetime + t = Timestamp('2014-01-01') + dt = datetime(2014, 1, 1) + for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + + # timedelta-like + # offsets are tested in test_offsets.py + + delta = timedelta(3600) + td = Timedelta('5s') + + for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: + # NaT + timedelta-like returns NaT + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(right - left, pd.NaT) + self.assertIs(left - right, pd.NaT) + + # GH 11718 + tm._skip_if_no_pytz() + import pytz + + t_utc = Timestamp('2014-01-01', tz='UTC') + t_tz = Timestamp('2014-01-01', tz='US/Eastern') + dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) + + for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz), + (pd.NaT, dt_tz)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + + # int addition / subtraction + for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: + self.assertIs(right + left, pd.NaT) + self.assertIs(left + 
right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + + def test_nat_arithmetic_index(self): + # GH 11718 + + # datetime + tm._skip_if_no_pytz() + + dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') + self.assert_index_equal(dti + pd.NaT, exp) + self.assert_index_equal(pd.NaT + dti, exp) + + dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], + tz='US/Eastern', name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') + self.assert_index_equal(dti_tz + pd.NaT, exp) + self.assert_index_equal(pd.NaT + dti_tz, exp) + + exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') + for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: + self.assert_index_equal(left - right, exp) + self.assert_index_equal(right - left, exp) + + # timedelta + tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') + exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') + for (left, right) in [(pd.NaT, tdi)]: + self.assert_index_equal(left + right, exp) + self.assert_index_equal(right + left, exp) + self.assert_index_equal(left - right, exp) + self.assert_index_equal(right - left, exp) + + +class TestTimestampOps(tm.TestCase): + def test_timestamp_and_datetime(self): + self.assertEqual((Timestamp(datetime( + 2013, 10, 13)) - datetime(2013, 10, 12)).days, 1) + self.assertEqual((datetime(2013, 10, 12) - + Timestamp(datetime(2013, 10, 13))).days, -1) + + def test_timestamp_and_series(self): + timestamp_series = Series(date_range('2014-03-17', periods=2, freq='D', + tz='US/Eastern')) + first_timestamp = timestamp_series[0] + + delta_series = Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')]) + assert_series_equal(timestamp_series - first_timestamp, delta_series) + assert_series_equal(first_timestamp - timestamp_series, -delta_series) + + def test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various date/time + # objects + datetime_instance = datetime(2014, 3, 4) + timedelta_instance = timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + timestamp_instance = date_range(datetime_instance, periods=1, + freq='D')[0] + + self.assertEqual(type(timestamp_instance + 1), Timestamp) + self.assertEqual(type(timestamp_instance - 1), Timestamp) + + # Timestamp + datetime not supported, though subtraction is supported + # and yields timedelta more tests in tseries/base/tests/test_base.py + self.assertEqual( + type(timestamp_instance - datetime_instance), Timedelta) + self.assertEqual( + type(timestamp_instance + timedelta_instance), Timestamp) + self.assertEqual( + type(timestamp_instance - timedelta_instance), Timestamp) + + # Timestamp +/- datetime64 not supported, so not tested (could possibly + # assert error raised?) 
+ timedelta64_instance = np.timedelta64(1, 'D') + self.assertEqual( + type(timestamp_instance + timedelta64_instance), Timestamp) + self.assertEqual( + type(timestamp_instance - timedelta64_instance), Timestamp) + + def test_addition_subtraction_preserve_frequency(self): + timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0] + timedelta_instance = timedelta(days=1) + original_freq = timestamp_instance.freq + self.assertEqual((timestamp_instance + 1).freq, original_freq) + self.assertEqual((timestamp_instance - 1).freq, original_freq) + self.assertEqual( + (timestamp_instance + timedelta_instance).freq, original_freq) + self.assertEqual( + (timestamp_instance - timedelta_instance).freq, original_freq) + + timedelta64_instance = np.timedelta64(1, 'D') + self.assertEqual( + (timestamp_instance + timedelta64_instance).freq, original_freq) + self.assertEqual( + (timestamp_instance - timedelta64_instance).freq, original_freq) + + def test_resolution(self): + + for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', + 'S', 'L', 'U'], + [RESO_DAY, RESO_DAY, + RESO_DAY, RESO_DAY, + RESO_HR, RESO_MIN, + RESO_SEC, RESO_MS, + RESO_US]): + for tz in [None, 'Asia/Tokyo', 'US/Eastern', + 'dateutil/US/Eastern']: + idx = date_range(start='2013-04-01', periods=30, freq=freq, + tz=tz) + result = period.resolution(idx.asi8, idx.tz) + self.assertEqual(result, expected) + + +class TestTimestampToJulianDate(tm.TestCase): + def test_compare_1700(self): + r = Timestamp('1700-06-23').to_julian_date() + self.assertEqual(r, 2342145.5) + + def test_compare_2000(self): + r = Timestamp('2000-04-12').to_julian_date() + self.assertEqual(r, 2451646.5) + + def test_compare_2100(self): + r = Timestamp('2100-08-12').to_julian_date() + self.assertEqual(r, 2488292.5) + + def test_compare_hour01(self): + r = Timestamp('2000-08-12T01:00:00').to_julian_date() + self.assertEqual(r, 2451768.5416666666666666) + + def test_compare_hour13(self): + r = Timestamp('2000-08-12T13:00:00').to_julian_date() + self.assertEqual(r, 2451769.0416666666666666) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_timestamp_to_datetime(self): + tm._skip_if_no_pytz() + rng = date_range('20090415', '20090519', tz='US/Eastern') + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_to_datetime_dateutil(self): + tm._skip_if_no_pytz() + rng = date_range('20090415', '20090519', tz='dateutil/US/Eastern') + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_to_datetime_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + rng = date_range('20090415', '20090519', + tz=pytz.timezone('US/Eastern')) + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_to_datetime_explicit_dateutil(self): + tm._skip_if_windows_python_3() + tm._skip_if_no_dateutil() + from pandas.tslib import _dateutil_gettz as gettz + rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + + def test_timestamp_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + + fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', + 'days_in_month', 
'is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', 'is_year_start', + 'is_year_end', 'weekday_name'] + for f in fields: + expected = getattr(idx, f)[-1] + result = getattr(Timestamp(idx[-1]), f) + self.assertEqual(result, expected) + + self.assertEqual(idx.freq, Timestamp(idx[-1], idx.freq).freq) + self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr) + + def test_timestamp_date_out_of_range(self): + self.assertRaises(ValueError, Timestamp, '1676-01-01') + self.assertRaises(ValueError, Timestamp, '2263-01-01') + + # 1475 + self.assertRaises(ValueError, DatetimeIndex, ['1400-01-01']) + self.assertRaises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) + + def test_timestamp_repr(self): + # pre-1900 + stamp = Timestamp('1850-01-01', tz='US/Eastern') + repr(stamp) + + iso8601 = '1850-01-01 01:23:45.012345' + stamp = Timestamp(iso8601, tz='US/Eastern') + result = repr(stamp) + self.assertIn(iso8601, result) + + def test_timestamp_from_ordinal(self): + + # GH 3042 + dt = datetime(2011, 4, 16, 0, 0) + ts = Timestamp.fromordinal(dt.toordinal()) + self.assertEqual(ts.to_pydatetime(), dt) + + # with a tzinfo + stamp = Timestamp('2011-4-16', tz='US/Eastern') + dt_tz = stamp.to_pydatetime() + ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern') + self.assertEqual(ts.to_pydatetime(), dt_tz) + + def test_timestamp_compare_with_early_datetime(self): + # e.g. datetime.min + stamp = Timestamp('2012-01-01') + + self.assertFalse(stamp == datetime.min) + self.assertFalse(stamp == datetime(1600, 1, 1)) + self.assertFalse(stamp == datetime(2700, 1, 1)) + self.assertNotEqual(stamp, datetime.min) + self.assertNotEqual(stamp, datetime(1600, 1, 1)) + self.assertNotEqual(stamp, datetime(2700, 1, 1)) + self.assertTrue(stamp > datetime(1600, 1, 1)) + self.assertTrue(stamp >= datetime(1600, 1, 1)) + self.assertTrue(stamp < datetime(2700, 1, 1)) + self.assertTrue(stamp <= datetime(2700, 1, 1)) + + def test_timestamp_equality(self): + + # GH 11034 + s = Series([Timestamp('2000-01-29 01:59:00'), 'NaT']) + result = s != s + assert_series_equal(result, Series([False, True])) + result = s != s[0] + assert_series_equal(result, Series([False, True])) + result = s != s[1] + assert_series_equal(result, Series([True, True])) + + result = s == s + assert_series_equal(result, Series([True, False])) + result = s == s[0] + assert_series_equal(result, Series([True, False])) + result = s == s[1] + assert_series_equal(result, Series([False, False])) + + def test_series_box_timestamp(self): + rng = date_range('20090415', '20090519', freq='B') + s = Series(rng) + + tm.assertIsInstance(s[5], Timestamp) + + rng = date_range('20090415', '20090519', freq='B') + s = Series(rng, index=rng) + tm.assertIsInstance(s[5], Timestamp) + + tm.assertIsInstance(s.iat[5], Timestamp) + + def test_frame_setitem_timestamp(self): + # 2155 + columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', + freq=offsets.BDay()) + index = lrange(10) + data = DataFrame(columns=columns, index=index) + t = datetime(2012, 11, 1) + ts = Timestamp(t) + data[ts] = np.nan # works + + def test_to_html_timestamp(self): + rng = date_range('2000-01-01', periods=10) + df = DataFrame(np.random.randn(10, 4), index=rng) + + result = df.to_html() + self.assertIn('2000-01-01', result) + + def test_series_map_box_timestamps(self): + # #2689, #2627 + s = Series(date_range('1/1/2000', periods=10)) + + def f(x): + return (x.hour, x.day, x.month) + + # it works! 
+ s.map(f) + s.apply(f) + DataFrame(s).applymap(f) + + def test_dti_slicing(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + dti2 = dti[[1, 3, 5]] + + v1 = dti2[0] + v2 = dti2[1] + v3 = dti2[2] + + self.assertEqual(v1, Timestamp('2/28/2005')) + self.assertEqual(v2, Timestamp('4/30/2005')) + self.assertEqual(v3, Timestamp('6/30/2005')) + + # don't carry freq through irregular slicing + self.assertIsNone(dti2.freq) + + def test_woy_boundary(self): + # make sure weeks at year boundaries are correct + d = datetime(2013, 12, 31) + result = Timestamp(d).week + expected = 1 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2008, 12, 28) + result = Timestamp(d).week + expected = 52 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2009, 12, 31) + result = Timestamp(d).week + expected = 53 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2010, 1, 1) + result = Timestamp(d).week + expected = 53 # ISO standard + self.assertEqual(result, expected) + + d = datetime(2010, 1, 3) + result = Timestamp(d).week + expected = 53 # ISO standard + self.assertEqual(result, expected) + + result = np.array([Timestamp(datetime(*args)).week + for args in [(2000, 1, 1), (2000, 1, 2), ( + 2005, 1, 1), (2005, 1, 2)]]) + self.assertTrue((result == [52, 52, 53, 53]).all()) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 3c82e4ed82969..91da36161e188 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -34,6 +34,11 @@ def _skip_if_no_akima(): raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') +def _simple_ts(start, end, freq='D'): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + class TestSeriesMissingData(TestData, tm.TestCase): _multiprocess_can_split_ = True @@ -530,6 +535,79 @@ def test_fill_value_when_combine_const(self): res = s.add(2, fill_value=0) assert_series_equal(res, exp) + def test_series_fillna_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = s[:2].reindex(index).fillna(method='pad') + expected[-3:] = np.nan + assert_series_equal(result, expected) + + result = s[-2:].reindex(index) + result = result.fillna(method='bfill', limit=5) + + expected = s[-2:].reindex(index).fillna(method='backfill') + expected[:3] = np.nan + assert_series_equal(result, expected) + + def test_sparse_series_fillna_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + ss = s[:2].reindex(index).to_sparse() + result = ss.fillna(method='pad', limit=5) + expected = ss.fillna(method='pad', limit=5) + expected = expected.to_dense() + expected[-3:] = np.nan + expected = expected.to_sparse() + assert_series_equal(result, expected) + + ss = s[-2:].reindex(index).to_sparse() + result = ss.fillna(method='backfill', limit=5) + expected = ss.fillna(method='backfill') + expected = expected.to_dense() + expected[:3] = np.nan + expected = expected.to_sparse() + assert_series_equal(result, expected) + + def test_sparse_series_pad_backfill_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + s = s.to_sparse() + + result = s[:2].reindex(index, method='pad', limit=5) + expected = s[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected[-3:] = np.nan + expected = expected.to_sparse() + 
assert_series_equal(result, expected) + + result = s[-2:].reindex(index, method='backfill', limit=5) + expected = s[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected[:3] = np.nan + expected = expected.to_sparse() + assert_series_equal(result, expected) + + def test_series_pad_backfill_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index, method='pad', limit=5) + + expected = s[:2].reindex(index).fillna(method='pad') + expected[-3:] = np.nan + assert_series_equal(result, expected) + + result = s[-2:].reindex(index, method='backfill', limit=5) + + expected = s[-2:].reindex(index).fillna(method='backfill') + expected[:3] = np.nan + assert_series_equal(result, expected) + class TestSeriesInterpolateData(TestData, tm.TestCase): @@ -932,6 +1010,31 @@ def test_interp_timedelta64(self): index=pd.to_timedelta([1, 2, 4])) assert_series_equal(result, expected) + def test_series_interpolate_method_values(self): + # #1646 + ts = _simple_ts('1/1/2000', '1/20/2000') + ts[::2] = np.nan + + result = ts.interpolate(method='values') + exp = ts.interpolate() + assert_series_equal(result, exp) + + def test_series_interpolate_intraday(self): + # #1698 + index = pd.date_range('1/1/2012', periods=4, freq='12D') + ts = pd.Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(days=1)).sort_values() + + exp = ts.reindex(new_index).interpolate(method='time') + + index = pd.date_range('1/1/2012', periods=4, freq='12H') + ts = pd.Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() + result = ts.reindex(new_index).interpolate(method='time') + + self.assert_numpy_array_equal(result.values, exp.values) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index df7ab24430746..073b8bfeee131 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,22 +1,48 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -from datetime import datetime - +import sys +import nose +import locale +import calendar import numpy as np +from numpy.random import rand +from datetime import datetime, timedelta, time -from pandas import Index, Series, date_range, NaT +import pandas as pd +import pandas.index as _index +import pandas.tseries.tools as tools +import pandas.core.common as com +import pandas.util.testing as tm +from pandas.tslib import iNaT +from pandas.compat import lrange, lmap, StringIO, product +from pandas.tseries.tdi import TimedeltaIndex from pandas.tseries.index import DatetimeIndex from pandas.tseries.offsets import BDay, BMonthEnd -from pandas.tseries.tdi import TimedeltaIndex - -from pandas.util.testing import assert_series_equal, assert_almost_equal -import pandas.util.testing as tm +from pandas.types.common import is_datetime64_ns_dtype +from pandas import (Index, Series, date_range, NaT, concat, DataFrame, + Timestamp, lib, isnull, to_datetime, offsets, Timedelta, + tslib, bdate_range, Period, timedelta_range, compat) +from pandas.util.testing import (assert_series_equal, assert_almost_equal, + slow, assert_frame_equal, _skip_if_has_locale) from pandas.tests.series.common import TestData +randn = np.random.randn + + +def _simple_ts(start, end, freq='D'): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) 
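A side note for readers of this hunk: the `_simple_ts` helper added above (and to test_missing.py earlier in the patch) only builds a random series on a regular DatetimeIndex. The standalone sketch below is illustrative and not part of the patch; `_simple_ts` is copied verbatim from the diff, while `ts` is just an example name. It shows the equivalence that test_series_interpolate_method_values asserts.

    import numpy as np
    import pandas as pd

    def _simple_ts(start, end, freq='D'):
        # Same helper as in the diff: random values on a regular DatetimeIndex.
        rng = pd.date_range(start, end, freq=freq)
        return pd.Series(np.random.randn(len(rng)), index=rng)

    # On an evenly spaced index, interpolating by the index values matches the
    # default (linear) interpolation -- the property the test above relies on.
    ts = _simple_ts('1/1/2000', '1/20/2000')
    ts[::2] = np.nan
    pd.util.testing.assert_series_equal(ts.interpolate(method='values'),
                                        ts.interpolate())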
-class TestSeriesTimeSeries(TestData, tm.TestCase): + +def assert_range_equal(left, right): + assert (left.equals(right)) + assert (left.freq == right.freq) + assert (left.tz == right.tz) + + +class TestTimeSeries(TestData, tm.TestCase): _multiprocess_can_split_ = True def test_shift(self): @@ -204,86 +230,6 @@ def test_truncate(self): before=self.ts.index[-1] + offset, after=self.ts.index[0] - offset) - def test_getitem_setitem_datetimeindex(self): - from pandas import date_range - - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04:00:00"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00"] = 0 - result["1990-01-01 04:00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04:00:00" - rb = "1990-01-01 07:00:00" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # repeat all the above with naive datetimes - result = ts[datetime(1990, 1, 1, 4)] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4)] = 0 - result[datetime(1990, 1, 1, 4)] = ts[4] - assert_series_equal(result, ts) - - result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] - assert_series_equal(result, ts) - - lb = datetime(1990, 1, 1, 4) - rb = datetime(1990, 1, 1, 7) - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - - # also test partial date slicing - result = ts["1990-01-02"] - expected = ts[24:48] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-02"] = 0 - result["1990-01-02"] = ts[24:48] - assert_series_equal(result, ts) - def test_getitem_setitem_datetime_tz_pytz(self): tm._skip_if_no_pytz() from pytz import timezone as tz @@ -567,8 +513,3031 @@ def test_empty_series_ops(self): assert_series_equal(a, b + a) self.assertRaises(TypeError, lambda x, y: x - y, b, a) + def test_is_(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + self.assertTrue(dti.is_(dti)) + self.assertTrue(dti.is_(dti.view())) + self.assertFalse(dti.is_(dti.copy())) + + def test_contiguous_boolean_preserve_freq(self): + rng = date_range('1/1/2000', '3/1/2000', freq='B') + + mask = np.zeros(len(rng), dtype=bool) + mask[10:20] = True + + masked = rng[mask] + expected = rng[10:20] + self.assertIsNotNone(expected.freq) + assert_range_equal(masked, expected) + + mask[22] = True + masked = rng[mask] + self.assertIsNone(masked.freq) + + def test_getitem_median_slice_bug(self): + index = date_range('20090415', '20090519', 
freq='2B') + s = Series(np.random.randn(13), index=index) + + indexer = [slice(6, 7, None)] + result = s[indexer] + expected = s[indexer[0]] + assert_series_equal(result, expected) + + def test_ctor_str_intraday(self): + rng = DatetimeIndex(['1-1-2000 00:00:01']) + self.assertEqual(rng[0].second, 1) + + def test_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index, method='pad', limit=5) + + expected = df[:2].reindex(index).fillna(method='pad') + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index, method='backfill', limit=5) + + expected = df[-2:].reindex(index).fillna(method='backfill') + expected.values[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = df[:2].reindex(index).fillna(method='pad') + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index) + result = result.fillna(method='backfill', limit=5) + + expected = df[-2:].reindex(index).fillna(method='backfill') + expected.values[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_sparse_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + sdf = df.to_sparse() + + result = sdf[:2].reindex(index, method='pad', limit=5) + + expected = sdf[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected.values[-3:] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + result = sdf[-2:].reindex(index, method='backfill', limit=5) + + expected = sdf[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected.values[:3] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + def test_sparse_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + sdf = df.to_sparse() + + result = sdf[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = sdf[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected.values[-3:] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + result = sdf[-2:].reindex(index) + result = result.fillna(method='backfill', limit=5) + + expected = sdf[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected.values[:3] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + def test_pad_require_monotonicity(self): + rng = date_range('1/1/2000', '3/1/2000', freq='B') + + # neither monotonic increasing or decreasing + rng2 = rng[[1, 0, 2]] + + self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') + + def test_frame_ctor_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + dates = np.asarray(rng) + + df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) + self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) + + def test_frame_add_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + df = DataFrame(index=np.arange(len(rng))) + + df['A'] = rng + self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) + 
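For readers skimming this hunk, a minimal standalone sketch of the behaviour the two frame tests above assert: whether the datetimes arrive at construction time or via column assignment, the column is coerced to datetime64[ns]. Illustrative only, not part of the patch; `df`, `df2` and `rng` are example names.

    import numpy as np
    from pandas import DataFrame, date_range

    rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')

    # Datetimes supplied at construction time...
    df = DataFrame({'A': np.random.randn(len(rng)), 'B': np.asarray(rng)})
    assert np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))

    # ...or assigned as a new column afterwards both end up as M8[ns].
    df2 = DataFrame(index=np.arange(len(rng)))
    df2['A'] = rng
    assert np.issubdtype(df2['A'].dtype, np.dtype('M8[ns]'))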
+ def test_frame_datetime64_pre1900_repr(self): + df = DataFrame({'year': date_range('1/1/1700', periods=50, + freq='A-DEC')}) + # it works! + repr(df) + + def test_frame_add_datetime64_col_other_units(self): + n = 100 + + units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] + + ns_dtype = np.dtype('M8[ns]') + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + ex_vals = to_datetime(vals.astype('O')).values + + self.assertEqual(df[unit].dtype, ns_dtype) + self.assertTrue((df[unit].values == ex_vals).all()) + + # Test insertion into existing datetime64 column + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + tmp = df.copy() + + tmp['dates'] = vals + ex_vals = to_datetime(vals.astype('O')).values + + self.assertTrue((tmp['dates'].values == ex_vals).all()) + + def test_to_datetime_unit(self): + + epoch = 1370745748 + s = Series([epoch + t for t in range(20)]) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)]) + assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)]).astype(float) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)]) + assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)] + [NaT]) + assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)] + [NaT]) + assert_series_equal(result, expected) + + # GH13834 + s = Series([epoch + t for t in np.arange(0, 2, .25)] + + [iNaT]).astype(float) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in np.arange(0, 2, .25)] + [NaT]) + assert_series_equal(result, expected) + + s = concat([Series([epoch + t for t in range(20)] + ).astype(float), Series([np.nan])], + ignore_index=True) + result = to_datetime(s, unit='s') + expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( + seconds=t) for t in range(20)] + [NaT]) + assert_series_equal(result, expected) + + result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D') + expected = DatetimeIndex([Timestamp('1970-01-02'), + Timestamp('1970-01-03')] + ['NaT'] * 3) + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + to_datetime([1, 2, 'foo'], unit='D') + with self.assertRaises(ValueError): + to_datetime([1, 2, 111111111], unit='D') + + # coerce we can process + expected = DatetimeIndex([Timestamp('1970-01-02'), + Timestamp('1970-01-03')] + ['NaT'] * 1) + result = to_datetime([1, 2, 'foo'], unit='D', errors='coerce') + tm.assert_index_equal(result, expected) + + result = to_datetime([1, 2, 111111111], unit='D', errors='coerce') + tm.assert_index_equal(result, expected) + + def test_series_ctor_datetime64(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + dates = np.asarray(rng) + + series = 
Series(dates) + self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) + + def test_index_cast_datetime64_other_units(self): + arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') + + idx = Index(arr) + + self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) + + def test_reindex_series_add_nat(self): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + series = Series(rng) + + result = series.reindex(lrange(15)) + self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) + + mask = result.isnull() + self.assertTrue(mask[-5:].all()) + self.assertFalse(mask[:-5].any()) + + def test_reindex_frame_add_nat(self): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) + + result = df.reindex(lrange(15)) + self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) + + mask = com.isnull(result)['B'] + self.assertTrue(mask[-5:].all()) + self.assertFalse(mask[:-5].any()) + + def test_series_repr_nat(self): + series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') + + result = repr(series) + expected = ('0 1970-01-01 00:00:00.000000\n' + '1 1970-01-01 00:00:00.000001\n' + '2 1970-01-01 00:00:00.000002\n' + '3 NaT\n' + 'dtype: datetime64[ns]') + self.assertEqual(result, expected) + + def test_fillna_nat(self): + series = Series([0, 1, 2, iNaT], dtype='M8[ns]') + + filled = series.fillna(method='pad') + filled2 = series.fillna(value=series.values[2]) + + expected = series.copy() + expected.values[3] = expected.values[2] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='pad') + filled2 = df.fillna(value=series.values[2]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + + series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') + + filled = series.fillna(method='bfill') + filled2 = series.fillna(value=series[1]) + + expected = series.copy() + expected[0] = expected[1] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='bfill') + filled2 = df.fillna(value=series[1]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + + def test_string_na_nat_conversion(self): + # GH #999, #858 + + from pandas.compat import parse_date + + strings = np.array(['1/1/2000', '1/2/2000', np.nan, + '1/4/2000, 12:34:56'], dtype=object) + + expected = np.empty(4, dtype='M8[ns]') + for i, val in enumerate(strings): + if com.isnull(val): + expected[i] = iNaT + else: + expected[i] = parse_date(val) + + result = tslib.array_to_datetime(strings) + assert_almost_equal(result, expected) + + result2 = to_datetime(strings) + tm.assertIsInstance(result2, DatetimeIndex) + tm.assert_numpy_array_equal(result, result2.values) + + malformed = np.array(['1/100/2000', np.nan], dtype=object) + + # GH 10636, default is now 'raise' + self.assertRaises(ValueError, + lambda: to_datetime(malformed, errors='raise')) + + result = to_datetime(malformed, errors='ignore') + tm.assert_numpy_array_equal(result, malformed) + + self.assertRaises(ValueError, to_datetime, malformed, errors='raise') + + idx = ['a', 'b', 'c', 'd', 'e'] + series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, + '1/5/2000'], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000'), np.nan, + to_datetime('1/3/2000'), np.nan, + 
to_datetime('1/5/2000')], index=idx, name='foo') + + result = to_datetime(series) + dresult = to_datetime(dseries) + + expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) + for i in range(5): + x = series[i] + if isnull(x): + expected[i] = iNaT + else: + expected[i] = to_datetime(x) + + assert_series_equal(result, expected, check_names=False) + self.assertEqual(result.name, 'foo') + + assert_series_equal(dresult, expected, check_names=False) + self.assertEqual(dresult.name, 'foo') + + def test_nat_vector_field_access(self): + idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'is_leap_year'] + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_numpy_array_equal(result, np.array(expected)) + + s = pd.Series(idx) + + for field in fields: + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + self.assert_series_equal(result, pd.Series(expected)) + + def test_nat_scalar_field_access(self): + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] + for field in fields: + result = getattr(NaT, field) + self.assertTrue(np.isnan(result)) + + def test_NaT_methods(self): + # GH 9513 + raise_methods = ['astimezone', 'combine', 'ctime', 'dst', + 'fromordinal', 'fromtimestamp', 'isocalendar', + 'strftime', 'strptime', 'time', 'timestamp', + 'timetuple', 'timetz', 'toordinal', 'tzname', + 'utcfromtimestamp', 'utcnow', 'utcoffset', + 'utctimetuple'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] + nan_methods = ['weekday', 'isoweekday'] + + for method in raise_methods: + if hasattr(NaT, method): + self.assertRaises(ValueError, getattr(NaT, method)) + + for method in nan_methods: + if hasattr(NaT, method): + self.assertTrue(np.isnan(getattr(NaT, method)())) + + for method in nat_methods: + if hasattr(NaT, method): + # see gh-8254 + exp_warning = None + if method == 'to_datetime': + exp_warning = FutureWarning + with tm.assert_produces_warning( + exp_warning, check_stacklevel=False): + self.assertIs(getattr(NaT, method)(), NaT) + + # GH 12300 + self.assertEqual(NaT.isoformat(), 'NaT') + + def test_index_convert_to_datetime_array(self): + tm._skip_if_no_pytz() + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') + rng_utc = date_range('20090415', '20090519', tz='utc') + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz=pytz.timezone('US/Eastern')) + rng_utc = date_range('20090415', '20090519', tz=pytz.utc) + + 
_check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz='dateutil/US/Eastern') + rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_reasonable_keyerror(self): + # GH #1062 + index = DatetimeIndex(['1/3/2000']) + try: + index.get_loc('1/1/2000') + except KeyError as e: + self.assertIn('2000', str(e)) + + def test_reindex_with_datetimes(self): + rng = date_range('1/1/2000', periods=20) + ts = Series(np.random.randn(20), index=rng) + + result = ts.reindex(list(ts.index[5:10])) + expected = ts[5:10] + tm.assert_series_equal(result, expected) + + result = ts[list(ts.index[5:10])] + tm.assert_series_equal(result, expected) + + def test_asfreq_keep_index_name(self): + # GH #9854 + index_name = 'bar' + index = pd.date_range('20130101', periods=20, name=index_name) + df = pd.DataFrame([x for x in range(20)], columns=['foo'], index=index) + + self.assertEqual(index_name, df.index.name) + self.assertEqual(index_name, df.asfreq('10D').index.name) + + def test_promote_datetime_date(self): + rng = date_range('1/1/2000', periods=20) + ts = Series(np.random.randn(20), index=rng) + + ts_slice = ts[5:] + ts2 = ts_slice.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts + ts2 + result2 = ts2 + ts + expected = ts + ts[5:] + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + + # test asfreq + result = ts2.asfreq('4H', method='ffill') + expected = ts[5:].asfreq('4H', method='ffill') + assert_series_equal(result, expected) + + result = rng.get_indexer(ts2.index) + expected = rng.get_indexer(ts_slice.index) + self.assert_numpy_array_equal(result, expected) + + def test_asfreq_normalize(self): + rng = date_range('1/1/2000 09:30', periods=20) + norm = date_range('1/1/2000', periods=20) + vals = np.random.randn(20) + ts = Series(vals, index=rng) + + result = ts.asfreq('D', normalize=True) + norm = date_range('1/1/2000', periods=20) + expected = Series(vals, index=norm) + + assert_series_equal(result, expected) + + vals = np.random.randn(20, 3) + ts = DataFrame(vals, index=rng) + + result = ts.asfreq('D', normalize=True) + expected = DataFrame(vals, index=norm) + + assert_frame_equal(result, expected) + + def test_first_subset(self): + ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') + result = ts.first('10d') + self.assertEqual(len(result), 20) + + ts = _simple_ts('1/1/2000', '1/1/2010') + result = ts.first('10d') + self.assertEqual(len(result), 10) + + result = ts.first('3M') + expected = ts[:'3/31/2000'] + assert_series_equal(result, expected) + + result = ts.first('21D') + expected = ts[:21] + assert_series_equal(result, expected) + + result = ts[:0].first('3M') + assert_series_equal(result, ts[:0]) + + def test_last_subset(self): + ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') + result = ts.last('10d') + self.assertEqual(len(result), 20) + + ts = _simple_ts('1/1/2000', '1/1/2010') + result = ts.last('10d') + self.assertEqual(len(result), 10) + + result = ts.last('21D') + expected = 
ts['12/12/2009':] + assert_series_equal(result, expected) + + result = ts.last('21D') + expected = ts[-21:] + assert_series_equal(result, expected) + + result = ts[:0].last('3M') + assert_series_equal(result, ts[:0]) + + def test_format_pre_1900_dates(self): + rng = date_range('1/1/1850', '1/1/1950', freq='A-DEC') + rng.format() + ts = Series(1, index=rng) + repr(ts) + + def test_at_time(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = Series(np.random.randn(len(rng)), index=rng) + rs = ts.at_time(rng[1]) + self.assertTrue((rs.index.hour == rng[1].hour).all()) + self.assertTrue((rs.index.minute == rng[1].minute).all()) + self.assertTrue((rs.index.second == rng[1].second).all()) + + result = ts.at_time('9:30') + expected = ts.at_time(time(9, 30)) + assert_series_equal(result, expected) + + df = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts[time(9, 30)] + result_df = df.loc[time(9, 30)] + expected = ts[(rng.hour == 9) & (rng.minute == 30)] + exp_df = df[(rng.hour == 9) & (rng.minute == 30)] + + # expected.index = date_range('1/1/2000', '1/4/2000') + + assert_series_equal(result, expected) + tm.assert_frame_equal(result_df, exp_df) + + chunk = df.loc['1/4/2000':] + result = chunk.loc[time(9, 30)] + expected = result_df[-1:] + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range('1/1/2000', '1/31/2000') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.at_time(time(0, 0)) + assert_series_equal(result, ts) + + # time doesn't exist + rng = date_range('1/1/2012', freq='23Min', periods=384) + ts = Series(np.random.randn(len(rng)), rng) + rs = ts.at_time('16:00') + self.assertEqual(len(rs), 0) + + def test_at_time_frame(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + rs = ts.at_time(rng[1]) + self.assertTrue((rs.index.hour == rng[1].hour).all()) + self.assertTrue((rs.index.minute == rng[1].minute).all()) + self.assertTrue((rs.index.second == rng[1].second).all()) + + result = ts.at_time('9:30') + expected = ts.at_time(time(9, 30)) + assert_frame_equal(result, expected) + + result = ts.loc[time(9, 30)] + expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] + + assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range('1/1/2000', '1/31/2000') + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts.at_time(time(0, 0)) + assert_frame_equal(result, ts) + + # time doesn't exist + rng = date_range('1/1/2012', freq='23Min', periods=384) + ts = DataFrame(np.random.randn(len(rng), 2), rng) + rs = ts.at_time('16:00') + self.assertEqual(len(rs), 0) + + def test_between_time(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue(t >= stime) + else: + self.assertTrue(t > stime) + + if inc_end: + self.assertTrue(t <= etime) + else: + self.assertTrue(t < etime) + + result = ts.between_time('00:00', '01:00') + expected = ts.between_time(stime, etime) + assert_series_equal(result, expected) + + # across midnight + rng = 
date_range('1/1/2000', '1/5/2000', freq='5min') + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue((t >= stime) or (t <= etime)) + else: + self.assertTrue((t > stime) or (t <= etime)) + + if inc_end: + self.assertTrue((t <= etime) or (t >= stime)) + else: + self.assertTrue((t < etime) or (t >= stime)) + + def test_between_time_frame(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue(t >= stime) + else: + self.assertTrue(t > stime) + + if inc_end: + self.assertTrue(t <= etime) + else: + self.assertTrue(t < etime) + + result = ts.between_time('00:00', '01:00') + expected = ts.between_time(stime, etime) + assert_frame_equal(result, expected) + + # across midnight + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue((t >= stime) or (t <= etime)) + else: + self.assertTrue((t > stime) or (t <= etime)) + + if inc_end: + self.assertTrue((t <= etime) or (t >= stime)) + else: + self.assertTrue((t < etime) or (t >= stime)) + + def test_between_time_types(self): + # GH11818 + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + self.assertRaises(ValueError, rng.indexer_between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + frame = DataFrame({'A': 0}, index=rng) + self.assertRaises(ValueError, frame.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + series = Series(0, index=rng) + self.assertRaises(ValueError, series.between_time, + datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + def test_between_time_formats(self): + # GH11818 + _skip_if_has_locale() + + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"), + ("0200am", "0230am"), ("2:00:00", "2:30:00"), + ("020000", "023000"), ("2:00:00am", "2:30:00am"), + ("020000am", "023000am")] + expected_length = 28 + + for time_string in strings: + self.assertEqual(len(ts.between_time(*time_string)), + expected_length, + "%s - %s" % time_string) + + def test_to_period(self): + from pandas.tseries.period import period_range + + ts = _simple_ts('1/1/2000', '1/1/2001') + + pts = ts.to_period() + exp = ts.copy() + exp.index 
= period_range('1/1/2000', '1/1/2001') + assert_series_equal(pts, exp) + + pts = ts.to_period('M') + exp.index = exp.index.asfreq('M') + tm.assert_index_equal(pts.index, exp.index.asfreq('M')) + assert_series_equal(pts, exp) + + # GH 7606 without freq + idx = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', + '2011-01-04']) + exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03', + '2011-01-04'], freq='D') + + s = Series(np.random.randn(4), index=idx) + expected = s.copy() + expected.index = exp_idx + assert_series_equal(s.to_period(), expected) + + df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) + expected = df.copy() + expected.index = exp_idx + assert_frame_equal(df.to_period(), expected) + + expected = df.copy() + expected.columns = exp_idx + assert_frame_equal(df.to_period(axis=1), expected) + + def create_dt64_based_index(self): + data = [Timestamp('2007-01-01 10:11:12.123456Z'), + Timestamp('2007-01-01 10:11:13.789123Z')] + index = DatetimeIndex(data) + return index + + def test_to_period_millisecond(self): + index = self.create_dt64_based_index() + + period = index.to_period(freq='L') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) + + def test_to_period_microsecond(self): + index = self.create_dt64_based_index() + + period = index.to_period(freq='U') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) + + def test_to_period_tz_pytz(self): + tm._skip_if_no_pytz() + from dateutil.tz import tzlocal + from pytz import utc as UTC + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=UTC) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') + + result = ts.to_period()[0] + expected = ts[0].to_period() + + 
self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_frame_to_period(self): + K = 5 + from pandas.tseries.period import period_range + + dr = date_range('1/1/2000', '1/1/2001') + pr = period_range('1/1/2000', '1/1/2001') + df = DataFrame(randn(len(dr), K), index=dr) + df['mix'] = 'a' + + pts = df.to_period() + exp = df.copy() + exp.index = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M') + tm.assert_index_equal(pts.index, exp.index.asfreq('M')) + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M', axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) + + self.assertRaises(ValueError, df.to_period, axis=2) + + def test_compat_replace(self): + # https://github.com/statsmodels/statsmodels/issues/3349 + # replace should take ints/longs for compat + + for f in [compat.long, int]: + result = date_range(Timestamp('1960-04-01 00:00:00', + freq='QS-JAN'), + periods=f(76), + freq='QS-JAN') + self.assertEqual(len(result), 76) + + def test_astype_object(self): + # NumPy 1.6.1 weak ns support + rng = date_range('1/1/2000', periods=20) + + casted = rng.astype('O') + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + self.assertEqual(casted.tolist(), exp_values) + + def test_catch_infinite_loop(self): + offset = offsets.DateOffset(minute=5) + # blow up, don't loop forever + self.assertRaises(Exception, date_range, datetime(2011, 11, 11), + datetime(2011, 11, 12), freq=offset) + + def test_append_concat(self): + rng = date_range('5/8/2012 1:45', periods=10, freq='5T') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + + result = ts.append(ts) + result_df = df.append(df) + ex_index = DatetimeIndex(np.tile(rng.values, 2)) + tm.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result_df.index, ex_index) + + appended = rng.append(rng) + tm.assert_index_equal(appended, ex_index) + + appended = rng.append([rng, rng]) + ex_index = DatetimeIndex(np.tile(rng.values, 3)) + tm.assert_index_equal(appended, ex_index) + + # different index names + rng1 = rng.copy() + rng2 = rng.copy() + rng1.name = 'foo' + rng2.name = 'bar' + self.assertEqual(rng1.append(rng1).name, 'foo') + self.assertIsNone(rng1.append(rng2).name) + + def test_append_concat_tz(self): + # GH 2938 + tm._skip_if_no_pytz() + + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz='US/Eastern') + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz='US/Eastern') + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz='US/Eastern') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, 
rng3) + + def test_append_concat_tz_explicit_pytz(self): + # GH 2938 + tm._skip_if_no_pytz() + from pytz import timezone as timezone + + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz=timezone('US/Eastern')) + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz=timezone('US/Eastern')) + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz=timezone('US/Eastern')) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_concat_tz_dateutil(self): + # GH 2938 + tm._skip_if_no_dateutil() + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz='dateutil/US/Eastern') + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz='dateutil/US/Eastern') + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz='dateutil/US/Eastern') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_set_dataframe_column_ns_dtype(self): + x = DataFrame([datetime.now(), datetime.now()]) + self.assertEqual(x[0].dtype, np.dtype('M8[ns]')) + + def test_groupby_count_dateparseerror(self): + dr = date_range(start='1/1/2012', freq='5min', periods=10) + + # BAD Example, datetimes first + s = Series(np.arange(10), index=[dr, lrange(10)]) + grouped = s.groupby(lambda x: x[1] % 2 == 0) + result = grouped.count() + + s = Series(np.arange(10), index=[lrange(10), dr]) + grouped = s.groupby(lambda x: x[0] % 2 == 0) + expected = grouped.count() + + assert_series_equal(result, expected) + + def test_frame_datetime64_handling_groupby(self): + # it works! + df = DataFrame([(3, np.datetime64('2012-07-03')), + (3, np.datetime64('2012-07-04'))], + columns=['a', 'date']) + result = df.groupby('a').first() + self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + + def test_frame_dict_constructor_datetime64_1680(self): + dr = date_range('1/1/2012', periods=10) + s = Series(dr, index=dr) + + # it works! + DataFrame({'a': 'foo', 'b': s}, index=dr) + DataFrame({'a': 'foo', 'b': s.values}, index=dr) + + def test_frame_datetime64_mixed_index_ctor_1681(self): + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + ts = Series(dr) + + # it works! 
+ d = DataFrame({'A': 'foo', 'B': ts}, index=dr) + self.assertTrue(d['B'].isnull().all()) + + def test_frame_timeseries_to_records(self): + index = date_range('1/1/2000', periods=10) + df = DataFrame(np.random.randn(10, 3), index=index, + columns=['a', 'b', 'c']) + + result = df.to_records() + result['index'].dtype == 'M8[ns]' + + result = df.to_records(index=False) + + def test_to_csv_numpy_16_bug(self): + frame = DataFrame({'a': date_range('1/1/2000', periods=10)}) + + buf = StringIO() + frame.to_csv(buf) + + result = buf.getvalue() + self.assertIn('2000-01-01', result) + + def test_series_map_box_timedelta(self): + # GH 11349 + s = Series(timedelta_range('1 day 1 s', periods=5, freq='h')) + + def f(x): + return x.total_seconds() + + s.map(f) + s.apply(f) + DataFrame(s).applymap(f) + + def test_concat_datetime_datetime64_frame(self): + # #2624 + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), 'hi']) + + df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) + + ind = date_range(start="2000/1/1", freq="D", periods=10) + df1 = DataFrame({'date': ind, 'test': lrange(10)}) + + # it works! + pd.concat([df1, df2_obj]) + + def test_asfreq_resample_set_correct_freq(self): + # GH5613 + # we test if .asfreq() and .resample() set the correct value for .freq + df = pd.DataFrame({'date': ["2012-01-01", "2012-01-02", "2012-01-03"], + 'col': [1, 2, 3]}) + df = df.set_index(pd.to_datetime(df.date)) + + # testing the settings before calling .asfreq() and .resample() + self.assertEqual(df.index.freq, None) + self.assertEqual(df.index.inferred_freq, 'D') + + # does .asfreq() set .freq correctly? + self.assertEqual(df.asfreq('D').index.freq, 'D') + + # does .resample() set .freq correctly? + self.assertEqual(df.resample('D').asfreq().index.freq, 'D') + + def test_pickle(self): + + # GH4606 + p = self.round_trip_pickle(NaT) + self.assertTrue(p is NaT) + + idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06']) + idx_p = self.round_trip_pickle(idx) + self.assertTrue(idx_p[0] == idx[0]) + self.assertTrue(idx_p[1] is NaT) + self.assertTrue(idx_p[2] == idx[2]) + + # GH11002 + # don't infer freq + idx = date_range('1750-1-1', '2050-1-1', freq='7D') + idx_p = self.round_trip_pickle(idx) + tm.assert_index_equal(idx, idx_p) + + +class TestTimeSeriesDuplicates(tm.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), + datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 1, 3), datetime(2000, 1, 3), + datetime(2000, 1, 4), datetime(2000, 1, 4), + datetime(2000, 1, 4), datetime(2000, 1, 5)] + + self.dups = Series(np.random.randn(len(dates)), index=dates) + + def test_constructor(self): + tm.assertIsInstance(self.dups, Series) + tm.assertIsInstance(self.dups.index, DatetimeIndex) + + def test_is_unique_monotonic(self): + self.assertFalse(self.dups.index.is_unique) + + def test_index_unique(self): + uniques = self.dups.index.unique() + expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 1, 4), datetime(2000, 1, 5)]) + self.assertEqual(uniques.dtype, 'M8[ns]') # sanity + tm.assert_index_equal(uniques, expected) + self.assertEqual(self.dups.index.nunique(), 4) + + # #2563 + self.assertTrue(isinstance(uniques, DatetimeIndex)) + + dups_local = self.dups.index.tz_localize('US/Eastern') + dups_local.name = 'foo' + result = dups_local.unique() + expected = DatetimeIndex(expected, name='foo') + expected = expected.tz_localize('US/Eastern') + self.assertTrue(result.tz is 
not None) + self.assertEqual(result.name, 'foo') + tm.assert_index_equal(result, expected) + + # NaT, note this is excluded + arr = [1370745748 + t for t in range(20)] + [iNaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + self.assertEqual(idx.nunique(), 20) + self.assertEqual(idx.nunique(dropna=False), 21) + + arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) + for t in range(20)] + [NaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + self.assertEqual(idx.nunique(), 20) + self.assertEqual(idx.nunique(dropna=False), 21) + + def test_index_dupes_contains(self): + d = datetime(2011, 12, 5, 20, 30) + ix = DatetimeIndex([d, d]) + self.assertTrue(d in ix) + + def test_duplicate_dates_indexing(self): + ts = self.dups + + uniques = ts.index.unique() + for date in uniques: + result = ts[date] + + mask = ts.index == date + total = (ts.index == date).sum() + expected = ts[mask] + if total > 1: + assert_series_equal(result, expected) + else: + assert_almost_equal(result, expected[0]) + + cp = ts.copy() + cp[date] = 0 + expected = Series(np.where(mask, 0, ts), index=ts.index) + assert_series_equal(cp, expected) + + self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) + + # new index + ts[datetime(2000, 1, 6)] = 0 + self.assertEqual(ts[datetime(2000, 1, 6)], 0) + + def test_range_slice(self): + idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', + '1/4/2000']) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts['1/2/2000':] + expected = ts[1:] + assert_series_equal(result, expected) + + result = ts['1/2/2000':'1/3/2000'] + expected = ts[1:4] + assert_series_equal(result, expected) + + def test_groupby_average_dup_values(self): + result = self.dups.groupby(level=0).mean() + expected = self.dups.groupby(self.dups.index).mean() + assert_series_equal(result, expected) + + def test_indexing_over_size_cutoff(self): + import datetime + # #1821 + + old_cutoff = _index._SIZE_CUTOFF + try: + _index._SIZE_CUTOFF = 1000 + + # create large list of non periodic datetime + dates = [] + sec = datetime.timedelta(seconds=1) + half_sec = datetime.timedelta(microseconds=500000) + d = datetime.datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame(np.random.randn(len(dates), 4), + index=dates, + columns=list('ABCD')) + + pos = n * 3 + timestamp = df.index[pos] + self.assertIn(timestamp, df.index) + + # it works! + df.loc[timestamp] + self.assertTrue(len(df.loc[[timestamp]]) > 0) + finally: + _index._SIZE_CUTOFF = old_cutoff + + def test_indexing_unordered(self): + # GH 2437 + rng = date_range(start='2011-01-01', end='2011-01-15') + ts = Series(randn(len(rng)), index=rng) + ts2 = concat([ts[0:4], ts[-4:], ts[4:-4]]) + + for t in ts.index: + # TODO: unused? 
+ s = str(t) # noqa + + expected = ts[t] + result = ts2[t] + self.assertTrue(expected == result) + + # GH 3448 (ranges) + def compare(slobj): + result = ts2[slobj].copy() + result = result.sort_index() + expected = ts[slobj] + assert_series_equal(result, expected) + + compare(slice('2011-01-01', '2011-01-15')) + compare(slice('2010-12-30', '2011-01-15')) + compare(slice('2011-01-01', '2011-01-16')) + + # partial ranges + compare(slice('2011-01-01', '2011-01-6')) + compare(slice('2011-01-06', '2011-01-8')) + compare(slice('2011-01-06', '2011-01-12')) + + # single values + result = ts2['2011'].sort_index() + expected = ts['2011'] + assert_series_equal(result, expected) + + # diff freq + rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') + ts = Series(np.arange(len(rng)), index=rng) + ts = ts.take(np.random.permutation(20)) + + result = ts['2005'] + for t in result.index: + self.assertTrue(t.year == 2005) + + def test_indexing(self): + + idx = date_range("2001-1-1", periods=20, freq='M') + ts = Series(np.random.rand(len(idx)), index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts['2001'] + expected.name = 'A' + + df = DataFrame(dict(A=ts)) + result = df['2001']['A'] + assert_series_equal(expected, result) + + # setting + ts['2001'] = 1 + expected = ts['2001'] + expected.name = 'A' + + df.loc['2001', 'A'] = 1 + + result = df['2001']['A'] + assert_series_equal(expected, result) + + # GH3546 (not including times on the last day) + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', + freq='H') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + assert_series_equal(expected, ts) + + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', + freq='S') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + assert_series_equal(expected, ts) + + idx = [Timestamp('2013-05-31 00:00'), + Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013'] + assert_series_equal(expected, ts) + + # GH14826, indexing with a seconds resolution string / datetime object + df = DataFrame(randn(5, 5), + columns=['open', 'high', 'low', 'close', 'volume'], + index=date_range('2012-01-02 18:01:00', + periods=5, tz='US/Central', freq='s')) + expected = df.loc[[df.index[2]]] + + # this is a single date, so will raise + self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) + self.assertRaises(KeyError, df.__getitem__, df.index[2], ) + + +class TestDatetime64(tm.TestCase): + """ + Also test support for datetime64[ns] in Series / DataFrame + """ + + def setUp(self): + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') + self.series = Series(rand(len(dti)), dti) + + def test_fancy_getitem(self): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + + self.assertEqual(s[48], 48) + self.assertEqual(s['1/2/2009'], 48) + self.assertEqual(s['2009-1-2'], 48) + self.assertEqual(s[datetime(2009, 1, 2)], 48) + self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) + self.assertRaises(KeyError, s.__getitem__, '2009-1-3') + + assert_series_equal(s['3/6/2009':'2009-06-05'], + s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) + + def test_fancy_setitem(self): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + s[48] = -1 + 
self.assertEqual(s[48], -1) + s['1/2/2009'] = -2 + self.assertEqual(s[48], -2) + s['1/2/2009':'2009-06-05'] = -3 + self.assertTrue((s[48:54] == -3).all()) + + def test_dti_snap(self): + dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', + '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') + + res = dti.snap(freq='W-MON') + exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') + exp = exp.repeat([3, 4]) + self.assertTrue((res == exp).all()) + + res = dti.snap(freq='B') + + exp = date_range('1/1/2002', '1/7/2002', freq='b') + exp = exp.repeat([1, 1, 1, 2, 2]) + self.assertTrue((res == exp).all()) + + def test_dti_reset_index_round_trip(self): + dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') + d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]')) + d3 = d2.set_index('index') + assert_frame_equal(d1, d3, check_names=False) + + # #2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) + df = df.set_index('Date') + + self.assertEqual(df.index[0], stamp) + self.assertEqual(df.reset_index()['Date'][0], stamp) + + def test_series_set_value(self): + # #1561 + + dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] + index = DatetimeIndex(dates) + + s = Series().set_value(dates[0], 1.) + s2 = s.set_value(dates[1], np.nan) + + exp = Series([1., np.nan], index=index) + + assert_series_equal(s2, exp) + + # s = Series(index[:1], index[:1]) + # s2 = s.set_value(dates[1], index[1]) + # self.assertEqual(s2.values.dtype, 'M8[ns]') + + @slow + def test_slice_locs_indexerror(self): + times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) + for i in range(100000)] + s = Series(lrange(100000), times) + s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] + + def test_slicing_datetimes(self): + + # GH 7523 + + # unique + df = DataFrame(np.arange(4., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in [1, 2, 3, 4]]) + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + # duplicates + df = pd.DataFrame(np.arange(5., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in [1, 2, 2, 3, 4]]) + + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + def test_frame_datetime64_duplicated(self): + dates = date_range('2010-07-01', end='2010-08-05') + + tst = DataFrame({'symbol': 'AAA', 'date': dates}) + result = tst.duplicated(['date', 'symbol']) + self.assertTrue((-result).all()) + + tst = DataFrame({'date': dates}) + result = tst.duplicated() + self.assertTrue((-result).all()) + + +class TestSeriesDatetime64(tm.TestCase): + def setUp(self): + self.series = Series(date_range('1/1/2000', periods=10)) + + def test_auto_conversion(self): + series = 
Series(list(date_range('1/1/2000', periods=10))) + self.assertEqual(series.dtype, 'M8[ns]') + + def test_constructor_cant_cast_datetime64(self): + msg = "Cannot cast datetime64 to " + with tm.assertRaisesRegexp(TypeError, msg): + Series(date_range('1/1/2000', periods=10), dtype=float) + + with tm.assertRaisesRegexp(TypeError, msg): + Series(date_range('1/1/2000', periods=10), dtype=int) + + def test_constructor_cast_object(self): + s = Series(date_range('1/1/2000', periods=10), dtype=object) + exp = Series(date_range('1/1/2000', periods=10)) + tm.assert_series_equal(s, exp) + + def test_series_comparison_scalars(self): + val = datetime(2000, 1, 4) + result = self.series > val + expected = Series([x > val for x in self.series]) + self.assert_series_equal(result, expected) + + val = self.series[5] + result = self.series > val + expected = Series([x > val for x in self.series]) + self.assert_series_equal(result, expected) + + def test_between(self): + left, right = self.series[[2, 7]] + + result = self.series.between(left, right) + expected = (self.series >= left) & (self.series <= right) + assert_series_equal(result, expected) + + # --------------------------------------------------------------------- + # NaT support + + def test_NaT_scalar(self): + series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') + + val = series[3] + self.assertTrue(com.isnull(val)) + + series[2] = val + self.assertTrue(com.isnull(series[2])) + + def test_NaT_cast(self): + # GH10747 + result = Series([np.nan]).astype('M8[ns]') + expected = Series([NaT]) + assert_series_equal(result, expected) + + def test_set_none_nan(self): + self.series[3] = None + self.assertIs(self.series[3], NaT) + + self.series[3:5] = None + self.assertIs(self.series[4], NaT) + + self.series[5] = np.nan + self.assertIs(self.series[5], NaT) + + self.series[5:7] = np.nan + self.assertIs(self.series[6], NaT) + + def test_intercept_astype_object(self): + + # this test no longer makes sense as series is by default already + # M8[ns] + expected = self.series.astype('object') + + df = DataFrame({'a': self.series, + 'b': np.random.randn(len(self.series))}) + exp_dtypes = pd.Series([np.dtype('datetime64[ns]'), + np.dtype('float64')], index=['a', 'b']) + tm.assert_series_equal(df.dtypes, exp_dtypes) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + def test_nat_operations(self): + # GH 8617 + s = Series([0, pd.NaT], dtype='m8[ns]') + exp = s[0] + self.assertEqual(s.median(), exp) + self.assertEqual(s.min(), exp) + self.assertEqual(s.max(), exp) + + def test_round_nat(self): + # GH14940 + s = Series([pd.NaT]) + expected = Series(pd.NaT) + for method in ["round", "floor", "ceil"]: + round_method = getattr(s.dt, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + assert_series_equal(round_method(freq), expected) + + +class TestDaysInMonth(tm.TestCase): + # tests for issue #10154 + def test_day_not_in_month_coerce(self): + self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", + errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", + errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", + errors='coerce'))) + + def test_day_not_in_month_raise(self): + self.assertRaises(ValueError, 
to_datetime, '2015-02-29', + errors='raise') + self.assertRaises(ValueError, to_datetime, '2015-02-29', + errors='raise', format="%Y-%m-%d") + self.assertRaises(ValueError, to_datetime, '2015-02-32', + errors='raise', format="%Y-%m-%d") + self.assertRaises(ValueError, to_datetime, '2015-04-31', + errors='raise', format="%Y-%m-%d") + + def test_day_not_in_month_ignore(self): + self.assertEqual(to_datetime( + '2015-02-29', errors='ignore'), '2015-02-29') + self.assertEqual(to_datetime( + '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') + self.assertEqual(to_datetime( + '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') + self.assertEqual(to_datetime( + '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') + + +class TestGuessDatetimeFormat(tm.TestCase): + + def test_guess_datetime_format_with_parseable_formats(self): + tm._skip_if_not_us_locale() + dt_string_to_format = (('20111230', '%Y%m%d'), + ('2011-12-30', '%Y-%m-%d'), + ('30-12-2011', '%d-%m-%Y'), + ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), + ('2011-12-30 00:00:00.000000', + '%Y-%m-%d %H:%M:%S.%f'), ) + + for dt_string, dt_format in dt_string_to_format: + self.assertEqual( + tools._guess_datetime_format(dt_string), + dt_format + ) + + def test_guess_datetime_format_with_dayfirst(self): + ambiguous_string = '01/01/2011' + self.assertEqual( + tools._guess_datetime_format(ambiguous_string, dayfirst=True), + '%d/%m/%Y' + ) + self.assertEqual( + tools._guess_datetime_format(ambiguous_string, dayfirst=False), + '%m/%d/%Y' + ) + + def test_guess_datetime_format_with_locale_specific_formats(self): + # The month names will vary depending on the locale, in which + # case these wont be parsed properly (dateutil can't parse them) + _skip_if_has_locale() + + dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), + ('30/December/2011', '%d/%B/%Y'), + ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) + + for dt_string, dt_format in dt_string_to_format: + self.assertEqual( + tools._guess_datetime_format(dt_string), + dt_format + ) + + def test_guess_datetime_format_invalid_inputs(self): + # A datetime string must include a year, month and a day for it + # to be guessable, in addition to being a string that looks like + # a datetime + invalid_dts = [ + '2013', + '01/2013', + '12:00:00', + '1/1/1/1', + 'this_is_not_a_datetime', + '51a', + 9, + datetime(2011, 1, 1), + ] + + for invalid_dt in invalid_dts: + self.assertTrue(tools._guess_datetime_format(invalid_dt) is None) + + def test_guess_datetime_format_nopadding(self): + # GH 11142 + dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), + ('30-1-2011', '%d-%m-%Y'), + ('1/1/2011', '%m/%d/%Y'), + ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), + ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) + + for dt_string, dt_format in dt_string_to_format: + self.assertEqual( + tools._guess_datetime_format(dt_string), + dt_format + ) + + def test_guess_datetime_format_for_array(self): + tm._skip_if_not_us_locale() + expected_format = '%Y-%m-%d %H:%M:%S.%f' + dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) + + test_arrays = [ + np.array([dt_string, dt_string, dt_string], dtype='O'), + np.array([np.nan, np.nan, dt_string], dtype='O'), + np.array([dt_string, 'random_string'], dtype='O'), + ] + + for test_array in test_arrays: + self.assertEqual( + tools._guess_datetime_format_for_array(test_array), + expected_format + ) + + format_for_string_of_nans = 
tools._guess_datetime_format_for_array( + np.array( + [np.nan, np.nan, np.nan], dtype='O')) + self.assertTrue(format_for_string_of_nans is None) + + +class TestToDatetimeInferFormat(tm.TestCase): + + def test_to_datetime_infer_datetime_format_consistent_format(self): + s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) + + test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', + '%Y-%m-%dT%H:%M:%S.%f'] + + for test_format in test_formats: + s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) + + with_format = pd.to_datetime(s_as_dt_strings, format=test_format) + no_infer = pd.to_datetime(s_as_dt_strings, + infer_datetime_format=False) + yes_infer = pd.to_datetime(s_as_dt_strings, + infer_datetime_format=True) + + # Whether the format is explicitly passed, it is inferred, or + # it is not inferred, the results should all be the same + self.assert_series_equal(with_format, no_infer) + self.assert_series_equal(no_infer, yes_infer) + + def test_to_datetime_infer_datetime_format_inconsistent_format(self): + s = pd.Series(np.array(['01/01/2011 00:00:00', + '01-02-2011 00:00:00', + '2011-01-03T00:00:00'])) + + # When the format is inconsistent, infer_datetime_format should just + # fallback to the default parsing + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) + + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_infer_datetime_format_series_with_nans(self): + s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, + '01/03/2011 00:00:00', np.nan])) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', + '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) + + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_iso8601_noleading_0s(self): + # GH 11871 + s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) + expected = pd.Series([pd.Timestamp('2014-01-01'), + pd.Timestamp('2014-02-02'), + pd.Timestamp('2015-03-03')]) + tm.assert_series_equal(pd.to_datetime(s), expected) + tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) + + +class TimeConversionFormats(tm.TestCase): + def test_to_datetime_format(self): + values = ['1/1/2000', '1/2/2000', '1/3/2000'] + + results1 = [Timestamp('20000101'), Timestamp('20000201'), + Timestamp('20000301')] + results2 = [Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')] + for vals, expecteds in [(values, (Index(results1), Index(results2))), + (Series(values), + (Series(results1), Series(results2))), + (values[0], (results1[0], results2[0])), + (values[1], (results1[1], results2[1])), + (values[2], (results1[2], results2[2]))]: + + for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): + result = to_datetime(vals, format=fmt) + expected = expecteds[i] + + if isinstance(expected, Series): + assert_series_equal(result, Series(expected)) + elif isinstance(expected, Timestamp): + self.assertEqual(result, expected) + else: + tm.assert_index_equal(result, expected) + + def test_to_datetime_format_YYYYMMDD(self): + s = Series([19801222, 19801222] + [19810105] * 5) + expected = 
Series([Timestamp(x) for x in s.apply(str)]) + + result = to_datetime(s, format='%Y%m%d') + assert_series_equal(result, expected) + + result = to_datetime(s.apply(str), format='%Y%m%d') + assert_series_equal(result, expected) + + # with NaT + expected = Series([Timestamp("19801222"), Timestamp("19801222")] + + [Timestamp("19810105")] * 5) + expected[2] = np.nan + s[2] = np.nan + + result = to_datetime(s, format='%Y%m%d') + assert_series_equal(result, expected) + + # string with NaT + s = s.apply(str) + s[2] = 'nat' + result = to_datetime(s, format='%Y%m%d') + assert_series_equal(result, expected) + + # coercion + # GH 7930 + s = Series([20121231, 20141231, 99991231]) + result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') + expected = Series([datetime(2012, 12, 31), + datetime(2014, 12, 31), datetime(9999, 12, 31)], + dtype=object) + self.assert_series_equal(result, expected) + + result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') + expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') + assert_series_equal(result, expected) + + # GH 10178 + def test_to_datetime_format_integer(self): + s = Series([2000, 2001, 2002]) + expected = Series([Timestamp(x) for x in s.apply(str)]) + + result = to_datetime(s, format='%Y') + assert_series_equal(result, expected) + + s = Series([200001, 200105, 200206]) + expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) + ]) + + result = to_datetime(s, format='%Y%m') + assert_series_equal(result, expected) + + def test_to_datetime_format_microsecond(self): + + # these are locale dependent + lang, _ = locale.getlocale() + month_abbr = calendar.month_abbr[4] + val = '01-{}-2011 00:00:01.978'.format(month_abbr) + + format = '%d-%b-%Y %H:%M:%S.%f' + result = to_datetime(val, format=format) + exp = datetime.strptime(val, format) + self.assertEqual(result, exp) + + def test_to_datetime_format_time(self): + data = [ + ['01/10/2010 15:20', '%m/%d/%Y %H:%M', + Timestamp('2010-01-10 15:20')], + ['01/10/2010 05:43', '%m/%d/%Y %I:%M', + Timestamp('2010-01-10 05:43')], + ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S', + Timestamp('2010-01-10 13:56:01')] # , + # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', + # Timestamp('2010-01-10 20:14')], + # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', + # Timestamp('2010-01-10 07:40')], + # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', + # Timestamp('2010-01-10 09:12:56')] + ] + for s, format, dt in data: + self.assertEqual(to_datetime(s, format=format), dt) + + def test_to_datetime_with_non_exact(self): + # GH 10834 + _skip_if_has_locale() + + # 8904 + # exact kw + if sys.version_info < (2, 7): + raise nose.SkipTest('on python version < 2.7') + + s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', + '19MAY11 00:00:00Z']) + result = to_datetime(s, format='%d%b%y', exact=False) + expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), + format='%d%b%y') + assert_series_equal(result, expected) + + def test_parse_nanoseconds_with_formula(self): + + # GH8989 + # trunctaing the nanoseconds when a format was provided + for v in ["2012-01-01 09:00:00.000000001", + "2012-01-01 09:00:00.000001", + "2012-01-01 09:00:00.001", + "2012-01-01 09:00:00.001000", + "2012-01-01 09:00:00.001000000", ]: + expected = pd.to_datetime(v) + result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") + self.assertEqual(result, expected) + + def test_to_datetime_format_weeks(self): + data = [ + ['2009324', '%Y%W%w', Timestamp('2009-08-13')], + ['2013020', '%Y%U%w', Timestamp('2013-01-13')] + ] + 
for s, format, dt in data: + self.assertEqual(to_datetime(s, format=format), dt) + + +class TestSlicing(tm.TestCase): + def test_slice_year(self): + dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + result = s['2005'] + expected = s[s.index.year == 2005] + assert_series_equal(result, expected) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + result = df.loc['2005'] + expected = df[df.index.year == 2005] + assert_frame_equal(result, expected) + + rng = date_range('1/1/2000', '1/1/2010') + + result = rng.get_loc('2009') + expected = slice(3288, 3653) + self.assertEqual(result, expected) + + def test_slice_quarter(self): + dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2001Q1']), 90) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['1Q01']), 90) + + def test_slice_month(self): + dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2005-11']), 30) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['2005-11']), 30) + + assert_series_equal(s['2005-11'], s['11-2005']) + + def test_partial_slice(self): + rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-05':'2006-02'] + expected = s['20050501':'20060228'] + assert_series_equal(result, expected) + + result = s['2005-05':] + expected = s['20050501':] + assert_series_equal(result, expected) + + result = s[:'2006-02'] + expected = s[:'20060228'] + assert_series_equal(result, expected) + + result = s['2005-1-1'] + self.assertEqual(result, s.iloc[0]) + + self.assertRaises(Exception, s.__getitem__, '2004-12-31') + + def test_partial_slice_daily(self): + rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-31'] + assert_series_equal(result, s.iloc[:24]) + + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') + + def test_partial_slice_hourly(self): + rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1'] + assert_series_equal(result, s.iloc[:60 * 4]) + + result = s['2005-1-1 20'] + assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') + + def test_partial_slice_minutely(self): + rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1 23:59'] + assert_series_equal(result, s.iloc[:60]) + + result = s['2005-1-1'] + assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') + + def test_partial_slice_second_precision(self): + rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, + microsecond=999990), + periods=20, freq='US') + s = Series(np.arange(20), rng) + + assert_series_equal(s['2005-1-1 00:00'], s.iloc[:10]) + assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) + + assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) + assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) + + 
self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) + self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', + lambda: s['2005-1-1 00:00:00']) + + def test_partial_slicing_dataframe(self): + # GH14856 + # Test various combinations of string slicing resolution vs. + # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match + formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] + resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] + for rnum, resolution in enumerate(resolutions[2:], 2): + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, + middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({'a': values}, index, dtype=np.int64) + self.assertEqual(df.index.resolution, resolution) + + # Timestamp with the same resolution as index + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], + [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + + # Series should return slice + result = df['a'][ts_string] + expected = df['a'][theslice] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts_string] + expected = df[theslice] + assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than index + # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame + for fmt in formats[rnum + 1:]: + ts_string = index[1].strftime(fmt) + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Not compatible with existing key + # Should raise KeyError + for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: + ts = index[1] + Timedelta("1 " + res) + ts_string = ts.strftime(fmt) + self.assertRaises(KeyError, df['a'].__getitem__, ts_string) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + def test_partial_slicing_with_multiindex(self): + + # GH 4758 + # partial string indexing with a multi-index buggy + df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], + 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], + 'val': [1, 2, 3, 4]}, + index=date_range("2013-06-19 09:30:00", + periods=4, freq='5T')) + df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) + + expected = DataFrame([ + [1] + ], index=Index(['ABC'], name='TICKER'), columns=['val']) + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] + assert_frame_equal(result, expected) + + expected = df_multi.loc[ + (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] + assert_series_equal(result, expected) + + # this is a KeyError as we don't do partial string selection on + # multi-levels + def f(): + 
df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] + + self.assertRaises(KeyError, f) + + # GH 4294 + # partial slice on a series mi + s = pd.DataFrame(randn(1000, 1000), index=pd.date_range( + '2000-1-1', periods=1000)).stack() + + s2 = s[:-1].copy() + expected = s2['2000-1-4'] + result = s2[pd.Timestamp('2000-1-4')] + assert_series_equal(result, expected) + + result = s[pd.Timestamp('2000-1-4')] + expected = s['2000-1-4'] + assert_series_equal(result, expected) + + df2 = pd.DataFrame(s) + expected = df2.xs('2000-1-4') + result = df2.loc[pd.Timestamp('2000-1-4')] + assert_frame_equal(result, expected) + + def test_shift(self): + ts = Series(np.random.randn(5), + index=date_range('1/1/2000', periods=5, freq='H')) + + result = ts.shift(1, freq='5T') + exp_index = ts.index.shift(1, freq='5T') + tm.assert_index_equal(result.index, exp_index) + + # GH #1063, multiple of same base + result = ts.shift(1, freq='4H') + exp_index = ts.index + offsets.Hour(4) + tm.assert_index_equal(result.index, exp_index) + + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) + self.assertRaises(ValueError, idx.shift, 1) + + def test_setops_preserve_freq(self): + for tz in [None, 'Asia/Tokyo', 'US/Eastern']: + rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) + + result = rng[:50].union(rng[50:100]) + self.assertEqual(result.name, rng.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].union(rng[30:100]) + self.assertEqual(result.name, rng.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].union(rng[60:100]) + self.assertEqual(result.name, rng.name) + self.assertIsNone(result.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].intersection(rng[25:75]) + self.assertEqual(result.name, rng.name) + self.assertEqual(result.freqstr, 'D') + self.assertEqual(result.tz, rng.tz) + + nofreq = DatetimeIndex(list(rng[25:75]), name='other') + result = rng[:50].union(nofreq) + self.assertIsNone(result.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + result = rng[:50].intersection(nofreq) + self.assertIsNone(result.name) + self.assertEqual(result.freq, rng.freq) + self.assertEqual(result.tz, rng.tz) + + def test_min_max(self): + rng = date_range('1/1/2000', '12/31/2000') + rng2 = rng.take(np.random.permutation(len(rng))) + + the_min = rng2.min() + the_max = rng2.max() + tm.assertIsInstance(the_min, Timestamp) + tm.assertIsInstance(the_max, Timestamp) + self.assertEqual(the_min, rng[0]) + self.assertEqual(the_max, rng[-1]) + + self.assertEqual(rng.min(), rng[0]) + self.assertEqual(rng.max(), rng[-1]) + + def test_min_max_series(self): + rng = date_range('1/1/2000', periods=10, freq='4h') + lvls = ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'C'] + df = DataFrame({'TS': rng, 'V': np.random.randn(len(rng)), 'L': lvls}) + + result = df.TS.max() + exp = Timestamp(df.TS.iat[-1]) + self.assertTrue(isinstance(result, Timestamp)) + self.assertEqual(result, exp) + + result = df.TS.min() + exp = Timestamp(df.TS.iat[0]) + self.assertTrue(isinstance(result, Timestamp)) + self.assertEqual(result, exp) + + def test_from_M8_structured(self): + dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] + arr = np.array(dates, + dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')]) + df = DataFrame(arr) + + self.assertEqual(df['Date'][0], dates[0][0]) + self.assertEqual(df['Forecasting'][0], dates[0][1]) + + s = Series(arr['Date']) + 
tm.assertIsInstance(s[0], Timestamp)
+        self.assertEqual(s[0], dates[0][0])
+
+        s = Series.from_array(arr['Date'], Index([0]))
+        self.assertEqual(s[0], dates[0][0])
+
+    def test_get_level_values_box(self):
+        from pandas import MultiIndex
+
+        dates = date_range('1/1/2000', periods=4)
+        levels = [dates, [0, 1]]
+        labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]
+
+        index = MultiIndex(levels=levels, labels=labels)
+
+        self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp))
+
+    def test_frame_apply_dont_convert_datetime64(self):
+        from pandas.tseries.offsets import BDay
+        df = DataFrame({'x1': [datetime(1996, 1, 1)]})
+
+        df = df.applymap(lambda x: x + BDay())
+        df = df.applymap(lambda x: x + BDay())
+
+        self.assertTrue(df.x1.dtype == 'M8[ns]')
+
+    def test_partial_slice_doesnt_require_monotonicity(self):
+        # For historical reasons.
+        s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10))
+
+        nonmonotonic = s[[3, 5, 4]]
+        expected = nonmonotonic.iloc[:0]
+        timestamp = pd.Timestamp('2014-01-10')
+
+        assert_series_equal(nonmonotonic['2014-01-10':], expected)
+        self.assertRaisesRegexp(KeyError,
+                                r"Timestamp\('2014-01-10 00:00:00'\)",
+                                lambda: nonmonotonic[timestamp:])
+
+        assert_series_equal(nonmonotonic.loc['2014-01-10':], expected)
+        self.assertRaisesRegexp(KeyError,
+                                r"Timestamp\('2014-01-10 00:00:00'\)",
+                                lambda: nonmonotonic.loc[timestamp:])
+
+
+class TestToDatetime(tm.TestCase):
+    _multiprocess_can_split_ = True
+
+    def test_to_datetime_dt64s(self):
+        in_bound_dts = [
+            np.datetime64('2000-01-01'),
+            np.datetime64('2000-01-02'),
+        ]
+
+        for dt in in_bound_dts:
+            self.assertEqual(pd.to_datetime(dt), Timestamp(dt))
+
+        oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ]
+
+        for dt in oob_dts:
+            self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise')
+            self.assertRaises(ValueError, tslib.Timestamp, dt)
+            self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT)
+
+    def test_to_datetime_array_of_dt64s(self):
+        dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ]
+
+        # Assuming all datetimes are in bounds, to_datetime() returns
+        # an array that is equal to Timestamp() parsing
+        self.assert_numpy_array_equal(
+            pd.to_datetime(dts, box=False),
+            np.array([Timestamp(x).asm8 for x in dts])
+        )
+
+        # A list of datetimes where the last one is out of bounds
+        dts_with_oob = dts + [np.datetime64('9999-01-01')]
+
+        self.assertRaises(ValueError, pd.to_datetime, dts_with_oob,
+                          errors='raise')
+
+        self.assert_numpy_array_equal(
+            pd.to_datetime(dts_with_oob, box=False, errors='coerce'),
+            np.array(
+                [
+                    Timestamp(dts_with_oob[0]).asm8,
+                    Timestamp(dts_with_oob[1]).asm8,
+                    iNaT,
+                ],
+                dtype='M8'
+            )
+        )
+
+        # With errors='ignore', out of bounds datetime64s
+        # are converted to their .item(), which depending on the version of
+        # numpy is either a python datetime.datetime or datetime.date
+        self.assert_numpy_array_equal(
+            pd.to_datetime(dts_with_oob, box=False, errors='ignore'),
+            np.array(
+                [dt.item() for dt in dts_with_oob],
+                dtype='O'
+            )
+        )
+
+    def test_to_datetime_tz(self):
+
+        # xref 8260
+        # uniform returns a DatetimeIndex
+        arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
+               pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]
+        result = pd.to_datetime(arr)
+        expected = DatetimeIndex(
+            ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific')
+        tm.assert_index_equal(result, expected)
+
+        # mixed tzs will raise
+        arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'),
+
pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] + self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) + + def test_to_datetime_tz_pytz(self): + + # xref 8260 + tm._skip_if_no_pytz() + import pytz + + us_eastern = pytz.timezone('US/Eastern') + arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, + hour=3, minute=0)), + us_eastern.localize(datetime(year=2000, month=6, day=1, + hour=3, minute=0))], + dtype=object) + result = pd.to_datetime(arr, utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + def test_to_datetime_utc_is_true(self): + # See gh-11934 + start = pd.Timestamp('2014-01-01', tz='utc') + end = pd.Timestamp('2014-01-03', tz='utc') + date_range = pd.bdate_range(start, end) + + result = pd.to_datetime(date_range, utc=True) + expected = pd.DatetimeIndex(data=date_range) + tm.assert_index_equal(result, expected) + + def test_to_datetime_tz_psycopg2(self): + + # xref 8260 + try: + import psycopg2 + except ImportError: + raise nose.SkipTest("no psycopg2 installed") + + # misc cases + tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) + tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) + arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), + datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], + dtype=object) + + result = pd.to_datetime(arr, errors='coerce', utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + # dtype coercion + i = pd.DatetimeIndex([ + '2000-01-01 08:00:00+00:00' + ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) + self.assertTrue(is_datetime64_ns_dtype(i)) + + # tz coerceion + result = pd.to_datetime(i, errors='coerce') + tm.assert_index_equal(result, i) + + result = pd.to_datetime(i, errors='coerce', utc=True) + expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], + dtype='datetime64[ns, UTC]') + tm.assert_index_equal(result, expected) + + def test_datetime_bool(self): + # GH13176 + with self.assertRaises(TypeError): + to_datetime(False) + self.assertTrue(to_datetime(False, errors="coerce") is tslib.NaT) + self.assertEqual(to_datetime(False, errors="ignore"), False) + with self.assertRaises(TypeError): + to_datetime(True) + self.assertTrue(to_datetime(True, errors="coerce") is tslib.NaT) + self.assertEqual(to_datetime(True, errors="ignore"), True) + with self.assertRaises(TypeError): + to_datetime([False, datetime.today()]) + with self.assertRaises(TypeError): + to_datetime(['20130101', True]) + tm.assert_index_equal(to_datetime([0, False, tslib.NaT, 0.0], + errors="coerce"), + DatetimeIndex([to_datetime(0), tslib.NaT, + tslib.NaT, to_datetime(0)])) + + def test_datetime_invalid_datatype(self): + # GH13176 + + with self.assertRaises(TypeError): + pd.to_datetime(bool) + with self.assertRaises(TypeError): + pd.to_datetime(pd.to_datetime) + + def test_unit(self): + # GH 11758 + # test proper behavior with erros + + with self.assertRaises(ValueError): + to_datetime([1], unit='D', format='%Y%m%d') + + values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan, + 'NaT', ''] + result = to_datetime(values, unit='D', errors='ignore') + expected = Index([11111111, Timestamp('1970-01-02'), + Timestamp('1970-01-02'), pd.NaT, + pd.NaT, pd.NaT, pd.NaT, pd.NaT], + dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, unit='D', 
errors='coerce') + expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', + 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) + tm.assert_index_equal(result, expected) + + with self.assertRaises(tslib.OutOfBoundsDatetime): + to_datetime(values, unit='D', errors='raise') + + values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT'] + + result = to_datetime(values, errors='ignore', unit='s') + expected = Index([1420043460000, pd.NaT, pd.NaT, + pd.NaT, pd.NaT], dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, errors='coerce', unit='s') + expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) + tm.assert_index_equal(result, expected) + + with self.assertRaises(tslib.OutOfBoundsDatetime): + to_datetime(values, errors='raise', unit='s') + + # if we have a string, then we raise a ValueError + # and NOT an OutOfBoundsDatetime + for val in ['foo', Timestamp('20130101')]: + try: + to_datetime(val, errors='raise', unit='s') + except tslib.OutOfBoundsDatetime: + raise AssertionError("incorrect exception raised") + except ValueError: + pass + + def test_unit_consistency(self): + + # consistency of conversions + expected = Timestamp('1970-05-09 14:25:11') + result = pd.to_datetime(11111111, unit='s', errors='raise') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='coerce') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='ignore') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + def test_unit_with_numeric(self): + + # GH 13180 + # coercions from floats/ints are ok + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr1 = [1.434692e+18, 1.432766e+18] + arr2 = np.array(arr1).astype('int64') + for errors in ['ignore', 'raise', 'coerce']: + result = pd.to_datetime(arr1, errors=errors) + tm.assert_index_equal(result, expected) + + result = pd.to_datetime(arr2, errors=errors) + tm.assert_index_equal(result, expected) + + # but we want to make sure that we are coercing + # if we have ints/strings + expected = DatetimeIndex(['NaT', + '2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr = ['foo', 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20', + 'NaT', + 'NaT']) + arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + def test_unit_mixed(self): + + # mixed integers/datetimes + expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) + arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + expected = DatetimeIndex(['NaT', + 'NaT', + '2013-01-01']) + arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + def test_dataframe(self): + + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [6, 7], + 'minute': [58, 59], + 'second': [10, 11], + 'ms': [1, 1], + 'us': [2, 2], + 'ns': [3, 3]}) + + result = to_datetime({'year': df['year'], + 
'month': df['month'], + 'day': df['day']}) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:0:00')]) + assert_series_equal(result, expected) + + # dict-like + result = to_datetime(df[['year', 'month', 'day']].to_dict()) + assert_series_equal(result, expected) + + # dict but with constructable + df2 = df[['year', 'month', 'day']].to_dict() + df2['month'] = 2 + result = to_datetime(df2) + expected2 = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160205 00:0:00')]) + assert_series_equal(result, expected2) + + # unit mappings + units = [{'year': 'years', + 'month': 'months', + 'day': 'days', + 'hour': 'hours', + 'minute': 'minutes', + 'second': 'seconds'}, + {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second'}, + ] + + for d in units: + result = to_datetime(df[list(d.keys())].rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10'), + Timestamp('20160305 07:59:11')]) + assert_series_equal(result, expected) + + d = {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second', + 'ms': 'ms', + 'us': 'us', + 'ns': 'ns'} + + result = to_datetime(df.rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10.001002003'), + Timestamp('20160305 07:59:11.001002003')]) + assert_series_equal(result, expected) + + # coerce back to int + result = to_datetime(df.astype(str)) + assert_series_equal(result, expected) + + # passing coerce + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + with self.assertRaises(ValueError): + to_datetime(df2) + result = to_datetime(df2, errors='coerce') + expected = Series([Timestamp('20150204 00:00:00'), + pd.NaT]) + assert_series_equal(result, expected) + + # extra columns + with self.assertRaises(ValueError): + df2 = df.copy() + df2['foo'] = 1 + to_datetime(df2) + + # not enough + for c in [['year'], + ['year', 'month'], + ['year', 'month', 'second'], + ['month', 'day'], + ['year', 'day', 'second']]: + with self.assertRaises(ValueError): + to_datetime(df[c]) + + # duplicates + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + df2.columns = ['year', 'year', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5], + 'hour': [4, 5]}) + df2.columns = ['year', 'month', 'day', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + + def test_dataframe_dtypes(self): + # #13451 + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5]}) + + # int16 + result = to_datetime(df.astype('int16')) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # mixed dtypes + df['month'] = df['month'].astype('int8') + df['day'] = df['day'].astype('int8') + result = to_datetime(df) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # float + df = DataFrame({'year': [2000, 2001], + 'month': [1.5, 1], + 'day': [1, 1]}) + with self.assertRaises(ValueError): + to_datetime(df) + + def test_index_to_datetime(self): + idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = idx.to_datetime() + expected = DatetimeIndex(pd.to_datetime(idx.values)) + tm.assert_index_equal(result, expected) + + with 
tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + today = datetime.today() + idx = Index([today], dtype=object) + result = idx.to_datetime() + expected = DatetimeIndex([today]) + tm.assert_index_equal(result, expected) + + def test_to_datetime_iso8601(self): + result = to_datetime(["2012-01-01 00:00:00"]) + exp = Timestamp("2012-01-01 00:00:00") + self.assertEqual(result[0], exp) + + result = to_datetime(['20121001']) # bad iso 8601 + exp = Timestamp('2012-10-01') + self.assertEqual(result[0], exp) + + def test_to_datetime_default(self): + rs = to_datetime('2001') + xp = datetime(2001, 1, 1) + self.assertTrue(rs, xp) + + # dayfirst is essentially broken + + # to_datetime('01-13-2012', dayfirst=True) + # self.assertRaises(ValueError, to_datetime('01-13-2012', + # dayfirst=True)) + + def test_to_datetime_on_datetime64_series(self): + # #2699 + s = Series(date_range('1/1/2000', periods=10)) + + result = to_datetime(s) + self.assertEqual(result[0], s[0]) + + def test_to_datetime_with_space_in_series(self): + # GH 6428 + s = Series(['10/18/2006', '10/18/2008', ' ']) + tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) + result_coerce = to_datetime(s, errors='coerce') + expected_coerce = Series([datetime(2006, 10, 18), + datetime(2008, 10, 18), + pd.NaT]) + tm.assert_series_equal(result_coerce, expected_coerce) + result_ignore = to_datetime(s, errors='ignore') + tm.assert_series_equal(result_ignore, s) + + def test_to_datetime_with_apply(self): + # this is only locale tested with US/None locales + _skip_if_has_locale() + + # GH 5195 + # with a format and coerce a single item to_datetime fails + td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) + expected = pd.to_datetime(td, format='%b %y') + result = td.apply(pd.to_datetime, format='%b %y') + assert_series_equal(result, expected) + + td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) + self.assertRaises(ValueError, + lambda: pd.to_datetime(td, format='%b %y', + errors='raise')) + self.assertRaises(ValueError, + lambda: td.apply(pd.to_datetime, format='%b %y', + errors='raise')) + expected = pd.to_datetime(td, format='%b %y', errors='coerce') + + result = td.apply( + lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) + assert_series_equal(result, expected) + + def test_to_datetime_types(self): + + # empty string + result = to_datetime('') + self.assertIs(result, NaT) + + result = to_datetime(['', '']) + self.assertTrue(isnull(result).all()) + + # ints + result = Timestamp(0) + expected = to_datetime(0) + self.assertEqual(result, expected) + + # GH 3888 (strings) + expected = to_datetime(['2012'])[0] + result = to_datetime('2012') + self.assertEqual(result, expected) + + # array = ['2012','20120101','20120101 12:01:01'] + array = ['20120101', '20120101 12:01:01'] + expected = list(to_datetime(array)) + result = lmap(Timestamp, array) + tm.assert_almost_equal(result, expected) + + # currently fails ### + # result = Timestamp('2012') + # expected = to_datetime('2012') + # self.assertEqual(result, expected) + + def test_to_datetime_unprocessable_input(self): + # GH 4928 + self.assert_numpy_array_equal( + to_datetime([1, '1'], errors='ignore'), + np.array([1, '1'], dtype='O') + ) + self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') + + def test_to_datetime_other_datetime64_units(self): + # 5/25/2012 + scalar = np.int64(1337904000000000).view('M8[us]') + as_obj = scalar.astype('O') + + index = DatetimeIndex([scalar]) + self.assertEqual(index[0], scalar.astype('O')) + + 
value = Timestamp(scalar) + self.assertEqual(value, as_obj) + + def test_to_datetime_list_of_integers(self): + rng = date_range('1/1/2000', periods=20) + rng = DatetimeIndex(rng.values) + + ints = list(rng.asi8) + + result = DatetimeIndex(ints) + + tm.assert_index_equal(rng, result) + + def test_to_datetime_freq(self): + xp = bdate_range('2000-1-1', periods=10, tz='UTC') + rs = xp.to_datetime() + self.assertEqual(xp.freq, rs.freq) + self.assertEqual(xp.tzinfo, rs.tzinfo) + if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py deleted file mode 100644 index ff6cc4bb9853c..0000000000000 --- a/pandas/tseries/tests/test_timeseries.py +++ /dev/null @@ -1,4184 +0,0 @@ -# pylint: disable-msg=E1101,W0612 -import locale -import calendar -import operator -import sys -from datetime import datetime, time, timedelta -from numpy.random import rand - -import nose -import numpy as np -import pandas.index as _index -import pandas.lib as lib -import pandas.tslib as tslib - -from pandas.types.common import is_datetime64_ns_dtype -import pandas as pd -import pandas.compat as compat -import pandas.core.common as com -import pandas.tseries.frequencies as frequencies -import pandas.tseries.offsets as offsets -import pandas.tseries.tools as tools -import pandas.util.testing as tm -from pandas import ( - Index, Series, DataFrame, isnull, date_range, Timestamp, Period, - DatetimeIndex, to_datetime, bdate_range, Float64Index, - NaT, timedelta_range, Timedelta, concat) -from pandas.compat import range, long, StringIO, lrange, lmap, zip, product -from pandas.tslib import iNaT -from pandas.util.testing import ( - assert_frame_equal, assert_series_equal, assert_almost_equal, - _skip_if_has_locale, slow) - -randn = np.random.randn - - -class TestTimeSeriesDuplicates(tm.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), - datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 3), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 4), - datetime(2000, 1, 4), datetime(2000, 1, 5)] - - self.dups = Series(np.random.randn(len(dates)), index=dates) - - def test_constructor(self): - tm.assertIsInstance(self.dups, Series) - tm.assertIsInstance(self.dups.index, DatetimeIndex) - - def test_is_unique_monotonic(self): - self.assertFalse(self.dups.index.is_unique) - - def test_index_unique(self): - uniques = self.dups.index.unique() - expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 5)]) - self.assertEqual(uniques.dtype, 'M8[ns]') # sanity - tm.assert_index_equal(uniques, expected) - self.assertEqual(self.dups.index.nunique(), 4) - - # #2563 - self.assertTrue(isinstance(uniques, DatetimeIndex)) - - dups_local = self.dups.index.tz_localize('US/Eastern') - dups_local.name = 'foo' - result = dups_local.unique() - expected = DatetimeIndex(expected, name='foo') - expected = expected.tz_localize('US/Eastern') - self.assertTrue(result.tz is not None) - self.assertEqual(result.name, 'foo') - tm.assert_index_equal(result, expected) - - # NaT, note this is excluded - arr = [1370745748 + t for t in range(20)] + [iNaT] - idx = DatetimeIndex(arr * 3) - tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - arr = [Timestamp('2013-06-09 
02:42:28') + timedelta(seconds=t) - for t in range(20)] + [NaT] - idx = DatetimeIndex(arr * 3) - tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - def test_index_dupes_contains(self): - d = datetime(2011, 12, 5, 20, 30) - ix = DatetimeIndex([d, d]) - self.assertTrue(d in ix) - - def test_duplicate_dates_indexing(self): - ts = self.dups - - uniques = ts.index.unique() - for date in uniques: - result = ts[date] - - mask = ts.index == date - total = (ts.index == date).sum() - expected = ts[mask] - if total > 1: - assert_series_equal(result, expected) - else: - assert_almost_equal(result, expected[0]) - - cp = ts.copy() - cp[date] = 0 - expected = Series(np.where(mask, 0, ts), index=ts.index) - assert_series_equal(cp, expected) - - self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) - - # new index - ts[datetime(2000, 1, 6)] = 0 - self.assertEqual(ts[datetime(2000, 1, 6)], 0) - - def test_range_slice(self): - idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', - '1/4/2000']) - - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts['1/2/2000':] - expected = ts[1:] - assert_series_equal(result, expected) - - result = ts['1/2/2000':'1/3/2000'] - expected = ts[1:4] - assert_series_equal(result, expected) - - def test_groupby_average_dup_values(self): - result = self.dups.groupby(level=0).mean() - expected = self.dups.groupby(self.dups.index).mean() - assert_series_equal(result, expected) - - def test_indexing_over_size_cutoff(self): - import datetime - # #1821 - - old_cutoff = _index._SIZE_CUTOFF - try: - _index._SIZE_CUTOFF = 1000 - - # create large list of non periodic datetime - dates = [] - sec = datetime.timedelta(seconds=1) - half_sec = datetime.timedelta(microseconds=500000) - d = datetime.datetime(2011, 12, 5, 20, 30) - n = 1100 - for i in range(n): - dates.append(d) - dates.append(d + sec) - dates.append(d + sec + half_sec) - dates.append(d + sec + sec + half_sec) - d += 3 * sec - - # duplicate some values in the list - duplicate_positions = np.random.randint(0, len(dates) - 1, 20) - for p in duplicate_positions: - dates[p + 1] = dates[p] - - df = DataFrame(np.random.randn(len(dates), 4), - index=dates, - columns=list('ABCD')) - - pos = n * 3 - timestamp = df.index[pos] - self.assertIn(timestamp, df.index) - - # it works! - df.loc[timestamp] - self.assertTrue(len(df.loc[[timestamp]]) > 0) - finally: - _index._SIZE_CUTOFF = old_cutoff - - def test_indexing_unordered(self): - # GH 2437 - rng = date_range(start='2011-01-01', end='2011-01-15') - ts = Series(randn(len(rng)), index=rng) - ts2 = concat([ts[0:4], ts[-4:], ts[4:-4]]) - - for t in ts.index: - # TODO: unused? 
- s = str(t) # noqa - - expected = ts[t] - result = ts2[t] - self.assertTrue(expected == result) - - # GH 3448 (ranges) - def compare(slobj): - result = ts2[slobj].copy() - result = result.sort_index() - expected = ts[slobj] - assert_series_equal(result, expected) - - compare(slice('2011-01-01', '2011-01-15')) - compare(slice('2010-12-30', '2011-01-15')) - compare(slice('2011-01-01', '2011-01-16')) - - # partial ranges - compare(slice('2011-01-01', '2011-01-6')) - compare(slice('2011-01-06', '2011-01-8')) - compare(slice('2011-01-06', '2011-01-12')) - - # single values - result = ts2['2011'].sort_index() - expected = ts['2011'] - assert_series_equal(result, expected) - - # diff freq - rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') - ts = Series(np.arange(len(rng)), index=rng) - ts = ts.take(np.random.permutation(20)) - - result = ts['2005'] - for t in result.index: - self.assertTrue(t.year == 2005) - - def test_indexing(self): - - idx = date_range("2001-1-1", periods=20, freq='M') - ts = Series(np.random.rand(len(idx)), index=idx) - - # getting - - # GH 3070, make sure semantics work on Series/Frame - expected = ts['2001'] - expected.name = 'A' - - df = DataFrame(dict(A=ts)) - result = df['2001']['A'] - assert_series_equal(expected, result) - - # setting - ts['2001'] = 1 - expected = ts['2001'] - expected.name = 'A' - - df.loc['2001', 'A'] = 1 - - result = df['2001']['A'] - assert_series_equal(expected, result) - - # GH3546 (not including times on the last day) - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', - freq='H') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', - freq='S') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = [Timestamp('2013-05-31 00:00'), - Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013'] - assert_series_equal(expected, ts) - - # GH14826, indexing with a seconds resolution string / datetime object - df = DataFrame(randn(5, 5), - columns=['open', 'high', 'low', 'close', 'volume'], - index=date_range('2012-01-02 18:01:00', - periods=5, tz='US/Central', freq='s')) - expected = df.loc[[df.index[2]]] - - # this is a single date, so will raise - self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) - self.assertRaises(KeyError, df.__getitem__, df.index[2], ) - - def test_recreate_from_data(self): - freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', - 'C'] - - for f in freqs: - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) - idx = DatetimeIndex(org, freq=f) - tm.assert_index_equal(idx, org) - - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, - tz='US/Pacific', periods=1) - idx = DatetimeIndex(org, freq=f, tz='US/Pacific') - tm.assert_index_equal(idx, org) - - -def assert_range_equal(left, right): - assert (left.equals(right)) - assert (left.freq == right.freq) - assert (left.tz == right.tz) - - -class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True - - def test_is_(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - self.assertTrue(dti.is_(dti)) - self.assertTrue(dti.is_(dti.view())) - self.assertFalse(dti.is_(dti.copy())) - - def test_dti_slicing(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - dti2 = dti[[1, 3, 5]] - - v1 = dti2[0] - v2 = dti2[1] - v3 = 
dti2[2] - - self.assertEqual(v1, Timestamp('2/28/2005')) - self.assertEqual(v2, Timestamp('4/30/2005')) - self.assertEqual(v3, Timestamp('6/30/2005')) - - # don't carry freq through irregular slicing - self.assertIsNone(dti2.freq) - - def test_contiguous_boolean_preserve_freq(self): - rng = date_range('1/1/2000', '3/1/2000', freq='B') - - mask = np.zeros(len(rng), dtype=bool) - mask[10:20] = True - - masked = rng[mask] - expected = rng[10:20] - self.assertIsNotNone(expected.freq) - assert_range_equal(masked, expected) - - mask[22] = True - masked = rng[mask] - self.assertIsNone(masked.freq) - - def test_getitem_median_slice_bug(self): - index = date_range('20090415', '20090519', freq='2B') - s = Series(np.random.randn(13), index=index) - - indexer = [slice(6, 7, None)] - result = s[indexer] - expected = s[indexer[0]] - assert_series_equal(result, expected) - - def test_series_box_timestamp(self): - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng) - - tm.assertIsInstance(s[5], Timestamp) - - rng = date_range('20090415', '20090519', freq='B') - s = Series(rng, index=rng) - tm.assertIsInstance(s[5], Timestamp) - - tm.assertIsInstance(s.iat[5], Timestamp) - - def test_series_box_timedelta(self): - rng = timedelta_range('1 day 1 s', periods=5, freq='h') - s = Series(rng) - tm.assertIsInstance(s[1], Timedelta) - tm.assertIsInstance(s.iat[2], Timedelta) - - def test_date_range_ambiguous_arguments(self): - # #2538 - start = datetime(2011, 1, 1, 5, 3, 40) - end = datetime(2011, 1, 1, 8, 9, 40) - - self.assertRaises(ValueError, date_range, start, end, freq='s', - periods=10) - - def test_timestamp_to_datetime(self): - tm._skip_if_no_pytz() - rng = date_range('20090415', '20090519', tz='US/Eastern') - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_dateutil(self): - tm._skip_if_no_pytz() - rng = date_range('20090415', '20090519', tz='dateutil/US/Eastern') - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - rng = date_range('20090415', '20090519', - tz=pytz.timezone('US/Eastern')) - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_timestamp_to_datetime_explicit_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as gettz - rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) - - stamp = rng[0] - dtval = stamp.to_pydatetime() - self.assertEqual(stamp, dtval) - self.assertEqual(stamp.tzinfo, dtval.tzinfo) - - def test_index_convert_to_datetime_array(self): - tm._skip_if_no_pytz() - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') - rng_utc = date_range('20090415', '20090519', tz='utc') - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_index_convert_to_datetime_array_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - - def _check_rng(rng): - converted = 
rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', - tz=pytz.timezone('US/Eastern')) - rng_utc = date_range('20090415', '20090519', tz=pytz.utc) - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_index_convert_to_datetime_array_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', - tz='dateutil/US/Eastern') - rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_ctor_str_intraday(self): - rng = DatetimeIndex(['1-1-2000 00:00:01']) - self.assertEqual(rng[0].second, 1) - - def test_series_ctor_plus_datetimeindex(self): - rng = date_range('20090415', '20090519', freq='B') - data = dict((k, 1) for k in rng) - - result = Series(data, index=rng) - self.assertIs(result.index, rng) - - def test_series_pad_backfill_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - - result = s[:2].reindex(index, method='pad', limit=5) - - expected = s[:2].reindex(index).fillna(method='pad') - expected[-3:] = np.nan - assert_series_equal(result, expected) - - result = s[-2:].reindex(index, method='backfill', limit=5) - - expected = s[-2:].reindex(index).fillna(method='backfill') - expected[:3] = np.nan - assert_series_equal(result, expected) - - def test_series_fillna_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - - result = s[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = s[:2].reindex(index).fillna(method='pad') - expected[-3:] = np.nan - assert_series_equal(result, expected) - - result = s[-2:].reindex(index) - result = result.fillna(method='bfill', limit=5) - - expected = s[-2:].reindex(index).fillna(method='backfill') - expected[:3] = np.nan - assert_series_equal(result, expected) - - def test_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index, method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index, method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def test_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def 
test_frame_setitem_timestamp(self): - # 2155 - columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', - freq=offsets.BDay()) - index = lrange(10) - data = DataFrame(columns=columns, index=index) - t = datetime(2012, 11, 1) - ts = Timestamp(t) - data[ts] = np.nan # works - - def test_sparse_series_fillna_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - - ss = s[:2].reindex(index).to_sparse() - result = ss.fillna(method='pad', limit=5) - expected = ss.fillna(method='pad', limit=5) - expected = expected.to_dense() - expected[-3:] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - ss = s[-2:].reindex(index).to_sparse() - result = ss.fillna(method='backfill', limit=5) - expected = ss.fillna(method='backfill') - expected = expected.to_dense() - expected[:3] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - def test_sparse_series_pad_backfill_limit(self): - index = np.arange(10) - s = Series(np.random.randn(10), index=index) - s = s.to_sparse() - - result = s[:2].reindex(index, method='pad', limit=5) - expected = s[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected[-3:] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - result = s[-2:].reindex(index, method='backfill', limit=5) - expected = s[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected[:3] = np.nan - expected = expected.to_sparse() - assert_series_equal(result, expected) - - def test_sparse_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index, method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index, method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_sparse_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_pad_require_monotonicity(self): - rng = date_range('1/1/2000', '3/1/2000', freq='B') - - # neither monotonic increasing or decreasing - rng2 = rng[[1, 0, 2]] - - self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') - - def test_frame_ctor_datetime64_column(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - dates = np.asarray(rng) - - df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) - self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) - - def test_frame_add_datetime64_column(self): - rng = 
date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - df = DataFrame(index=np.arange(len(rng))) - - df['A'] = rng - self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) - - def test_frame_datetime64_pre1900_repr(self): - df = DataFrame({'year': date_range('1/1/1700', periods=50, - freq='A-DEC')}) - # it works! - repr(df) - - def test_frame_add_datetime64_col_other_units(self): - n = 100 - - units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] - - ns_dtype = np.dtype('M8[ns]') - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df[unit] = vals - - ex_vals = to_datetime(vals.astype('O')).values - - self.assertEqual(df[unit].dtype, ns_dtype) - self.assertTrue((df[unit].values == ex_vals).all()) - - # Test insertion into existing datetime64 column - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - tmp = df.copy() - - tmp['dates'] = vals - ex_vals = to_datetime(vals.astype('O')).values - - self.assertTrue((tmp['dates'].values == ex_vals).all()) - - def test_to_datetime_unit(self): - - epoch = 1370745748 - s = Series([epoch + t for t in range(20)]) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)]) - assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)]).astype(float) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)]) - assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)] + [NaT]) - assert_series_equal(result, expected) - - s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)] + [NaT]) - assert_series_equal(result, expected) - - # GH13834 - s = Series([epoch + t for t in np.arange(0, 2, .25)] + - [iNaT]).astype(float) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in np.arange(0, 2, .25)] + [NaT]) - assert_series_equal(result, expected) - - s = concat([Series([epoch + t for t in range(20)] - ).astype(float), Series([np.nan])], - ignore_index=True) - result = to_datetime(s, unit='s') - expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( - seconds=t) for t in range(20)] + [NaT]) - assert_series_equal(result, expected) - - result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D') - expected = DatetimeIndex([Timestamp('1970-01-02'), - Timestamp('1970-01-03')] + ['NaT'] * 3) - tm.assert_index_equal(result, expected) - - with self.assertRaises(ValueError): - to_datetime([1, 2, 'foo'], unit='D') - with self.assertRaises(ValueError): - to_datetime([1, 2, 111111111], unit='D') - - # coerce we can process - expected = DatetimeIndex([Timestamp('1970-01-02'), - Timestamp('1970-01-03')] + ['NaT'] * 1) - result = to_datetime([1, 2, 'foo'], unit='D', errors='coerce') - tm.assert_index_equal(result, expected) - - result = to_datetime([1, 2, 111111111], unit='D', errors='coerce') - 
tm.assert_index_equal(result, expected) - - def test_series_ctor_datetime64(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - dates = np.asarray(rng) - - series = Series(dates) - self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) - - def test_index_cast_datetime64_other_units(self): - arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') - - idx = Index(arr) - - self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) - - def test_reindex_series_add_nat(self): - rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') - series = Series(rng) - - result = series.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) - - mask = result.isnull() - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) - - def test_reindex_frame_add_nat(self): - rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') - df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) - - result = df.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) - - mask = com.isnull(result)['B'] - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) - - def test_series_repr_nat(self): - series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') - - result = repr(series) - expected = ('0 1970-01-01 00:00:00.000000\n' - '1 1970-01-01 00:00:00.000001\n' - '2 1970-01-01 00:00:00.000002\n' - '3 NaT\n' - 'dtype: datetime64[ns]') - self.assertEqual(result, expected) - - def test_fillna_nat(self): - series = Series([0, 1, 2, iNaT], dtype='M8[ns]') - - filled = series.fillna(method='pad') - filled2 = series.fillna(value=series.values[2]) - - expected = series.copy() - expected.values[3] = expected.values[2] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='pad') - filled2 = df.fillna(value=series.values[2]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') - - filled = series.fillna(method='bfill') - filled2 = series.fillna(value=series[1]) - - expected = series.copy() - expected[0] = expected[1] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='bfill') - filled2 = df.fillna(value=series[1]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - def test_string_na_nat_conversion(self): - # GH #999, #858 - - from pandas.compat import parse_date - - strings = np.array(['1/1/2000', '1/2/2000', np.nan, - '1/4/2000, 12:34:56'], dtype=object) - - expected = np.empty(4, dtype='M8[ns]') - for i, val in enumerate(strings): - if com.isnull(val): - expected[i] = iNaT - else: - expected[i] = parse_date(val) - - result = tslib.array_to_datetime(strings) - assert_almost_equal(result, expected) - - result2 = to_datetime(strings) - tm.assertIsInstance(result2, DatetimeIndex) - tm.assert_numpy_array_equal(result, result2.values) - - malformed = np.array(['1/100/2000', np.nan], dtype=object) - - # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) - - result = to_datetime(malformed, errors='ignore') - tm.assert_numpy_array_equal(result, malformed) - - self.assertRaises(ValueError, to_datetime, malformed, errors='raise') - - idx = ['a', 'b', 'c', 'd', 
'e'] - series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, - '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') - - result = to_datetime(series) - dresult = to_datetime(dseries) - - expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) - for i in range(5): - x = series[i] - if isnull(x): - expected[i] = iNaT - else: - expected[i] = to_datetime(x) - - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'foo') - - assert_series_equal(dresult, expected, check_names=False) - self.assertEqual(dresult.name, 'foo') - - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) - exp = Timestamp("2012-01-01 00:00:00") - self.assertEqual(result[0], exp) - - result = to_datetime(['20121001']) # bad iso 8601 - exp = Timestamp('2012-10-01') - self.assertEqual(result[0], exp) - - def test_to_datetime_default(self): - rs = to_datetime('2001') - xp = datetime(2001, 1, 1) - self.assertTrue(rs, xp) - - # dayfirst is essentially broken - - # to_datetime('01-13-2012', dayfirst=True) - # self.assertRaises(ValueError, to_datetime('01-13-2012', - # dayfirst=True)) - - def test_to_datetime_on_datetime64_series(self): - # #2699 - s = Series(date_range('1/1/2000', periods=10)) - - result = to_datetime(s) - self.assertEqual(result[0], s[0]) - - def test_to_datetime_with_space_in_series(self): - # GH 6428 - s = Series(['10/18/2006', '10/18/2008', ' ']) - tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') - expected_coerce = Series([datetime(2006, 10, 18), - datetime(2008, 10, 18), - pd.NaT]) - tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') - tm.assert_series_equal(result_ignore, s) - - def test_to_datetime_with_apply(self): - # this is only locale tested with US/None locales - _skip_if_has_locale() - - # GH 5195 - # with a format and coerce a single item to_datetime fails - td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected = pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') - assert_series_equal(result, expected) - - td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) - self.assertRaises(ValueError, - lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) - self.assertRaises(ValueError, - lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') - - result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) - assert_series_equal(result, expected) - - def test_nat_vector_field_access(self): - idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) - - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) - - s = pd.Series(idx) - - for field in fields: - result = getattr(s.dt, field) - expected = [getattr(x, field) for x in idx] - self.assert_series_equal(result, pd.Series(expected)) - - def test_nat_scalar_field_access(self): - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 
'daysinmonth', 'dayofweek', 'weekday_name'] - for field in fields: - result = getattr(NaT, field) - self.assertTrue(np.isnan(result)) - - def test_NaT_methods(self): - # GH 9513 - raise_methods = ['astimezone', 'combine', 'ctime', 'dst', - 'fromordinal', 'fromtimestamp', 'isocalendar', - 'strftime', 'strptime', 'time', 'timestamp', - 'timetuple', 'timetz', 'toordinal', 'tzname', - 'utcfromtimestamp', 'utcnow', 'utcoffset', - 'utctimetuple'] - nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] - nan_methods = ['weekday', 'isoweekday'] - - for method in raise_methods: - if hasattr(NaT, method): - self.assertRaises(ValueError, getattr(NaT, method)) - - for method in nan_methods: - if hasattr(NaT, method): - self.assertTrue(np.isnan(getattr(NaT, method)())) - - for method in nat_methods: - if hasattr(NaT, method): - # see gh-8254 - exp_warning = None - if method == 'to_datetime': - exp_warning = FutureWarning - with tm.assert_produces_warning( - exp_warning, check_stacklevel=False): - self.assertIs(getattr(NaT, method)(), NaT) - - # GH 12300 - self.assertEqual(NaT.isoformat(), 'NaT') - - def test_to_datetime_types(self): - - # empty string - result = to_datetime('') - self.assertIs(result, NaT) - - result = to_datetime(['', '']) - self.assertTrue(isnull(result).all()) - - # ints - result = Timestamp(0) - expected = to_datetime(0) - self.assertEqual(result, expected) - - # GH 3888 (strings) - expected = to_datetime(['2012'])[0] - result = to_datetime('2012') - self.assertEqual(result, expected) - - # array = ['2012','20120101','20120101 12:01:01'] - array = ['20120101', '20120101 12:01:01'] - expected = list(to_datetime(array)) - result = lmap(Timestamp, array) - tm.assert_almost_equal(result, expected) - - # currently fails ### - # result = Timestamp('2012') - # expected = to_datetime('2012') - # self.assertEqual(result, expected) - - def test_to_datetime_unprocessable_input(self): - # GH 4928 - self.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore'), - np.array([1, '1'], dtype='O') - ) - self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') - - def test_to_datetime_other_datetime64_units(self): - # 5/25/2012 - scalar = np.int64(1337904000000000).view('M8[us]') - as_obj = scalar.astype('O') - - index = DatetimeIndex([scalar]) - self.assertEqual(index[0], scalar.astype('O')) - - value = Timestamp(scalar) - self.assertEqual(value, as_obj) - - def test_to_datetime_list_of_integers(self): - rng = date_range('1/1/2000', periods=20) - rng = DatetimeIndex(rng.values) - - ints = list(rng.asi8) - - result = DatetimeIndex(ints) - - tm.assert_index_equal(rng, result) - - def test_to_datetime_freq(self): - xp = bdate_range('2000-1-1', periods=10, tz='UTC') - rs = xp.to_datetime() - self.assertEqual(xp.freq, rs.freq) - self.assertEqual(xp.tzinfo, rs.tzinfo) - - def test_range_edges(self): - # GH 13672 - idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'), - end=Timestamp('1970-01-01 00:00:00.000000004'), - freq='N') - exp = DatetimeIndex(['1970-01-01 00:00:00.000000001', - '1970-01-01 00:00:00.000000002', - '1970-01-01 00:00:00.000000003', - '1970-01-01 00:00:00.000000004']) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000004'), - end=Timestamp('1970-01-01 00:00:00.000000001'), - freq='N') - exp = DatetimeIndex([]) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000000001'), - end=Timestamp('1970-01-01 00:00:00.000000001'), - freq='N') - exp = 
DatetimeIndex(['1970-01-01 00:00:00.000000001']) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.000001'), - end=Timestamp('1970-01-01 00:00:00.000004'), - freq='U') - exp = DatetimeIndex(['1970-01-01 00:00:00.000001', - '1970-01-01 00:00:00.000002', - '1970-01-01 00:00:00.000003', - '1970-01-01 00:00:00.000004']) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:00.001'), - end=Timestamp('1970-01-01 00:00:00.004'), - freq='L') - exp = DatetimeIndex(['1970-01-01 00:00:00.001', - '1970-01-01 00:00:00.002', - '1970-01-01 00:00:00.003', - '1970-01-01 00:00:00.004']) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01 00:00:01'), - end=Timestamp('1970-01-01 00:00:04'), freq='S') - exp = DatetimeIndex(['1970-01-01 00:00:01', '1970-01-01 00:00:02', - '1970-01-01 00:00:03', '1970-01-01 00:00:04']) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01 00:01'), - end=Timestamp('1970-01-01 00:04'), freq='T') - exp = DatetimeIndex(['1970-01-01 00:01', '1970-01-01 00:02', - '1970-01-01 00:03', '1970-01-01 00:04']) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01 01:00'), - end=Timestamp('1970-01-01 04:00'), freq='H') - exp = DatetimeIndex(['1970-01-01 01:00', '1970-01-01 02:00', - '1970-01-01 03:00', '1970-01-01 04:00']) - tm.assert_index_equal(idx, exp) - - idx = DatetimeIndex(start=Timestamp('1970-01-01'), - end=Timestamp('1970-01-04'), freq='D') - exp = DatetimeIndex(['1970-01-01', '1970-01-02', - '1970-01-03', '1970-01-04']) - tm.assert_index_equal(idx, exp) - - def test_range_misspecified(self): - # GH #1095 - - self.assertRaises(ValueError, date_range, '1/1/2000') - self.assertRaises(ValueError, date_range, end='1/1/2000') - self.assertRaises(ValueError, date_range, periods=10) - - self.assertRaises(ValueError, date_range, '1/1/2000', freq='H') - self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H') - self.assertRaises(ValueError, date_range, periods=10, freq='H') - - def test_reasonable_keyerror(self): - # GH #1062 - index = DatetimeIndex(['1/3/2000']) - try: - index.get_loc('1/1/2000') - except KeyError as e: - self.assertIn('2000', str(e)) - - def test_reindex_with_datetimes(self): - rng = date_range('1/1/2000', periods=20) - ts = Series(np.random.randn(20), index=rng) - - result = ts.reindex(list(ts.index[5:10])) - expected = ts[5:10] - tm.assert_series_equal(result, expected) - - result = ts[list(ts.index[5:10])] - tm.assert_series_equal(result, expected) - - def test_asfreq_keep_index_name(self): - # GH #9854 - index_name = 'bar' - index = pd.date_range('20130101', periods=20, name=index_name) - df = pd.DataFrame([x for x in range(20)], columns=['foo'], index=index) - - self.assertEqual(index_name, df.index.name) - self.assertEqual(index_name, df.asfreq('10D').index.name) - - def test_promote_datetime_date(self): - rng = date_range('1/1/2000', periods=20) - ts = Series(np.random.randn(20), index=rng) - - ts_slice = ts[5:] - ts2 = ts_slice.copy() - ts2.index = [x.date() for x in ts2.index] - - result = ts + ts2 - result2 = ts2 + ts - expected = ts + ts[5:] - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - - # test asfreq - result = ts2.asfreq('4H', method='ffill') - expected = ts[5:].asfreq('4H', method='ffill') - assert_series_equal(result, expected) - - result = rng.get_indexer(ts2.index) - expected = rng.get_indexer(ts_slice.index) - 
self.assert_numpy_array_equal(result, expected) - - def test_asfreq_normalize(self): - rng = date_range('1/1/2000 09:30', periods=20) - norm = date_range('1/1/2000', periods=20) - vals = np.random.randn(20) - ts = Series(vals, index=rng) - - result = ts.asfreq('D', normalize=True) - norm = date_range('1/1/2000', periods=20) - expected = Series(vals, index=norm) - - assert_series_equal(result, expected) - - vals = np.random.randn(20, 3) - ts = DataFrame(vals, index=rng) - - result = ts.asfreq('D', normalize=True) - expected = DataFrame(vals, index=norm) - - assert_frame_equal(result, expected) - - def test_date_range_gen_error(self): - rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') - self.assertEqual(len(rng), 4) - - def test_date_range_negative_freq(self): - # GH 11018 - rng = date_range('2011-12-31', freq='-2A', periods=3) - exp = pd.DatetimeIndex(['2011-12-31', '2009-12-31', - '2007-12-31'], freq='-2A') - tm.assert_index_equal(rng, exp) - self.assertEqual(rng.freq, '-2A') - - rng = date_range('2011-01-31', freq='-2M', periods=3) - exp = pd.DatetimeIndex(['2011-01-31', '2010-11-30', - '2010-09-30'], freq='-2M') - tm.assert_index_equal(rng, exp) - self.assertEqual(rng.freq, '-2M') - - def test_date_range_bms_bug(self): - # #1645 - rng = date_range('1/1/2000', periods=10, freq='BMS') - - ex_first = Timestamp('2000-01-03') - self.assertEqual(rng[0], ex_first) - - def test_date_range_businesshour(self): - idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', - '2014-07-04 11:00', - '2014-07-04 12:00', '2014-07-04 13:00', - '2014-07-04 14:00', - '2014-07-04 15:00', '2014-07-04 16:00'], - freq='BH') - rng = date_range('2014-07-04 09:00', '2014-07-04 16:00', freq='BH') - tm.assert_index_equal(idx, rng) - - idx = DatetimeIndex( - ['2014-07-04 16:00', '2014-07-07 09:00'], freq='BH') - rng = date_range('2014-07-04 16:00', '2014-07-07 09:00', freq='BH') - tm.assert_index_equal(idx, rng) - - idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', - '2014-07-04 11:00', - '2014-07-04 12:00', '2014-07-04 13:00', - '2014-07-04 14:00', - '2014-07-04 15:00', '2014-07-04 16:00', - '2014-07-07 09:00', '2014-07-07 10:00', - '2014-07-07 11:00', - '2014-07-07 12:00', '2014-07-07 13:00', - '2014-07-07 14:00', - '2014-07-07 15:00', '2014-07-07 16:00', - '2014-07-08 09:00', '2014-07-08 10:00', - '2014-07-08 11:00', - '2014-07-08 12:00', '2014-07-08 13:00', - '2014-07-08 14:00', - '2014-07-08 15:00', '2014-07-08 16:00'], - freq='BH') - rng = date_range('2014-07-04 09:00', '2014-07-08 16:00', freq='BH') - tm.assert_index_equal(idx, rng) - - def test_first_subset(self): - ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') - result = ts.first('10d') - self.assertEqual(len(result), 20) - - ts = _simple_ts('1/1/2000', '1/1/2010') - result = ts.first('10d') - self.assertEqual(len(result), 10) - - result = ts.first('3M') - expected = ts[:'3/31/2000'] - assert_series_equal(result, expected) - - result = ts.first('21D') - expected = ts[:21] - assert_series_equal(result, expected) - - result = ts[:0].first('3M') - assert_series_equal(result, ts[:0]) - - def test_last_subset(self): - ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') - result = ts.last('10d') - self.assertEqual(len(result), 20) - - ts = _simple_ts('1/1/2000', '1/1/2010') - result = ts.last('10d') - self.assertEqual(len(result), 10) - - result = ts.last('21D') - expected = ts['12/12/2009':] - assert_series_equal(result, expected) - - result = ts.last('21D') - expected = ts[-21:] - assert_series_equal(result, expected) - - 
result = ts[:0].last('3M') - assert_series_equal(result, ts[:0]) - - def test_format_pre_1900_dates(self): - rng = date_range('1/1/1850', '1/1/1950', freq='A-DEC') - rng.format() - ts = Series(1, index=rng) - repr(ts) - - def test_at_time(self): - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = Series(np.random.randn(len(rng)), index=rng) - rs = ts.at_time(rng[1]) - self.assertTrue((rs.index.hour == rng[1].hour).all()) - self.assertTrue((rs.index.minute == rng[1].minute).all()) - self.assertTrue((rs.index.second == rng[1].second).all()) - - result = ts.at_time('9:30') - expected = ts.at_time(time(9, 30)) - assert_series_equal(result, expected) - - df = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts[time(9, 30)] - result_df = df.loc[time(9, 30)] - expected = ts[(rng.hour == 9) & (rng.minute == 30)] - exp_df = df[(rng.hour == 9) & (rng.minute == 30)] - - # expected.index = date_range('1/1/2000', '1/4/2000') - - assert_series_equal(result, expected) - tm.assert_frame_equal(result_df, exp_df) - - chunk = df.loc['1/4/2000':] - result = chunk.loc[time(9, 30)] - expected = result_df[-1:] - tm.assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range('1/1/2000', '1/31/2000') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.at_time(time(0, 0)) - assert_series_equal(result, ts) - - # time doesn't exist - rng = date_range('1/1/2012', freq='23Min', periods=384) - ts = Series(np.random.randn(len(rng)), rng) - rs = ts.at_time('16:00') - self.assertEqual(len(rs), 0) - - def test_at_time_frame(self): - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - rs = ts.at_time(rng[1]) - self.assertTrue((rs.index.hour == rng[1].hour).all()) - self.assertTrue((rs.index.minute == rng[1].minute).all()) - self.assertTrue((rs.index.second == rng[1].second).all()) - - result = ts.at_time('9:30') - expected = ts.at_time(time(9, 30)) - assert_frame_equal(result, expected) - - result = ts.loc[time(9, 30)] - expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] - - assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range('1/1/2000', '1/31/2000') - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts.at_time(time(0, 0)) - assert_frame_equal(result, ts) - - # time doesn't exist - rng = date_range('1/1/2012', freq='23Min', periods=384) - ts = DataFrame(np.random.randn(len(rng), 2), rng) - rs = ts.at_time('16:00') - self.assertEqual(len(rs), 0) - - def test_between_time(self): - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(0, 0) - etime = time(1, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - self.assertEqual(len(filtered), exp_len) - for rs in filtered.index: - t = rs.time() - if inc_start: - self.assertTrue(t >= stime) - else: - self.assertTrue(t > stime) - - if inc_end: - self.assertTrue(t <= etime) - else: - self.assertTrue(t < etime) - - result = ts.between_time('00:00', '01:00') - expected = ts.between_time(stime, etime) - assert_series_equal(result, expected) - - # across midnight - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(22, 0) - etime = time(9, 0) - - close_open = 
product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - self.assertEqual(len(filtered), exp_len) - for rs in filtered.index: - t = rs.time() - if inc_start: - self.assertTrue((t >= stime) or (t <= etime)) - else: - self.assertTrue((t > stime) or (t <= etime)) - - if inc_end: - self.assertTrue((t <= etime) or (t >= stime)) - else: - self.assertTrue((t < etime) or (t >= stime)) - - def test_between_time_frame(self): - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(0, 0) - etime = time(1, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - self.assertEqual(len(filtered), exp_len) - for rs in filtered.index: - t = rs.time() - if inc_start: - self.assertTrue(t >= stime) - else: - self.assertTrue(t > stime) - - if inc_end: - self.assertTrue(t <= etime) - else: - self.assertTrue(t < etime) - - result = ts.between_time('00:00', '01:00') - expected = ts.between_time(stime, etime) - assert_frame_equal(result, expected) - - # across midnight - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(22, 0) - etime = time(9, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - self.assertEqual(len(filtered), exp_len) - for rs in filtered.index: - t = rs.time() - if inc_start: - self.assertTrue((t >= stime) or (t <= etime)) - else: - self.assertTrue((t > stime) or (t <= etime)) - - if inc_end: - self.assertTrue((t <= etime) or (t >= stime)) - else: - self.assertTrue((t < etime) or (t >= stime)) - - def test_between_time_types(self): - # GH11818 - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - self.assertRaises(ValueError, rng.indexer_between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - frame = DataFrame({'A': 0}, index=rng) - self.assertRaises(ValueError, frame.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - series = Series(0, index=rng) - self.assertRaises(ValueError, series.between_time, - datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - def test_between_time_formats(self): - # GH11818 - _skip_if_has_locale() - - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - - strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"), - ("0200am", "0230am"), ("2:00:00", "2:30:00"), - ("020000", "023000"), ("2:00:00am", "2:30:00am"), - ("020000am", "023000am")] - expected_length = 28 - - for time_string in strings: - self.assertEqual(len(ts.between_time(*time_string)), - expected_length, - "%s - %s" % time_string) - - def test_dti_constructor_preserve_dti_freq(self): - rng = date_range('1/1/2000', '1/2/2000', freq='5min') - - rng2 = DatetimeIndex(rng) - self.assertEqual(rng.freq, rng2.freq) - - def test_dti_constructor_years_only(self): - # GH 6961 - for tz in [None, 'UTC', 'Asia/Tokyo', 'dateutil/US/Pacific']: - rng1 = date_range('2014', 
'2015', freq='M', tz=tz) - expected1 = date_range('2014-01-31', '2014-12-31', freq='M', tz=tz) - - rng2 = date_range('2014', '2015', freq='MS', tz=tz) - expected2 = date_range('2014-01-01', '2015-01-01', freq='MS', - tz=tz) - - rng3 = date_range('2014', '2020', freq='A', tz=tz) - expected3 = date_range('2014-12-31', '2019-12-31', freq='A', tz=tz) - - rng4 = date_range('2014', '2020', freq='AS', tz=tz) - expected4 = date_range('2014-01-01', '2020-01-01', freq='AS', - tz=tz) - - for rng, expected in [(rng1, expected1), (rng2, expected2), - (rng3, expected3), (rng4, expected4)]: - tm.assert_index_equal(rng, expected) - - def test_dti_constructor_small_int(self): - # GH 13721 - exp = DatetimeIndex(['1970-01-01 00:00:00.00000000', - '1970-01-01 00:00:00.00000001', - '1970-01-01 00:00:00.00000002']) - - for dtype in [np.int64, np.int32, np.int16, np.int8]: - arr = np.array([0, 10, 20], dtype=dtype) - tm.assert_index_equal(DatetimeIndex(arr), exp) - - def test_dti_constructor_numpy_timeunits(self): - # GH 9114 - base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) - - for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', - 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: - values = base.values.astype(dtype) - - tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values), base) - - def test_normalize(self): - rng = date_range('1/1/2000 9:30', periods=10, freq='D') - - result = rng.normalize() - expected = date_range('1/1/2000', periods=10, freq='D') - tm.assert_index_equal(result, expected) - - rng_ns = pd.DatetimeIndex(np.array([1380585623454345752, - 1380585612343234312]).astype( - "datetime64[ns]")) - rng_ns_normalized = rng_ns.normalize() - expected = pd.DatetimeIndex(np.array([1380585600000000000, - 1380585600000000000]).astype( - "datetime64[ns]")) - tm.assert_index_equal(rng_ns_normalized, expected) - - self.assertTrue(result.is_normalized) - self.assertFalse(rng.is_normalized) - - def test_to_period(self): - from pandas.tseries.period import period_range - - ts = _simple_ts('1/1/2000', '1/1/2001') - - pts = ts.to_period() - exp = ts.copy() - exp.index = period_range('1/1/2000', '1/1/2001') - assert_series_equal(pts, exp) - - pts = ts.to_period('M') - exp.index = exp.index.asfreq('M') - tm.assert_index_equal(pts.index, exp.index.asfreq('M')) - assert_series_equal(pts, exp) - - # GH 7606 without freq - idx = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', - '2011-01-04']) - exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03', - '2011-01-04'], freq='D') - - s = Series(np.random.randn(4), index=idx) - expected = s.copy() - expected.index = exp_idx - assert_series_equal(s.to_period(), expected) - - df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) - expected = df.copy() - expected.index = exp_idx - assert_frame_equal(df.to_period(), expected) - - expected = df.copy() - expected.columns = exp_idx - assert_frame_equal(df.to_period(axis=1), expected) - - def create_dt64_based_index(self): - data = [Timestamp('2007-01-01 10:11:12.123456Z'), - Timestamp('2007-01-01 10:11:13.789123Z')] - index = DatetimeIndex(data) - return index - - def test_to_period_millisecond(self): - index = self.create_dt64_based_index() - - period = index.to_period(freq='L') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) - - def test_to_period_microsecond(self): - index = 
self.create_dt64_based_index() - - period = index.to_period(freq='U') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) - - def test_to_period_tz_pytz(self): - tm._skip_if_no_pytz() - from dateutil.tz import tzlocal - from pytz import utc as UTC - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertEqual(result, expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=UTC) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertEqual(result, expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertEqual(result, expected) - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - from dateutil.tz import tzlocal - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - from dateutil.tz import tzlocal - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - def test_frame_to_period(self): - K = 5 - from pandas.tseries.period import period_range - - dr = date_range('1/1/2000', '1/1/2001') - pr = period_range('1/1/2000', '1/1/2001') - df = DataFrame(randn(len(dr), K), index=dr) - df['mix'] = 'a' - - pts = df.to_period() - exp = df.copy() - exp.index = pr - assert_frame_equal(pts, exp) - - pts = df.to_period('M') - tm.assert_index_equal(pts.index, exp.index.asfreq('M')) - - df = df.T - pts = df.to_period(axis=1) - exp = df.copy() - exp.columns = pr - assert_frame_equal(pts, exp) - - pts = df.to_period('M', axis=1) - tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) - - self.assertRaises(ValueError, df.to_period, axis=2) - - def test_timestamp_fields(self): - # extra fields from DatetimeIndex like quarter and week - idx = tm.makeDateIndex(100) - - fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', - 
'days_in_month', 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'] - for f in fields: - expected = getattr(idx, f)[-1] - result = getattr(Timestamp(idx[-1]), f) - self.assertEqual(result, expected) - - self.assertEqual(idx.freq, Timestamp(idx[-1], idx.freq).freq) - self.assertEqual(idx.freqstr, Timestamp(idx[-1], idx.freq).freqstr) - - def test_woy_boundary(self): - # make sure weeks at year boundaries are correct - d = datetime(2013, 12, 31) - result = Timestamp(d).week - expected = 1 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2008, 12, 28) - result = Timestamp(d).week - expected = 52 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2009, 12, 31) - result = Timestamp(d).week - expected = 53 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2010, 1, 1) - result = Timestamp(d).week - expected = 53 # ISO standard - self.assertEqual(result, expected) - - d = datetime(2010, 1, 3) - result = Timestamp(d).week - expected = 53 # ISO standard - self.assertEqual(result, expected) - - result = np.array([Timestamp(datetime(*args)).week - for args in [(2000, 1, 1), (2000, 1, 2), ( - 2005, 1, 1), (2005, 1, 2)]]) - self.assertTrue((result == [52, 52, 53, 53]).all()) - - def test_timestamp_date_out_of_range(self): - self.assertRaises(ValueError, Timestamp, '1676-01-01') - self.assertRaises(ValueError, Timestamp, '2263-01-01') - - # 1475 - self.assertRaises(ValueError, DatetimeIndex, ['1400-01-01']) - self.assertRaises(ValueError, DatetimeIndex, [datetime(1400, 1, 1)]) - - def test_compat_replace(self): - # https://github.com/statsmodels/statsmodels/issues/3349 - # replace should take ints/longs for compat - - for f in [compat.long, int]: - result = date_range(Timestamp('1960-04-01 00:00:00', - freq='QS-JAN'), - periods=f(76), - freq='QS-JAN') - self.assertEqual(len(result), 76) - - def test_timestamp_repr(self): - # pre-1900 - stamp = Timestamp('1850-01-01', tz='US/Eastern') - repr(stamp) - - iso8601 = '1850-01-01 01:23:45.012345' - stamp = Timestamp(iso8601, tz='US/Eastern') - result = repr(stamp) - self.assertIn(iso8601, result) - - def test_timestamp_from_ordinal(self): - - # GH 3042 - dt = datetime(2011, 4, 16, 0, 0) - ts = Timestamp.fromordinal(dt.toordinal()) - self.assertEqual(ts.to_pydatetime(), dt) - - # with a tzinfo - stamp = Timestamp('2011-4-16', tz='US/Eastern') - dt_tz = stamp.to_pydatetime() - ts = Timestamp.fromordinal(dt_tz.toordinal(), tz='US/Eastern') - self.assertEqual(ts.to_pydatetime(), dt_tz) - - def test_datetimeindex_integers_shift(self): - rng = date_range('1/1/2000', periods=20) - - result = rng + 5 - expected = rng.shift(5) - tm.assert_index_equal(result, expected) - - result = rng - 5 - expected = rng.shift(-5) - tm.assert_index_equal(result, expected) - - def test_astype_object(self): - # NumPy 1.6.1 weak ns support - rng = date_range('1/1/2000', periods=20) - - casted = rng.astype('O') - exp_values = list(rng) - - tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) - self.assertEqual(casted.tolist(), exp_values) - - def test_catch_infinite_loop(self): - offset = offsets.DateOffset(minute=5) - # blow up, don't loop forever - self.assertRaises(Exception, date_range, datetime(2011, 11, 11), - datetime(2011, 11, 12), freq=offset) - - def test_append_concat(self): - rng = date_range('5/8/2012 1:45', periods=10, freq='5T') - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - 
- result = ts.append(ts) - result_df = df.append(df) - ex_index = DatetimeIndex(np.tile(rng.values, 2)) - tm.assert_index_equal(result.index, ex_index) - tm.assert_index_equal(result_df.index, ex_index) - - appended = rng.append(rng) - tm.assert_index_equal(appended, ex_index) - - appended = rng.append([rng, rng]) - ex_index = DatetimeIndex(np.tile(rng.values, 3)) - tm.assert_index_equal(appended, ex_index) - - # different index names - rng1 = rng.copy() - rng2 = rng.copy() - rng1.name = 'foo' - rng2.name = 'bar' - self.assertEqual(rng1.append(rng1).name, 'foo') - self.assertIsNone(rng1.append(rng2).name) - - def test_append_concat_tz(self): - # GH 2938 - tm._skip_if_no_pytz() - - rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz='US/Eastern') - rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', - tz='US/Eastern') - rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz='US/Eastern') - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_concat_tz_explicit_pytz(self): - # GH 2938 - tm._skip_if_no_pytz() - from pytz import timezone as timezone - - rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz=timezone('US/Eastern')) - rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', - tz=timezone('US/Eastern')) - rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz=timezone('US/Eastern')) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_concat_tz_dateutil(self): - # GH 2938 - tm._skip_if_no_dateutil() - rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz='dateutil/US/Eastern') - rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', - tz='dateutil/US/Eastern') - rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz='dateutil/US/Eastern') - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_set_dataframe_column_ns_dtype(self): - x = DataFrame([datetime.now(), datetime.now()]) - self.assertEqual(x[0].dtype, np.dtype('M8[ns]')) - - def test_groupby_count_dateparseerror(self): - dr = date_range(start='1/1/2012', freq='5min', periods=10) - - # BAD Example, datetimes first - s = Series(np.arange(10), index=[dr, lrange(10)]) - grouped = s.groupby(lambda x: x[1] % 2 == 0) - result = grouped.count() - - s = Series(np.arange(10), index=[lrange(10), dr]) - grouped = s.groupby(lambda x: x[0] % 2 == 0) - expected = grouped.count() - - assert_series_equal(result, 
expected) - - def test_datetimeindex_repr_short(self): - dr = date_range(start='1/1/2012', periods=1) - repr(dr) - - dr = date_range(start='1/1/2012', periods=2) - repr(dr) - - dr = date_range(start='1/1/2012', periods=3) - repr(dr) - - def test_constructor_int64_nocopy(self): - # #1624 - arr = np.arange(1000, dtype=np.int64) - index = DatetimeIndex(arr) - - arr[50:100] = -1 - self.assertTrue((index.asi8[50:100] == -1).all()) - - arr = np.arange(1000, dtype=np.int64) - index = DatetimeIndex(arr, copy=True) - - arr[50:100] = -1 - self.assertTrue((index.asi8[50:100] != -1).all()) - - def test_series_interpolate_method_values(self): - # #1646 - ts = _simple_ts('1/1/2000', '1/20/2000') - ts[::2] = np.nan - - result = ts.interpolate(method='values') - exp = ts.interpolate() - assert_series_equal(result, exp) - - def test_frame_datetime64_handling_groupby(self): - # it works! - df = DataFrame([(3, np.datetime64('2012-07-03')), - (3, np.datetime64('2012-07-04'))], - columns=['a', 'date']) - result = df.groupby('a').first() - self.assertEqual(result['date'][3], Timestamp('2012-07-03')) - - def test_series_interpolate_intraday(self): - # #1698 - index = pd.date_range('1/1/2012', periods=4, freq='12D') - ts = pd.Series([0, 12, 24, 36], index) - new_index = index.append(index + pd.DateOffset(days=1)).sort_values() - - exp = ts.reindex(new_index).interpolate(method='time') - - index = pd.date_range('1/1/2012', periods=4, freq='12H') - ts = pd.Series([0, 12, 24, 36], index) - new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() - result = ts.reindex(new_index).interpolate(method='time') - - self.assert_numpy_array_equal(result.values, exp.values) - - def test_frame_dict_constructor_datetime64_1680(self): - dr = date_range('1/1/2012', periods=10) - s = Series(dr, index=dr) - - # it works! - DataFrame({'a': 'foo', 'b': s}, index=dr) - DataFrame({'a': 'foo', 'b': s.values}, index=dr) - - def test_frame_datetime64_mixed_index_ctor_1681(self): - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - ts = Series(dr) - - # it works! - d = DataFrame({'A': 'foo', 'B': ts}, index=dr) - self.assertTrue(d['B'].isnull().all()) - - def test_frame_timeseries_to_records(self): - index = date_range('1/1/2000', periods=10) - df = DataFrame(np.random.randn(10, 3), index=index, - columns=['a', 'b', 'c']) - - result = df.to_records() - result['index'].dtype == 'M8[ns]' - - result = df.to_records(index=False) - - def test_frame_datetime64_duplicated(self): - dates = date_range('2010-07-01', end='2010-08-05') - - tst = DataFrame({'symbol': 'AAA', 'date': dates}) - result = tst.duplicated(['date', 'symbol']) - self.assertTrue((-result).all()) - - tst = DataFrame({'date': dates}) - result = tst.duplicated() - self.assertTrue((-result).all()) - - def test_timestamp_compare_with_early_datetime(self): - # e.g. 
datetime.min - stamp = Timestamp('2012-01-01') - - self.assertFalse(stamp == datetime.min) - self.assertFalse(stamp == datetime(1600, 1, 1)) - self.assertFalse(stamp == datetime(2700, 1, 1)) - self.assertNotEqual(stamp, datetime.min) - self.assertNotEqual(stamp, datetime(1600, 1, 1)) - self.assertNotEqual(stamp, datetime(2700, 1, 1)) - self.assertTrue(stamp > datetime(1600, 1, 1)) - self.assertTrue(stamp >= datetime(1600, 1, 1)) - self.assertTrue(stamp < datetime(2700, 1, 1)) - self.assertTrue(stamp <= datetime(2700, 1, 1)) - - def test_to_html_timestamp(self): - rng = date_range('2000-01-01', periods=10) - df = DataFrame(np.random.randn(10, 4), index=rng) - - result = df.to_html() - self.assertIn('2000-01-01', result) - - def test_to_csv_numpy_16_bug(self): - frame = DataFrame({'a': date_range('1/1/2000', periods=10)}) - - buf = StringIO() - frame.to_csv(buf) - - result = buf.getvalue() - self.assertIn('2000-01-01', result) - - def test_series_map_box_timestamps(self): - # #2689, #2627 - s = Series(date_range('1/1/2000', periods=10)) - - def f(x): - return (x.hour, x.day, x.month) - - # it works! - s.map(f) - s.apply(f) - DataFrame(s).applymap(f) - - def test_series_map_box_timedelta(self): - # GH 11349 - s = Series(timedelta_range('1 day 1 s', periods=5, freq='h')) - - def f(x): - return x.total_seconds() - - s.map(f) - s.apply(f) - DataFrame(s).applymap(f) - - def test_concat_datetime_datetime64_frame(self): - # #2624 - rows = [] - rows.append([datetime(2010, 1, 1), 1]) - rows.append([datetime(2010, 1, 2), 'hi']) - - df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - - ind = date_range(start="2000/1/1", freq="D", periods=10) - df1 = DataFrame({'date': ind, 'test': lrange(10)}) - - # it works! - pd.concat([df1, df2_obj]) - - def test_asfreq_resample_set_correct_freq(self): - # GH5613 - # we test if .asfreq() and .resample() set the correct value for .freq - df = pd.DataFrame({'date': ["2012-01-01", "2012-01-02", "2012-01-03"], - 'col': [1, 2, 3]}) - df = df.set_index(pd.to_datetime(df.date)) - - # testing the settings before calling .asfreq() and .resample() - self.assertEqual(df.index.freq, None) - self.assertEqual(df.index.inferred_freq, 'D') - - # does .asfreq() set .freq correctly? - self.assertEqual(df.asfreq('D').index.freq, 'D') - - # does .resample() set .freq correctly? 
- self.assertEqual(df.resample('D').asfreq().index.freq, 'D') - - def test_pickle(self): - - # GH4606 - p = self.round_trip_pickle(NaT) - self.assertTrue(p is NaT) - - idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06']) - idx_p = self.round_trip_pickle(idx) - self.assertTrue(idx_p[0] == idx[0]) - self.assertTrue(idx_p[1] is NaT) - self.assertTrue(idx_p[2] == idx[2]) - - # GH11002 - # don't infer freq - idx = date_range('1750-1-1', '2050-1-1', freq='7D') - idx_p = self.round_trip_pickle(idx) - tm.assert_index_equal(idx, idx_p) - - def test_timestamp_equality(self): - - # GH 11034 - s = Series([Timestamp('2000-01-29 01:59:00'), 'NaT']) - result = s != s - assert_series_equal(result, Series([False, True])) - result = s != s[0] - assert_series_equal(result, Series([False, True])) - result = s != s[1] - assert_series_equal(result, Series([True, True])) - - result = s == s - assert_series_equal(result, Series([True, False])) - result = s == s[0] - assert_series_equal(result, Series([True, False])) - result = s == s[1] - assert_series_equal(result, Series([False, False])) - - -def _simple_ts(start, end, freq='D'): - rng = date_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) - - -class TestToDatetime(tm.TestCase): - _multiprocess_can_split_ = True - - def test_to_datetime_dt64s(self): - in_bound_dts = [ - np.datetime64('2000-01-01'), - np.datetime64('2000-01-02'), - ] - - for dt in in_bound_dts: - self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) - - oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] - - for dt in oob_dts: - self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') - self.assertRaises(ValueError, tslib.Timestamp, dt) - self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) - - def test_to_datetime_array_of_dt64s(self): - dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] - - # Assuming all datetimes are in bounds, to_datetime() returns - # an array that is equal to Timestamp() parsing - self.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), - np.array([Timestamp(x).asm8 for x in dts]) - ) - - # A list of datetimes where the last one is out of bounds - dts_with_oob = dts + [np.datetime64('9999-01-01')] - - self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, - errors='raise') - - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), - np.array( - [ - Timestamp(dts_with_oob[0]).asm8, - Timestamp(dts_with_oob[1]).asm8, - iNaT, - ], - dtype='M8' - ) - ) - - # With errors='ignore', out of bounds datetime64s - # are converted to their .item(), which depending on the version of - # numpy is either a python datetime.datetime or datetime.date - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='ignore'), - np.array( - [dt.item() for dt in dts_with_oob], - dtype='O' - ) - ) - - def test_to_datetime_tz(self): - - # xref 8260 - # uniform returns a DatetimeIndex - arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) - expected = DatetimeIndex( - ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') - tm.assert_index_equal(result, expected) - - # mixed tzs will raise - arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) - - def test_to_datetime_tz_pytz(self): - - # xref 8260 - 
tm._skip_if_no_pytz() - import pytz - - us_eastern = pytz.timezone('US/Eastern') - arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, - hour=3, minute=0)), - us_eastern.localize(datetime(year=2000, month=6, day=1, - hour=3, minute=0))], - dtype=object) - result = pd.to_datetime(arr, utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - '2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) - - def test_to_datetime_tz_psycopg2(self): - - # xref 8260 - try: - import psycopg2 - except ImportError: - raise nose.SkipTest("no psycopg2 installed") - - # misc cases - tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) - tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) - arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), - datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], - dtype=object) - - result = pd.to_datetime(arr, errors='coerce', utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - '2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - # dtype coercion - i = pd.DatetimeIndex([ - '2000-01-01 08:00:00+00:00' - ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) - self.assertTrue(is_datetime64_ns_dtype(i)) - - # tz coerceion - result = pd.to_datetime(i, errors='coerce') - tm.assert_index_equal(result, i) - - result = pd.to_datetime(i, errors='coerce', utc=True) - expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], - dtype='datetime64[ns, UTC]') - tm.assert_index_equal(result, expected) - - def test_datetime_bool(self): - # GH13176 - with self.assertRaises(TypeError): - to_datetime(False) - self.assertTrue(to_datetime(False, errors="coerce") is tslib.NaT) - self.assertEqual(to_datetime(False, errors="ignore"), False) - with self.assertRaises(TypeError): - to_datetime(True) - self.assertTrue(to_datetime(True, errors="coerce") is tslib.NaT) - self.assertEqual(to_datetime(True, errors="ignore"), True) - with self.assertRaises(TypeError): - to_datetime([False, datetime.today()]) - with self.assertRaises(TypeError): - to_datetime(['20130101', True]) - tm.assert_index_equal(to_datetime([0, False, tslib.NaT, 0.0], - errors="coerce"), - DatetimeIndex([to_datetime(0), tslib.NaT, - tslib.NaT, to_datetime(0)])) - - def test_datetime_invalid_datatype(self): - # GH13176 - - with self.assertRaises(TypeError): - pd.to_datetime(bool) - with self.assertRaises(TypeError): - pd.to_datetime(pd.to_datetime) - - def test_unit(self): - # GH 11758 - # test proper behavior with erros - - with self.assertRaises(ValueError): - to_datetime([1], unit='D', format='%Y%m%d') - - values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan, - 'NaT', ''] - result = to_datetime(values, unit='D', errors='ignore') - expected = Index([11111111, Timestamp('1970-01-02'), - Timestamp('1970-01-02'), pd.NaT, - pd.NaT, pd.NaT, pd.NaT, pd.NaT], - dtype=object) - tm.assert_index_equal(result, expected) - - result = to_datetime(values, unit='D', errors='coerce') - expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', - 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) - tm.assert_index_equal(result, expected) - - with 
self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, unit='D', errors='raise') - - values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT'] - - result = to_datetime(values, errors='ignore', unit='s') - expected = Index([1420043460000, pd.NaT, pd.NaT, - pd.NaT, pd.NaT], dtype=object) - tm.assert_index_equal(result, expected) - - result = to_datetime(values, errors='coerce', unit='s') - expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) - tm.assert_index_equal(result, expected) - - with self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, errors='raise', unit='s') - - # if we have a string, then we raise a ValueError - # and NOT an OutOfBoundsDatetime - for val in ['foo', Timestamp('20130101')]: - try: - to_datetime(val, errors='raise', unit='s') - except tslib.OutOfBoundsDatetime: - raise AssertionError("incorrect exception raised") - except ValueError: - pass - - def test_unit_consistency(self): - - # consistency of conversions - expected = Timestamp('1970-05-09 14:25:11') - result = pd.to_datetime(11111111, unit='s', errors='raise') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - result = pd.to_datetime(11111111, unit='s', errors='coerce') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - result = pd.to_datetime(11111111, unit='s', errors='ignore') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - def test_unit_with_numeric(self): - - # GH 13180 - # coercions from floats/ints are ok - expected = DatetimeIndex(['2015-06-19 05:33:20', - '2015-05-27 22:33:20']) - arr1 = [1.434692e+18, 1.432766e+18] - arr2 = np.array(arr1).astype('int64') - for errors in ['ignore', 'raise', 'coerce']: - result = pd.to_datetime(arr1, errors=errors) - tm.assert_index_equal(result, expected) - - result = pd.to_datetime(arr2, errors=errors) - tm.assert_index_equal(result, expected) - - # but we want to make sure that we are coercing - # if we have ints/strings - expected = DatetimeIndex(['NaT', - '2015-06-19 05:33:20', - '2015-05-27 22:33:20']) - arr = ['foo', 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - expected = DatetimeIndex(['2015-06-19 05:33:20', - '2015-05-27 22:33:20', - 'NaT', - 'NaT']) - arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - def test_unit_mixed(self): - - # mixed integers/datetimes - expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) - arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - with self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') - - expected = DatetimeIndex(['NaT', - 'NaT', - '2013-01-01']) - arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - with self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') - - def test_index_to_datetime(self): - idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = idx.to_datetime() - expected = DatetimeIndex(pd.to_datetime(idx.values)) - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - today = datetime.today() - idx = Index([today], dtype=object) 
- result = idx.to_datetime() - expected = DatetimeIndex([today]) - tm.assert_index_equal(result, expected) - - def test_dataframe(self): - - df = DataFrame({'year': [2015, 2016], - 'month': [2, 3], - 'day': [4, 5], - 'hour': [6, 7], - 'minute': [58, 59], - 'second': [10, 11], - 'ms': [1, 1], - 'us': [2, 2], - 'ns': [3, 3]}) - - result = to_datetime({'year': df['year'], - 'month': df['month'], - 'day': df['day']}) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:0:00')]) - assert_series_equal(result, expected) - - # dict-like - result = to_datetime(df[['year', 'month', 'day']].to_dict()) - assert_series_equal(result, expected) - - # dict but with constructable - df2 = df[['year', 'month', 'day']].to_dict() - df2['month'] = 2 - result = to_datetime(df2) - expected2 = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160205 00:0:00')]) - assert_series_equal(result, expected2) - - # unit mappings - units = [{'year': 'years', - 'month': 'months', - 'day': 'days', - 'hour': 'hours', - 'minute': 'minutes', - 'second': 'seconds'}, - {'year': 'year', - 'month': 'month', - 'day': 'day', - 'hour': 'hour', - 'minute': 'minute', - 'second': 'second'}, - ] - - for d in units: - result = to_datetime(df[list(d.keys())].rename(columns=d)) - expected = Series([Timestamp('20150204 06:58:10'), - Timestamp('20160305 07:59:11')]) - assert_series_equal(result, expected) - - d = {'year': 'year', - 'month': 'month', - 'day': 'day', - 'hour': 'hour', - 'minute': 'minute', - 'second': 'second', - 'ms': 'ms', - 'us': 'us', - 'ns': 'ns'} - - result = to_datetime(df.rename(columns=d)) - expected = Series([Timestamp('20150204 06:58:10.001002003'), - Timestamp('20160305 07:59:11.001002003')]) - assert_series_equal(result, expected) - - # coerce back to int - result = to_datetime(df.astype(str)) - assert_series_equal(result, expected) - - # passing coerce - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5]}) - with self.assertRaises(ValueError): - to_datetime(df2) - result = to_datetime(df2, errors='coerce') - expected = Series([Timestamp('20150204 00:00:00'), - pd.NaT]) - assert_series_equal(result, expected) - - # extra columns - with self.assertRaises(ValueError): - df2 = df.copy() - df2['foo'] = 1 - to_datetime(df2) - - # not enough - for c in [['year'], - ['year', 'month'], - ['year', 'month', 'second'], - ['month', 'day'], - ['year', 'day', 'second']]: - with self.assertRaises(ValueError): - to_datetime(df[c]) - - # duplicates - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5]}) - df2.columns = ['year', 'year', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) - - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5], - 'hour': [4, 5]}) - df2.columns = ['year', 'month', 'day', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) - - def test_dataframe_dtypes(self): - # #13451 - df = DataFrame({'year': [2015, 2016], - 'month': [2, 3], - 'day': [4, 5]}) - - # int16 - result = to_datetime(df.astype('int16')) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:00:00')]) - assert_series_equal(result, expected) - - # mixed dtypes - df['month'] = df['month'].astype('int8') - df['day'] = df['day'].astype('int8') - result = to_datetime(df) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:00:00')]) - assert_series_equal(result, expected) - - # float - df = DataFrame({'year': [2000, 2001], - 'month': [1.5, 1], - 'day': [1, 1]}) - with 
self.assertRaises(ValueError): - to_datetime(df) - - -class TestDatetime64(tm.TestCase): - """ - Also test support for datetime64[ns] in Series / DataFrame - """ - - def setUp(self): - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='Min') - self.series = Series(rand(len(dti)), dti) - - def test_fancy_getitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), - end=datetime(2010, 1, 1)) - - s = Series(np.arange(len(dti)), index=dti) - - self.assertEqual(s[48], 48) - self.assertEqual(s['1/2/2009'], 48) - self.assertEqual(s['2009-1-2'], 48) - self.assertEqual(s[datetime(2009, 1, 2)], 48) - self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) - self.assertRaises(KeyError, s.__getitem__, '2009-1-3') - - assert_series_equal(s['3/6/2009':'2009-06-05'], - s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) - - def test_fancy_setitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), - end=datetime(2010, 1, 1)) - - s = Series(np.arange(len(dti)), index=dti) - s[48] = -1 - self.assertEqual(s[48], -1) - s['1/2/2009'] = -2 - self.assertEqual(s[48], -2) - s['1/2/2009':'2009-06-05'] = -3 - self.assertTrue((s[48:54] == -3).all()) - - def test_dti_snap(self): - dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', - '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') - - res = dti.snap(freq='W-MON') - exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') - exp = exp.repeat([3, 4]) - self.assertTrue((res == exp).all()) - - res = dti.snap(freq='B') - - exp = date_range('1/1/2002', '1/7/2002', freq='b') - exp = exp.repeat([1, 1, 1, 2, 2]) - self.assertTrue((res == exp).all()) - - def test_dti_reset_index_round_trip(self): - dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') - d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) - d2 = d1.reset_index() - self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]')) - d3 = d2.set_index('index') - assert_frame_equal(d1, d3, check_names=False) - - # #2329 - stamp = datetime(2012, 11, 22) - df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) - df = df.set_index('Date') - - self.assertEqual(df.index[0], stamp) - self.assertEqual(df.reset_index()['Date'][0], stamp) - - def test_series_set_value(self): - # #1561 - - dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] - index = DatetimeIndex(dates) - - s = Series().set_value(dates[0], 1.) 
- s2 = s.set_value(dates[1], np.nan) - - exp = Series([1., np.nan], index=index) - - assert_series_equal(s2, exp) - - # s = Series(index[:1], index[:1]) - # s2 = s.set_value(dates[1], index[1]) - # self.assertEqual(s2.values.dtype, 'M8[ns]') - - @slow - def test_slice_locs_indexerror(self): - times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) - for i in range(100000)] - s = Series(lrange(100000), times) - s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] - - def test_slicing_datetimes(self): - - # GH 7523 - - # unique - df = DataFrame(np.arange(4., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 3, 4]]) - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - # duplicates - df = pd.DataFrame(np.arange(5., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 2, 3, 4]]) - - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - -class TestSeriesDatetime64(tm.TestCase): - def setUp(self): - self.series = Series(date_range('1/1/2000', periods=10)) - - def test_auto_conversion(self): - series = Series(list(date_range('1/1/2000', periods=10))) - self.assertEqual(series.dtype, 'M8[ns]') - - def test_constructor_cant_cast_datetime64(self): - msg = "Cannot cast datetime64 to " - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=float) - - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=int) - - def test_constructor_cast_object(self): - s = Series(date_range('1/1/2000', periods=10), dtype=object) - exp = Series(date_range('1/1/2000', periods=10)) - tm.assert_series_equal(s, exp) - - def test_series_comparison_scalars(self): - val = datetime(2000, 1, 4) - result = self.series > val - expected = Series([x > val for x in self.series]) - self.assert_series_equal(result, expected) - - val = self.series[5] - result = self.series > val - expected = Series([x > val for x in self.series]) - self.assert_series_equal(result, expected) - - def test_between(self): - left, right = self.series[[2, 7]] - - result = self.series.between(left, right) - expected = (self.series >= left) & (self.series <= right) - assert_series_equal(result, expected) - - # --------------------------------------------------------------------- - # NaT support - - def test_NaT_scalar(self): - series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') - - val = series[3] - self.assertTrue(com.isnull(val)) - - series[2] = val - self.assertTrue(com.isnull(series[2])) - - def test_NaT_cast(self): - # GH10747 - result = Series([np.nan]).astype('M8[ns]') - expected = Series([NaT]) - assert_series_equal(result, expected) - - def test_set_none_nan(self): - self.series[3] = None - self.assertIs(self.series[3], NaT) - - self.series[3:5] = None - 
self.assertIs(self.series[4], NaT) - - self.series[5] = np.nan - self.assertIs(self.series[5], NaT) - - self.series[5:7] = np.nan - self.assertIs(self.series[6], NaT) - - def test_intercept_astype_object(self): - - # this test no longer makes sense as series is by default already - # M8[ns] - expected = self.series.astype('object') - - df = DataFrame({'a': self.series, - 'b': np.random.randn(len(self.series))}) - exp_dtypes = pd.Series([np.dtype('datetime64[ns]'), - np.dtype('float64')], index=['a', 'b']) - tm.assert_series_equal(df.dtypes, exp_dtypes) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - def test_nat_operations(self): - # GH 8617 - s = Series([0, pd.NaT], dtype='m8[ns]') - exp = s[0] - self.assertEqual(s.median(), exp) - self.assertEqual(s.min(), exp) - self.assertEqual(s.max(), exp) - - def test_round_nat(self): - # GH14940 - s = Series([pd.NaT]) - expected = Series(pd.NaT) - for method in ["round", "floor", "ceil"]: - round_method = getattr(s.dt, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - assert_series_equal(round_method(freq), expected) - - -class TestTimestamp(tm.TestCase): - def test_class_ops_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - - def compare(x, y): - self.assertEqual(int(Timestamp(x).value / 1e9), - int(Timestamp(y).value / 1e9)) - - compare(Timestamp.now(), datetime.now()) - compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) - compare(Timestamp.utcnow(), datetime.utcnow()) - compare(Timestamp.today(), datetime.today()) - current_time = calendar.timegm(datetime.now().utctimetuple()) - compare(Timestamp.utcfromtimestamp(current_time), - datetime.utcfromtimestamp(current_time)) - compare(Timestamp.fromtimestamp(current_time), - datetime.fromtimestamp(current_time)) - - date_component = datetime.utcnow() - time_component = (date_component + timedelta(minutes=10)).time() - compare(Timestamp.combine(date_component, time_component), - datetime.combine(date_component, time_component)) - - def test_class_ops_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - - def compare(x, y): - self.assertEqual(int(np.round(Timestamp(x).value / 1e9)), - int(np.round(Timestamp(y).value / 1e9))) - - compare(Timestamp.now(), datetime.now()) - compare(Timestamp.now('UTC'), datetime.now(tzutc())) - compare(Timestamp.utcnow(), datetime.utcnow()) - compare(Timestamp.today(), datetime.today()) - current_time = calendar.timegm(datetime.now().utctimetuple()) - compare(Timestamp.utcfromtimestamp(current_time), - datetime.utcfromtimestamp(current_time)) - compare(Timestamp.fromtimestamp(current_time), - datetime.fromtimestamp(current_time)) - - date_component = datetime.utcnow() - time_component = (date_component + timedelta(minutes=10)).time() - compare(Timestamp.combine(date_component, time_component), - datetime.combine(date_component, time_component)) - - def test_basics_nanos(self): - val = np.int64(946684800000000000).view('M8[ns]') - stamp = Timestamp(val.view('i8') + 500) - self.assertEqual(stamp.year, 2000) - self.assertEqual(stamp.month, 1) - self.assertEqual(stamp.microsecond, 0) - self.assertEqual(stamp.nanosecond, 500) - - # GH 14415 - val = np.iinfo(np.int64).min + 80000000000000 - stamp = Timestamp(val) - self.assertEqual(stamp.year, 1677) - self.assertEqual(stamp.month, 9) - 
self.assertEqual(stamp.day, 21) - self.assertEqual(stamp.microsecond, 145224) - self.assertEqual(stamp.nanosecond, 192) - - def test_unit(self): - - def check(val, unit=None, h=1, s=1, us=0): - stamp = Timestamp(val, unit=unit) - self.assertEqual(stamp.year, 2000) - self.assertEqual(stamp.month, 1) - self.assertEqual(stamp.day, 1) - self.assertEqual(stamp.hour, h) - if unit != 'D': - self.assertEqual(stamp.minute, 1) - self.assertEqual(stamp.second, s) - self.assertEqual(stamp.microsecond, us) - else: - self.assertEqual(stamp.minute, 0) - self.assertEqual(stamp.second, 0) - self.assertEqual(stamp.microsecond, 0) - self.assertEqual(stamp.nanosecond, 0) - - ts = Timestamp('20000101 01:01:01') - val = ts.value - days = (ts - Timestamp('1970-01-01')).days - - check(val) - check(val / long(1000), unit='us') - check(val / long(1000000), unit='ms') - check(val / long(1000000000), unit='s') - check(days, unit='D', h=0) - - # using truediv, so these are like floats - if compat.PY3: - check((val + 500000) / long(1000000000), unit='s', us=500) - check((val + 500000000) / long(1000000000), unit='s', us=500000) - check((val + 500000) / long(1000000), unit='ms', us=500) - - # get chopped in py2 - else: - check((val + 500000) / long(1000000000), unit='s') - check((val + 500000000) / long(1000000000), unit='s') - check((val + 500000) / long(1000000), unit='ms') - - # ok - check((val + 500000) / long(1000), unit='us', us=500) - check((val + 500000000) / long(1000000), unit='ms', us=500000) - - # floats - check(val / 1000.0 + 5, unit='us', us=5) - check(val / 1000.0 + 5000, unit='us', us=5000) - check(val / 1000000.0 + 0.5, unit='ms', us=500) - check(val / 1000000.0 + 0.005, unit='ms', us=5) - check(val / 1000000000.0 + 0.5, unit='s', us=500000) - check(days + 0.5, unit='D', h=12) - - # nan - result = Timestamp(np.nan) - self.assertIs(result, NaT) - - result = Timestamp(None) - self.assertIs(result, NaT) - - result = Timestamp(iNaT) - self.assertIs(result, NaT) - - result = Timestamp(NaT) - self.assertIs(result, NaT) - - result = Timestamp('NaT') - self.assertIs(result, NaT) - - self.assertTrue(isnull(Timestamp('nat'))) - - def test_roundtrip(self): - - # test value to string and back conversions - # further test accessors - base = Timestamp('20140101 00:00:00') - - result = Timestamp(base.value + pd.Timedelta('5ms').value) - self.assertEqual(result, Timestamp(str(base) + ".005000")) - self.assertEqual(result.microsecond, 5000) - - result = Timestamp(base.value + pd.Timedelta('5us').value) - self.assertEqual(result, Timestamp(str(base) + ".000005")) - self.assertEqual(result.microsecond, 5) - - result = Timestamp(base.value + pd.Timedelta('5ns').value) - self.assertEqual(result, Timestamp(str(base) + ".000000005")) - self.assertEqual(result.nanosecond, 5) - self.assertEqual(result.microsecond, 0) - - result = Timestamp(base.value + pd.Timedelta('6ms 5us').value) - self.assertEqual(result, Timestamp(str(base) + ".006005")) - self.assertEqual(result.microsecond, 5 + 6 * 1000) - - result = Timestamp(base.value + pd.Timedelta('200ms 5us').value) - self.assertEqual(result, Timestamp(str(base) + ".200005")) - self.assertEqual(result.microsecond, 5 + 200 * 1000) - - def test_comparison(self): - # 5-18-2012 00:00:00.000 - stamp = long(1337299200000000000) - - val = Timestamp(stamp) - - self.assertEqual(val, val) - self.assertFalse(val != val) - self.assertFalse(val < val) - self.assertTrue(val <= val) - self.assertFalse(val > val) - self.assertTrue(val >= val) - - other = datetime(2012, 5, 18) - 
self.assertEqual(val, other) - self.assertFalse(val != other) - self.assertFalse(val < other) - self.assertTrue(val <= other) - self.assertFalse(val > other) - self.assertTrue(val >= other) - - other = Timestamp(stamp + 100) - - self.assertNotEqual(val, other) - self.assertNotEqual(val, other) - self.assertTrue(val < other) - self.assertTrue(val <= other) - self.assertTrue(other > val) - self.assertTrue(other >= val) - - def test_compare_invalid(self): - - # GH 8058 - val = Timestamp('20130101 12:01:02') - self.assertFalse(val == 'foo') - self.assertFalse(val == 10.0) - self.assertFalse(val == 1) - self.assertFalse(val == long(1)) - self.assertFalse(val == []) - self.assertFalse(val == {'foo': 1}) - self.assertFalse(val == np.float64(1)) - self.assertFalse(val == np.int64(1)) - - self.assertTrue(val != 'foo') - self.assertTrue(val != 10.0) - self.assertTrue(val != 1) - self.assertTrue(val != long(1)) - self.assertTrue(val != []) - self.assertTrue(val != {'foo': 1}) - self.assertTrue(val != np.float64(1)) - self.assertTrue(val != np.int64(1)) - - # ops testing - df = DataFrame(randn(5, 2)) - a = df[0] - b = Series(randn(5)) - b.name = Timestamp('2000-01-01') - tm.assert_series_equal(a / b, 1 / (b / a)) - - def test_cant_compare_tz_naive_w_aware(self): - tm._skip_if_no_pytz() - # #1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz='utc') - - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) - else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) - - def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): - tm._skip_if_no_pytz() - from pytz import utc - # #1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz=utc) - - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) - else: - self.assertFalse(a == b.to_pydatetime()) - self.assertFalse(a.to_pydatetime() == b) - - def test_cant_compare_tz_naive_w_aware_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - utc = tzutc() - # #1404 - a = Timestamp('3/12/2012') - b = Timestamp('3/12/2012', tz=utc) - - self.assertRaises(Exception, a.__eq__, b) - self.assertRaises(Exception, a.__ne__, b) - self.assertRaises(Exception, a.__lt__, b) - self.assertRaises(Exception, a.__gt__, b) - self.assertRaises(Exception, b.__eq__, a) - self.assertRaises(Exception, b.__ne__, a) - self.assertRaises(Exception, b.__lt__, a) - self.assertRaises(Exception, b.__gt__, a) - - if sys.version_info < (3, 3): - self.assertRaises(Exception, a.__eq__, b.to_pydatetime()) - self.assertRaises(Exception, a.to_pydatetime().__eq__, b) - else: - self.assertFalse(a == b.to_pydatetime()) - 
self.assertFalse(a.to_pydatetime() == b) - - def test_delta_preserve_nanos(self): - val = Timestamp(long(1337299200000000123)) - result = val + timedelta(1) - self.assertEqual(result.nanosecond, val.nanosecond) - - def test_frequency_misc(self): - self.assertEqual(frequencies.get_freq_group('T'), - frequencies.FreqGroup.FR_MIN) - - code, stride = frequencies.get_freq_code(offsets.Hour()) - self.assertEqual(code, frequencies.FreqGroup.FR_HR) - - code, stride = frequencies.get_freq_code((5, 'T')) - self.assertEqual(code, frequencies.FreqGroup.FR_MIN) - self.assertEqual(stride, 5) - - offset = offsets.Hour() - result = frequencies.to_offset(offset) - self.assertEqual(result, offset) - - result = frequencies.to_offset((5, 'T')) - expected = offsets.Minute(5) - self.assertEqual(result, expected) - - self.assertRaises(ValueError, frequencies.get_freq_code, (5, 'baz')) - - self.assertRaises(ValueError, frequencies.to_offset, '100foo') - - self.assertRaises(ValueError, frequencies.to_offset, ('', '')) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = frequencies.get_standard_freq(offsets.Hour()) - self.assertEqual(result, 'H') - - def test_hash_equivalent(self): - d = {datetime(2011, 1, 1): 5} - stamp = Timestamp(datetime(2011, 1, 1)) - self.assertEqual(d[stamp], 5) - - def test_timestamp_compare_scalars(self): - # case where ndim == 0 - lhs = np.datetime64(datetime(2013, 12, 6)) - rhs = Timestamp('now') - nat = Timestamp('nat') - - ops = {'gt': 'lt', - 'lt': 'gt', - 'ge': 'le', - 'le': 'ge', - 'eq': 'eq', - 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - expected = left_f(lhs, rhs) - - result = right_f(rhs, lhs) - self.assertEqual(result, expected) - - expected = left_f(rhs, nat) - result = right_f(nat, rhs) - self.assertEqual(result, expected) - - def test_timestamp_compare_series(self): - # make sure we can compare Timestamps on the right AND left hand side - # GH4982 - s = Series(date_range('20010101', periods=10), name='dates') - s_nat = s.copy(deep=True) - - s[0] = pd.Timestamp('nat') - s[3] = pd.Timestamp('nat') - - ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) - - # no nats - expected = left_f(s, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s) - tm.assert_series_equal(result, expected) - - # nats - expected = left_f(s, Timestamp('nat')) - result = right_f(Timestamp('nat'), s) - tm.assert_series_equal(result, expected) - - # compare to timestamp with series containing nats - expected = left_f(s_nat, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), s_nat) - tm.assert_series_equal(result, expected) - - # compare to nat with series containing nats - expected = left_f(s_nat, Timestamp('nat')) - result = right_f(Timestamp('nat'), s_nat) - tm.assert_series_equal(result, expected) - - def test_is_leap_year(self): - # GH 13727 - for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']: - dt = Timestamp('2000-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) - self.assertIsInstance(dt.is_leap_year, bool) - - dt = Timestamp('1999-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) - - dt = Timestamp('2004-01-01 00:00:00', tz=tz) - self.assertTrue(dt.is_leap_year) - - dt = Timestamp('2100-01-01 00:00:00', tz=tz) - self.assertFalse(dt.is_leap_year) - - self.assertFalse(pd.NaT.is_leap_year) - 
self.assertIsInstance(pd.NaT.is_leap_year, bool) - - def test_round_nat(self): - # GH14940 - ts = Timestamp('nat') - print(dir(ts)) - for method in ["round", "floor", "ceil"]: - round_method = getattr(ts, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - self.assertIs(round_method(freq), ts) - - -class TestSlicing(tm.TestCase): - def test_slice_year(self): - dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) - - s = Series(np.arange(len(dti)), index=dti) - result = s['2005'] - expected = s[s.index.year == 2005] - assert_series_equal(result, expected) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - result = df.loc['2005'] - expected = df[df.index.year == 2005] - assert_frame_equal(result, expected) - - rng = date_range('1/1/2000', '1/1/2010') - - result = rng.get_loc('2009') - expected = slice(3288, 3653) - self.assertEqual(result, expected) - - def test_slice_quarter(self): - dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) - - s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2001Q1']), 90) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['1Q01']), 90) - - def test_slice_month(self): - dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) - s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2005-11']), 30) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['2005-11']), 30) - - assert_series_equal(s['2005-11'], s['11-2005']) - - def test_partial_slice(self): - rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-05':'2006-02'] - expected = s['20050501':'20060228'] - assert_series_equal(result, expected) - - result = s['2005-05':] - expected = s['20050501':] - assert_series_equal(result, expected) - - result = s[:'2006-02'] - expected = s[:'20060228'] - assert_series_equal(result, expected) - - result = s['2005-1-1'] - self.assertEqual(result, s.iloc[0]) - - self.assertRaises(Exception, s.__getitem__, '2004-12-31') - - def test_partial_slice_daily(self): - rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-31'] - assert_series_equal(result, s.iloc[:24]) - - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') - - def test_partial_slice_hourly(self): - rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), - periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-1'] - assert_series_equal(result, s.iloc[:60 * 4]) - - result = s['2005-1-1 20'] - assert_series_equal(result, s.iloc[:60]) - - self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') - - def test_partial_slice_minutely(self): - rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), - periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-1 23:59'] - assert_series_equal(result, s.iloc[:60]) - - result = s['2005-1-1'] - assert_series_equal(result, s.iloc[:60]) - - self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') - - def test_partial_slice_second_precision(self): - rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, - microsecond=999990), - periods=20, freq='US') - s = Series(np.arange(20), rng) - - assert_series_equal(s['2005-1-1 00:00'], 
s.iloc[:10]) - assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) - - assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) - assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) - - self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) - self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) - - def test_partial_slicing_dataframe(self): - # GH14856 - # Test various combinations of string slicing resolution vs. - # index resolution - # - If string resolution is less precise than index resolution, - # string is considered a slice - # - If string resolution is equal to or more precise than index - # resolution, string is considered an exact match - formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', - '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] - resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] - for rnum, resolution in enumerate(resolutions[2:], 2): - # we check only 'day', 'hour', 'minute' and 'second' - unit = Timedelta("1 " + resolution) - middate = datetime(2012, 1, 1, 0, 0, 0) - index = DatetimeIndex([middate - unit, - middate, middate + unit]) - values = [1, 2, 3] - df = DataFrame({'a': values}, index, dtype=np.int64) - self.assertEqual(df.index.resolution, resolution) - - # Timestamp with the same resolution as index - # Should be exact match for Series (return scalar) - # and raise KeyError for Frame - for timestamp, expected in zip(index, values): - ts_string = timestamp.strftime(formats[rnum]) - # make ts_string as precise as index - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Timestamp with resolution less precise than index - for fmt in formats[:rnum]: - for element, theslice in [[0, slice(None, 1)], - [1, slice(1, None)]]: - ts_string = index[element].strftime(fmt) - - # Series should return slice - result = df['a'][ts_string] - expected = df['a'][theslice] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts_string] - expected = df[theslice] - assert_frame_equal(result, expected) - - # Timestamp with resolution more precise than index - # Compatible with existing key - # Should return scalar for Series - # and raise KeyError for Frame - for fmt in formats[rnum + 1:]: - ts_string = index[1].strftime(fmt) - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Not compatible with existing key - # Should raise KeyError - for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: - ts = index[1] + Timedelta("1 " + res) - ts_string = ts.strftime(fmt) - self.assertRaises(KeyError, df['a'].__getitem__, ts_string) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - def test_partial_slicing_with_multiindex(self): - - # GH 4758 - # partial string indexing with a multi-index buggy - df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], - 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], - 'val': [1, 2, 3, 4]}, - index=date_range("2013-06-19 09:30:00", - periods=4, freq='5T')) - df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) - - expected = DataFrame([ - [1] - ], index=Index(['ABC'], name='TICKER'), columns=['val']) - result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] - assert_frame_equal(result, expected) - - expected = df_multi.loc[ - (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] - result = 
df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] - assert_series_equal(result, expected) - - # this is a KeyError as we don't do partial string selection on - # multi-levels - def f(): - df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - - self.assertRaises(KeyError, f) - - # GH 4294 - # partial slice on a series mi - s = pd.DataFrame(randn(1000, 1000), index=pd.date_range( - '2000-1-1', periods=1000)).stack() - - s2 = s[:-1].copy() - expected = s2['2000-1-4'] - result = s2[pd.Timestamp('2000-1-4')] - assert_series_equal(result, expected) - - result = s[pd.Timestamp('2000-1-4')] - expected = s['2000-1-4'] - assert_series_equal(result, expected) - - df2 = pd.DataFrame(s) - expected = df2.xs('2000-1-4') - result = df2.loc[pd.Timestamp('2000-1-4')] - assert_frame_equal(result, expected) - - def test_date_range_normalize(self): - snap = datetime.today() - n = 50 - - rng = date_range(snap, periods=n, normalize=False, freq='2D') - - offset = timedelta(2) - values = DatetimeIndex([snap + i * offset for i in range(n)]) - - tm.assert_index_equal(rng, values) - - rng = date_range('1/1/2000 08:15', periods=n, normalize=False, - freq='B') - the_time = time(8, 15) - for val in rng: - self.assertEqual(val.time(), the_time) - - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') - shifted = index + timedelta(1) - back = shifted + timedelta(-1) - self.assertTrue(tm.equalContents(index, back)) - self.assertEqual(shifted.freq, index.freq) - self.assertEqual(shifted.freq, back.freq) - - result = index - timedelta(1) - expected = index + timedelta(-1) - tm.assert_index_equal(result, expected) - - # GH4134, buggy with timedeltas - rng = date_range('2013', '2014') - s = Series(rng) - result1 = rng - pd.offsets.Hour(1) - result2 = DatetimeIndex(s - np.timedelta64(100000000)) - result3 = rng - np.timedelta64(100000000) - result4 = DatetimeIndex(s - pd.offsets.Hour(1)) - tm.assert_index_equal(result1, result4) - tm.assert_index_equal(result2, result3) - - def test_shift(self): - ts = Series(np.random.randn(5), - index=date_range('1/1/2000', periods=5, freq='H')) - - result = ts.shift(1, freq='5T') - exp_index = ts.index.shift(1, freq='5T') - tm.assert_index_equal(result.index, exp_index) - - # GH #1063, multiple of same base - result = ts.shift(1, freq='4H') - exp_index = ts.index + offsets.Hour(4) - tm.assert_index_equal(result.index, exp_index) - - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.shift, 1) - - def test_setops_preserve_freq(self): - for tz in [None, 'Asia/Tokyo', 'US/Eastern']: - rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) - - result = rng[:50].union(rng[50:100]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) - - result = rng[:50].union(rng[30:100]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) - - result = rng[:50].union(rng[60:100]) - self.assertEqual(result.name, rng.name) - self.assertIsNone(result.freq) - self.assertEqual(result.tz, rng.tz) - - result = rng[:50].intersection(rng[25:75]) - self.assertEqual(result.name, rng.name) - self.assertEqual(result.freqstr, 'D') - self.assertEqual(result.tz, rng.tz) - - nofreq = DatetimeIndex(list(rng[25:75]), name='other') - result = rng[:50].union(nofreq) - self.assertIsNone(result.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) - - result = 
rng[:50].intersection(nofreq) - self.assertIsNone(result.name) - self.assertEqual(result.freq, rng.freq) - self.assertEqual(result.tz, rng.tz) - - def test_min_max(self): - rng = date_range('1/1/2000', '12/31/2000') - rng2 = rng.take(np.random.permutation(len(rng))) - - the_min = rng2.min() - the_max = rng2.max() - tm.assertIsInstance(the_min, Timestamp) - tm.assertIsInstance(the_max, Timestamp) - self.assertEqual(the_min, rng[0]) - self.assertEqual(the_max, rng[-1]) - - self.assertEqual(rng.min(), rng[0]) - self.assertEqual(rng.max(), rng[-1]) - - def test_min_max_series(self): - rng = date_range('1/1/2000', periods=10, freq='4h') - lvls = ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'C'] - df = DataFrame({'TS': rng, 'V': np.random.randn(len(rng)), 'L': lvls}) - - result = df.TS.max() - exp = Timestamp(df.TS.iat[-1]) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, exp) - - result = df.TS.min() - exp = Timestamp(df.TS.iat[0]) - self.assertTrue(isinstance(result, Timestamp)) - self.assertEqual(result, exp) - - def test_from_M8_structured(self): - dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] - arr = np.array(dates, - dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')]) - df = DataFrame(arr) - - self.assertEqual(df['Date'][0], dates[0][0]) - self.assertEqual(df['Forecasting'][0], dates[0][1]) - - s = Series(arr['Date']) - self.assertTrue(s[0], Timestamp) - self.assertEqual(s[0], dates[0][0]) - - s = Series.from_array(arr['Date'], Index([0])) - self.assertEqual(s[0], dates[0][0]) - - def test_get_level_values_box(self): - from pandas import MultiIndex - - dates = date_range('1/1/2000', periods=4) - levels = [dates, [0, 1]] - labels = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] - - index = MultiIndex(levels=levels, labels=labels) - - self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp)) - - def test_frame_apply_dont_convert_datetime64(self): - from pandas.tseries.offsets import BDay - df = DataFrame({'x1': [datetime(1996, 1, 1)]}) - - df = df.applymap(lambda x: x + BDay()) - df = df.applymap(lambda x: x + BDay()) - - self.assertTrue(df.x1.dtype == 'M8[ns]') - - def test_date_range_fy5252(self): - dr = date_range(start="2013-01-01", periods=2, freq=offsets.FY5253( - startingMonth=1, weekday=3, variation="nearest")) - self.assertEqual(dr[0], Timestamp('2013-01-31')) - self.assertEqual(dr[1], Timestamp('2014-01-30')) - - def test_partial_slice_doesnt_require_monotonicity(self): - # For historical reasons. 
- s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) - - nonmonotonic = s[[3, 5, 4]] - expected = nonmonotonic.iloc[:0] - timestamp = pd.Timestamp('2014-01-10') - - assert_series_equal(nonmonotonic['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) - - assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) - - -class TimeConversionFormats(tm.TestCase): - def test_to_datetime_format(self): - values = ['1/1/2000', '1/2/2000', '1/3/2000'] - - results1 = [Timestamp('20000101'), Timestamp('20000201'), - Timestamp('20000301')] - results2 = [Timestamp('20000101'), Timestamp('20000102'), - Timestamp('20000103')] - for vals, expecteds in [(values, (Index(results1), Index(results2))), - (Series(values), - (Series(results1), Series(results2))), - (values[0], (results1[0], results2[0])), - (values[1], (results1[1], results2[1])), - (values[2], (results1[2], results2[2]))]: - - for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): - result = to_datetime(vals, format=fmt) - expected = expecteds[i] - - if isinstance(expected, Series): - assert_series_equal(result, Series(expected)) - elif isinstance(expected, Timestamp): - self.assertEqual(result, expected) - else: - tm.assert_index_equal(result, expected) - - def test_to_datetime_format_YYYYMMDD(self): - s = Series([19801222, 19801222] + [19810105] * 5) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - result = to_datetime(s.apply(str), format='%Y%m%d') - assert_series_equal(result, expected) - - # with NaT - expected = Series([Timestamp("19801222"), Timestamp("19801222")] + - [Timestamp("19810105")] * 5) - expected[2] = np.nan - s[2] = np.nan - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # string with NaT - s = s.apply(str) - s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # coercion - # GH 7930 - s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') - expected = Series([datetime(2012, 12, 31), - datetime(2014, 12, 31), datetime(9999, 12, 31)], - dtype=object) - self.assert_series_equal(result, expected) - - result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') - expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') - assert_series_equal(result, expected) - - # GH 10178 - def test_to_datetime_format_integer(self): - s = Series([2000, 2001, 2002]) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y') - assert_series_equal(result, expected) - - s = Series([200001, 200105, 200206]) - expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) - ]) - - result = to_datetime(s, format='%Y%m') - assert_series_equal(result, expected) - - def test_to_datetime_format_microsecond(self): - - # these are locale dependent - lang, _ = locale.getlocale() - month_abbr = calendar.month_abbr[4] - val = '01-{}-2011 00:00:01.978'.format(month_abbr) - - format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) - exp = datetime.strptime(val, format) - self.assertEqual(result, exp) - - def test_to_datetime_format_time(self): - data = [ - ['01/10/2010 15:20', '%m/%d/%Y %H:%M', - Timestamp('2010-01-10 15:20')], - 
['01/10/2010 05:43', '%m/%d/%Y %I:%M', - Timestamp('2010-01-10 05:43')], - ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S', - Timestamp('2010-01-10 13:56:01')] # , - # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', - # Timestamp('2010-01-10 20:14')], - # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', - # Timestamp('2010-01-10 07:40')], - # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', - # Timestamp('2010-01-10 09:12:56')] - ] - for s, format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) - - def test_to_datetime_with_non_exact(self): - # GH 10834 - _skip_if_has_locale() - - # 8904 - # exact kw - if sys.version_info < (2, 7): - raise nose.SkipTest('on python version < 2.7') - - s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', - '19MAY11 00:00:00Z']) - result = to_datetime(s, format='%d%b%y', exact=False) - expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), - format='%d%b%y') - assert_series_equal(result, expected) - - def test_parse_nanoseconds_with_formula(self): - - # GH8989 - # trunctaing the nanoseconds when a format was provided - for v in ["2012-01-01 09:00:00.000000001", - "2012-01-01 09:00:00.000001", - "2012-01-01 09:00:00.001", - "2012-01-01 09:00:00.001000", - "2012-01-01 09:00:00.001000000", ]: - expected = pd.to_datetime(v) - result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") - self.assertEqual(result, expected) - - def test_to_datetime_format_weeks(self): - data = [ - ['2009324', '%Y%W%w', Timestamp('2009-08-13')], - ['2013020', '%Y%U%w', Timestamp('2013-01-13')] - ] - for s, format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) - - -class TestToDatetimeInferFormat(tm.TestCase): - - def test_to_datetime_infer_datetime_format_consistent_format(self): - s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) - - test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', - '%Y-%m-%dT%H:%M:%S.%f'] - - for test_format in test_formats: - s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) - - with_format = pd.to_datetime(s_as_dt_strings, format=test_format) - no_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=False) - yes_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=True) - - # Whether the format is explicitly passed, it is inferred, or - # it is not inferred, the results should all be the same - self.assert_series_equal(with_format, no_infer) - self.assert_series_equal(no_infer, yes_infer) - - def test_to_datetime_infer_datetime_format_inconsistent_format(self): - s = pd.Series(np.array(['01/01/2011 00:00:00', - '01-02-2011 00:00:00', - '2011-01-03T00:00:00'])) - - # When the format is inconsistent, infer_datetime_format should just - # fallback to the default parsing - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) - - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_with_nans(self): - s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, - '01/03/2011 00:00:00', np.nan])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): - s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', - '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) - - 
tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_iso8601_noleading_0s(self): - # GH 11871 - s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) - expected = pd.Series([pd.Timestamp('2014-01-01'), - pd.Timestamp('2014-02-02'), - pd.Timestamp('2015-03-03')]) - tm.assert_series_equal(pd.to_datetime(s), expected) - tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) - - -class TestGuessDatetimeFormat(tm.TestCase): - - def test_guess_datetime_format_with_parseable_formats(self): - tm._skip_if_not_us_locale() - dt_string_to_format = (('20111230', '%Y%m%d'), - ('2011-12-30', '%Y-%m-%d'), - ('30-12-2011', '%d-%m-%Y'), - ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), - ('2011-12-30 00:00:00.000000', - '%Y-%m-%d %H:%M:%S.%f'), ) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_with_dayfirst(self): - ambiguous_string = '01/01/2011' - self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=True), - '%d/%m/%Y' - ) - self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=False), - '%m/%d/%Y' - ) - - def test_guess_datetime_format_with_locale_specific_formats(self): - # The month names will vary depending on the locale, in which - # case these wont be parsed properly (dateutil can't parse them) - _skip_if_has_locale() - - dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), - ('30/December/2011', '%d/%B/%Y'), - ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_invalid_inputs(self): - # A datetime string must include a year, month and a day for it - # to be guessable, in addition to being a string that looks like - # a datetime - invalid_dts = [ - '2013', - '01/2013', - '12:00:00', - '1/1/1/1', - 'this_is_not_a_datetime', - '51a', - 9, - datetime(2011, 1, 1), - ] - - for invalid_dt in invalid_dts: - self.assertTrue(tools._guess_datetime_format(invalid_dt) is None) - - def test_guess_datetime_format_nopadding(self): - # GH 11142 - dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), - ('30-1-2011', '%d-%m-%Y'), - ('1/1/2011', '%m/%d/%Y'), - ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), - ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_for_array(self): - tm._skip_if_not_us_locale() - expected_format = '%Y-%m-%d %H:%M:%S.%f' - dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) - - test_arrays = [ - np.array([dt_string, dt_string, dt_string], dtype='O'), - np.array([np.nan, np.nan, dt_string], dtype='O'), - np.array([dt_string, 'random_string'], dtype='O'), - ] - - for test_array in test_arrays: - self.assertEqual( - tools._guess_datetime_format_for_array(test_array), - expected_format - ) - - format_for_string_of_nans = tools._guess_datetime_format_for_array( - np.array( - [np.nan, np.nan, np.nan], dtype='O')) - self.assertTrue(format_for_string_of_nans is None) - - -class TestTimestampToJulianDate(tm.TestCase): - def test_compare_1700(self): - r = Timestamp('1700-06-23').to_julian_date() - self.assertEqual(r, 
2342145.5) - - def test_compare_2000(self): - r = Timestamp('2000-04-12').to_julian_date() - self.assertEqual(r, 2451646.5) - - def test_compare_2100(self): - r = Timestamp('2100-08-12').to_julian_date() - self.assertEqual(r, 2488292.5) - - def test_compare_hour01(self): - r = Timestamp('2000-08-12T01:00:00').to_julian_date() - self.assertEqual(r, 2451768.5416666666666666) - - def test_compare_hour13(self): - r = Timestamp('2000-08-12T13:00:00').to_julian_date() - self.assertEqual(r, 2451769.0416666666666666) - - -class TestDateTimeIndexToJulianDate(tm.TestCase): - def test_1700(self): - r1 = Float64Index([2345897.5, 2345898.5, 2345899.5, 2345900.5, - 2345901.5]) - r2 = date_range(start=Timestamp('1710-10-01'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_2000(self): - r1 = Float64Index([2451601.5, 2451602.5, 2451603.5, 2451604.5, - 2451605.5]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='D').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_hour(self): - r1 = Float64Index( - [2451601.5, 2451601.5416666666666666, 2451601.5833333333333333, - 2451601.625, 2451601.6666666666666666]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='H').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_minute(self): - r1 = Float64Index( - [2451601.5, 2451601.5006944444444444, 2451601.5013888888888888, - 2451601.5020833333333333, 2451601.5027777777777777]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='T').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - def test_second(self): - r1 = Float64Index( - [2451601.5, 2451601.500011574074074, 2451601.5000231481481481, - 2451601.5000347222222222, 2451601.5000462962962962]) - r2 = date_range(start=Timestamp('2000-02-27'), periods=5, - freq='S').to_julian_date() - self.assertIsInstance(r2, Float64Index) - tm.assert_index_equal(r1, r2) - - -class TestDaysInMonth(tm.TestCase): - # tests for issue #10154 - def test_day_not_in_month_coerce(self): - self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce'))) - - def test_day_not_in_month_raise(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise') - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") - - def test_day_not_in_month_ignore(self): - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore'), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') - self.assertEqual(to_datetime( - '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_tslib.py 
b/pandas/tseries/tests/test_tslib.py index 58ec1561b2535..cf5dbd671d38c 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -1,29 +1,18 @@ import nose -from distutils.version import LooseVersion -import numpy as np - -from pandas import tslib, lib -import pandas._period as period import datetime +import numpy as np +from distutils.version import LooseVersion import pandas as pd -from pandas.core.api import (Timestamp, Index, Series, Timedelta, Period, - to_datetime) -from pandas.tslib import get_timezone -from pandas._period import period_asfreq, period_ordinal -from pandas.tseries.index import date_range, DatetimeIndex -from pandas.tseries.frequencies import ( - get_freq, - RESO_US, RESO_MS, RESO_SEC, RESO_HR, RESO_DAY, RESO_MIN -) -import pandas.tseries.tools as tools -import pandas.tseries.offsets as offsets import pandas.util.testing as tm -import pandas.compat as compat -from pandas.compat.numpy import (np_datetime64_compat, - np_array_datetime64_compat) - -from pandas.util.testing import assert_series_equal, _skip_if_has_locale +from pandas import tslib, lib, compat +from pandas.tseries import offsets, tools +from pandas.tseries.frequencies import get_freq +from pandas.tseries.index import date_range, DatetimeIndex +from pandas.util.testing import _skip_if_has_locale +from pandas._period import period_ordinal, period_asfreq +from pandas.compat.numpy import np_array_datetime64_compat +from pandas.core.api import Timestamp, to_datetime, Index, Series class TestTsUtil(tm.TestCase): @@ -60,589 +49,6 @@ def test_to_datetime_bijective(self): Timestamp.min.value / 1000) -class TestTimestamp(tm.TestCase): - - def test_constructor(self): - base_str = '2014-07-01 09:00' - base_dt = datetime.datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 - - # confirm base representation is correct - import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) - - tests = [(base_str, base_dt, base_expected), - ('2014-07-01 10:00', datetime.datetime(2014, 7, 1, 10), - base_expected + 3600 * 1000000000), - ('2014-07-01 09:00:00.000008000', - datetime.datetime(2014, 7, 1, 9, 0, 0, 8), - base_expected + 8000), - ('2014-07-01 09:00:00.000000005', - Timestamp('2014-07-01 09:00:00.000000005'), - base_expected + 5)] - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil - timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), - ('US/Eastern', -4), ('dateutil/US/Pacific', -7), - (pytz.FixedOffset(-180), -3), - (dateutil.tz.tzoffset(None, 18000), 5)] - - for date_str, date, expected in tests: - for result in [Timestamp(date_str), Timestamp(date)]: - # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # re-creation shouldn't affect to internal value - result = Timestamp(result) - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # with timezone - for tz, offset in timezones: - for result in [Timestamp(date_str, tz=tz), Timestamp(date, - tz=tz)]: - expected_tz = expected - offset * 3600 * 1000000000 - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should preserve tz - result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should convert to UTC - result = Timestamp(result, tz='UTC') - expected_utc = expected - offset * 
3600 * 1000000000 - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) - - def test_constructor_with_stringoffset(self): - # GH 7833 - base_str = '2014-07-01 11:00:00+02:00' - base_dt = datetime.datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 - - # confirm base representation is correct - import calendar - self.assertEqual(calendar.timegm(base_dt.timetuple()) * 1000000000, - base_expected) - - tests = [(base_str, base_expected), - ('2014-07-01 12:00:00+02:00', - base_expected + 3600 * 1000000000), - ('2014-07-01 11:00:00.000008000+02:00', base_expected + 8000), - ('2014-07-01 11:00:00.000000005+02:00', base_expected + 5)] - - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - import pytz - import dateutil - timezones = [(None, 0), ('UTC', 0), (pytz.utc, 0), ('Asia/Tokyo', 9), - ('US/Eastern', -4), ('dateutil/US/Pacific', -7), - (pytz.FixedOffset(-180), -3), - (dateutil.tz.tzoffset(None, 18000), 5)] - - for date_str, expected in tests: - for result in [Timestamp(date_str)]: - # only with timestring - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # re-creation shouldn't affect to internal value - result = Timestamp(result) - self.assertEqual(result.value, expected) - self.assertEqual(tslib.pydt_to_i8(result), expected) - - # with timezone - for tz, offset in timezones: - result = Timestamp(date_str, tz=tz) - expected_tz = expected - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should preserve tz - result = Timestamp(result) - self.assertEqual(result.value, expected_tz) - self.assertEqual(tslib.pydt_to_i8(result), expected_tz) - - # should convert to UTC - result = Timestamp(result, tz='UTC') - expected_utc = expected - self.assertEqual(result.value, expected_utc) - self.assertEqual(tslib.pydt_to_i8(result), expected_utc) - - # This should be 2013-11-01 05:00 in UTC - # converted to Chicago tz - result = Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) - expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # This should be 2013-11-01 05:00 in UTC - # converted to Tokyo tz (+09:00) - result = Timestamp('2013-11-01 00:00:00-0500', tz='Asia/Tokyo') - self.assertEqual(result.value, Timestamp('2013-11-01 05:00').value) - expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # GH11708 - # This should be 2015-11-18 10:00 in UTC - # converted to Asia/Katmandu - result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) - expected = "Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - # This should be 2015-11-18 10:00 in UTC - # converted to Asia/Kolkata - result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") - self.assertEqual(result.value, Timestamp("2015-11-18 10:00").value) - expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" - self.assertEqual(repr(result), expected) - self.assertEqual(result, eval(repr(result))) - - def test_constructor_invalid(self): - with tm.assertRaisesRegexp(TypeError, 'Cannot 
convert input'): - Timestamp(slice(2)) - with tm.assertRaisesRegexp(ValueError, 'Cannot convert Period'): - Timestamp(Period('1000-01-01')) - - def test_constructor_positional(self): - # GH 10758 - with tm.assertRaises(TypeError): - Timestamp(2000, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 0, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 13, 1) - with tm.assertRaises(ValueError): - Timestamp(2000, 1, 0) - with tm.assertRaises(ValueError): - Timestamp(2000, 1, 32) - - # GH 11630 - self.assertEqual( - repr(Timestamp(2015, 11, 12)), - repr(Timestamp('20151112'))) - - self.assertEqual( - repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) - - self.assertIs(Timestamp(None), pd.NaT) - - def test_constructor_keyword(self): - # GH 10758 - with tm.assertRaises(TypeError): - Timestamp(year=2000, month=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=0, day=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=13, day=1) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=1, day=0) - with tm.assertRaises(ValueError): - Timestamp(year=2000, month=1, day=32) - - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12)), - repr(Timestamp('20151112'))) - - self.assertEqual( - repr(Timestamp(year=2015, month=11, day=12, - hour=1, minute=2, second=3, microsecond=999999)), - repr(Timestamp('2015-11-12 01:02:03.999999'))) - - def test_constructor_fromordinal(self): - base = datetime.datetime(2000, 1, 1) - - ts = Timestamp.fromordinal(base.toordinal(), freq='D') - self.assertEqual(base, ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) - - ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') - self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) - self.assertEqual(base.toordinal(), ts.toordinal()) - - def test_constructor_offset_depr(self): - # GH 12160 - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - ts = Timestamp('2011-01-01', offset='D') - self.assertEqual(ts.freq, 'D') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - self.assertEqual(ts.offset, 'D') - - msg = "Can only specify freq or offset, not both" - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp('2011-01-01', offset='D', freq='D') - - def test_constructor_offset_depr_fromordinal(self): - # GH 12160 - base = datetime.datetime(2000, 1, 1) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - ts = Timestamp.fromordinal(base.toordinal(), offset='D') - self.assertEqual(pd.Timestamp('2000-01-01'), ts) - self.assertEqual(ts.freq, 'D') - self.assertEqual(base.toordinal(), ts.toordinal()) - - msg = "Can only specify freq or offset, not both" - with tm.assertRaisesRegexp(TypeError, msg): - Timestamp.fromordinal(base.toordinal(), offset='D', freq='D') - - def test_conversion(self): - # GH 9255 - ts = Timestamp('2000-01-01') - - result = ts.to_pydatetime() - expected = datetime.datetime(2000, 1, 1) - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) - - result = ts.to_datetime64() - expected = np.datetime64(ts.value, 'ns') - self.assertEqual(result, expected) - self.assertEqual(type(result), type(expected)) - self.assertEqual(result.dtype, expected.dtype) - - def test_repr(self): - tm._skip_if_no_pytz() - tm._skip_if_no_dateutil() - - dates = ['2014-03-07', '2014-01-01 09:00', - '2014-01-01 00:00:00.000000001'] - - # dateutil zone change (only matters 
for repr) - import dateutil - if (dateutil.__version__ >= LooseVersion('2.3') and - (dateutil.__version__ <= LooseVersion('2.4.0') or - dateutil.__version__ >= LooseVersion('2.6.0'))): - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/US/Pacific'] - else: - timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', - 'dateutil/America/Los_Angeles'] - - freqs = ['D', 'M', 'S', 'N'] - - for date in dates: - for tz in timezones: - for freq in freqs: - - # avoid to match with timezone name - freq_repr = "'{0}'".format(freq) - if tz.startswith('dateutil'): - tz_repr = tz.replace('dateutil', '') - else: - tz_repr = tz - - date_only = Timestamp(date) - self.assertIn(date, repr(date_only)) - self.assertNotIn(tz_repr, repr(date_only)) - self.assertNotIn(freq_repr, repr(date_only)) - self.assertEqual(date_only, eval(repr(date_only))) - - date_tz = Timestamp(date, tz=tz) - self.assertIn(date, repr(date_tz)) - self.assertIn(tz_repr, repr(date_tz)) - self.assertNotIn(freq_repr, repr(date_tz)) - self.assertEqual(date_tz, eval(repr(date_tz))) - - date_freq = Timestamp(date, freq=freq) - self.assertIn(date, repr(date_freq)) - self.assertNotIn(tz_repr, repr(date_freq)) - self.assertIn(freq_repr, repr(date_freq)) - self.assertEqual(date_freq, eval(repr(date_freq))) - - date_tz_freq = Timestamp(date, tz=tz, freq=freq) - self.assertIn(date, repr(date_tz_freq)) - self.assertIn(tz_repr, repr(date_tz_freq)) - self.assertIn(freq_repr, repr(date_tz_freq)) - self.assertEqual(date_tz_freq, eval(repr(date_tz_freq))) - - # this can cause the tz field to be populated, but it's redundant to - # information in the datestring - tm._skip_if_no_pytz() - import pytz # noqa - date_with_utc_offset = Timestamp('2014-03-13 00:00:00-0400', tz=None) - self.assertIn('2014-03-13 00:00:00-0400', repr(date_with_utc_offset)) - self.assertNotIn('tzoffset', repr(date_with_utc_offset)) - self.assertIn('pytz.FixedOffset(-240)', repr(date_with_utc_offset)) - expr = repr(date_with_utc_offset).replace("'pytz.FixedOffset(-240)'", - 'pytz.FixedOffset(-240)') - self.assertEqual(date_with_utc_offset, eval(expr)) - - def test_bounds_with_different_units(self): - out_of_bounds_dates = ('1677-09-21', '2262-04-12', ) - - time_units = ('D', 'h', 'm', 's', 'ms', 'us') - - for date_string in out_of_bounds_dates: - for unit in time_units: - self.assertRaises(ValueError, Timestamp, np.datetime64( - date_string, dtype='M8[%s]' % unit)) - - in_bounds_dates = ('1677-09-23', '2262-04-11', ) - - for date_string in in_bounds_dates: - for unit in time_units: - Timestamp(np.datetime64(date_string, dtype='M8[%s]' % unit)) - - def test_tz(self): - t = '2014-02-01 09:00' - ts = Timestamp(t) - local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local, Timestamp(t, tz='Asia/Tokyo')) - conv = local.tz_convert('US/Eastern') - self.assertEqual(conv, Timestamp('2014-01-31 19:00', tz='US/Eastern')) - self.assertEqual(conv.hour, 19) - - # preserves nanosecond - ts = Timestamp(t) + offsets.Nano(5) - local = ts.tz_localize('Asia/Tokyo') - self.assertEqual(local.hour, 9) - self.assertEqual(local.nanosecond, 5) - conv = local.tz_convert('US/Eastern') - self.assertEqual(conv.nanosecond, 5) - self.assertEqual(conv.hour, 19) - - def test_tz_localize_ambiguous(self): - - ts = Timestamp('2014-11-02 01:00') - ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) - ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) - - rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') - self.assertEqual(rng[1], ts_dst) - 
self.assertEqual(rng[2], ts_no_dst) - self.assertRaises(ValueError, ts.tz_localize, 'US/Eastern', - ambiguous='infer') - - # GH 8025 - with tm.assertRaisesRegexp(TypeError, - 'Cannot localize tz-aware Timestamp, use ' - 'tz_convert for conversions'): - Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') - - with tm.assertRaisesRegexp(TypeError, - 'Cannot convert tz-naive Timestamp, use ' - 'tz_localize to localize'): - Timestamp('2011-01-01').tz_convert('Asia/Tokyo') - - def test_tz_localize_nonexistent(self): - # See issue 13057 - from pytz.exceptions import NonExistentTimeError - times = ['2015-03-08 02:00', '2015-03-08 02:30', - '2015-03-29 02:00', '2015-03-29 02:30'] - timezones = ['US/Eastern', 'US/Pacific', - 'Europe/Paris', 'Europe/Belgrade'] - for t, tz in zip(times, timezones): - ts = Timestamp(t) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz) - self.assertRaises(NonExistentTimeError, ts.tz_localize, - tz, errors='raise') - self.assertIs(ts.tz_localize(tz, errors='coerce'), - pd.NaT) - - def test_tz_localize_errors_ambiguous(self): - # See issue 13057 - from pytz.exceptions import AmbiguousTimeError - ts = pd.Timestamp('2015-11-1 01:00') - self.assertRaises(AmbiguousTimeError, - ts.tz_localize, 'US/Pacific', errors='coerce') - - def test_tz_localize_roundtrip(self): - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t) - localized = ts.tz_localize(tz) - self.assertEqual(localized, Timestamp(t, tz=tz)) - - with tm.assertRaises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - self.assertEqual(reset, ts) - self.assertTrue(reset.tzinfo is None) - - def test_tz_convert_roundtrip(self): - for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']: - for t in ['2014-02-01 09:00', '2014-07-08 09:00', - '2014-11-01 17:00', '2014-11-05 00:00']: - ts = Timestamp(t, tz='UTC') - converted = ts.tz_convert(tz) - - reset = converted.tz_convert(None) - self.assertEqual(reset, Timestamp(t)) - self.assertTrue(reset.tzinfo is None) - self.assertEqual(reset, - converted.tz_convert('UTC').tz_localize(None)) - - def test_barely_oob_dts(self): - one_us = np.timedelta64(1).astype('timedelta64[us]') - - # By definition we can't go out of bounds in [ns], so we - # convert the datetime64s to [us] so we can go out of bounds - min_ts_us = np.datetime64(Timestamp.min).astype('M8[us]') - max_ts_us = np.datetime64(Timestamp.max).astype('M8[us]') - - # No error for the min/max datetimes - Timestamp(min_ts_us) - Timestamp(max_ts_us) - - # One us less than the minimum is an error - self.assertRaises(ValueError, Timestamp, min_ts_us - one_us) - - # One us more than the maximum is an error - self.assertRaises(ValueError, Timestamp, max_ts_us + one_us) - - def test_utc_z_designator(self): - self.assertEqual(get_timezone( - Timestamp('2014-11-02 01:00Z').tzinfo), 'UTC') - - def test_now(self): - # #9000 - ts_from_string = Timestamp('now') - ts_from_method = Timestamp.now() - ts_datetime = datetime.datetime.now() - - ts_from_string_tz = Timestamp('now', tz='US/Eastern') - ts_from_method_tz = Timestamp.now(tz='US/Eastern') - - # Check that the delta between the times is less than 1s (arbitrarily - # small) - delta = Timedelta(seconds=1) - self.assertTrue(abs(ts_from_method - ts_from_string) < delta) - self.assertTrue(abs(ts_datetime - ts_from_method) < delta) - self.assertTrue(abs(ts_from_method_tz - 
ts_from_string_tz) < delta) - self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - - ts_from_method_tz.tz_localize(None)) < delta) - - def test_today(self): - - ts_from_string = Timestamp('today') - ts_from_method = Timestamp.today() - ts_datetime = datetime.datetime.today() - - ts_from_string_tz = Timestamp('today', tz='US/Eastern') - ts_from_method_tz = Timestamp.today(tz='US/Eastern') - - # Check that the delta between the times is less than 1s (arbitrarily - # small) - delta = Timedelta(seconds=1) - self.assertTrue(abs(ts_from_method - ts_from_string) < delta) - self.assertTrue(abs(ts_datetime - ts_from_method) < delta) - self.assertTrue(abs(ts_from_method_tz - ts_from_string_tz) < delta) - self.assertTrue(abs(ts_from_string_tz.tz_localize(None) - - ts_from_method_tz.tz_localize(None)) < delta) - - def test_asm8(self): - np.random.seed(7960929) - ns = [Timestamp.min.value, Timestamp.max.value, 1000, ] - for n in ns: - self.assertEqual(Timestamp(n).asm8.view('i8'), - np.datetime64(n, 'ns').view('i8'), n) - self.assertEqual(Timestamp('nat').asm8.view('i8'), - np.datetime64('nat', 'ns').view('i8')) - - def test_fields(self): - def check(value, equal): - # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) - self.assertEqual(value, equal) - - # GH 10050 - ts = Timestamp('2015-05-10 09:06:03.000100001') - check(ts.year, 2015) - check(ts.month, 5) - check(ts.day, 10) - check(ts.hour, 9) - check(ts.minute, 6) - check(ts.second, 3) - self.assertRaises(AttributeError, lambda: ts.millisecond) - check(ts.microsecond, 100) - check(ts.nanosecond, 1) - check(ts.dayofweek, 6) - check(ts.quarter, 2) - check(ts.dayofyear, 130) - check(ts.week, 19) - check(ts.daysinmonth, 31) - check(ts.daysinmonth, 31) - - def test_nat_fields(self): - # GH 10050 - ts = Timestamp('NaT') - self.assertTrue(np.isnan(ts.year)) - self.assertTrue(np.isnan(ts.month)) - self.assertTrue(np.isnan(ts.day)) - self.assertTrue(np.isnan(ts.hour)) - self.assertTrue(np.isnan(ts.minute)) - self.assertTrue(np.isnan(ts.second)) - self.assertTrue(np.isnan(ts.microsecond)) - self.assertTrue(np.isnan(ts.nanosecond)) - self.assertTrue(np.isnan(ts.dayofweek)) - self.assertTrue(np.isnan(ts.quarter)) - self.assertTrue(np.isnan(ts.dayofyear)) - self.assertTrue(np.isnan(ts.week)) - self.assertTrue(np.isnan(ts.daysinmonth)) - self.assertTrue(np.isnan(ts.days_in_month)) - - def test_pprint(self): - # GH12622 - import pprint - nested_obj = {'foo': 1, - 'bar': [{'w': {'a': Timestamp('2011-01-01')}}] * 10} - result = pprint.pformat(nested_obj, width=50) - expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, - {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], - 'foo': 1}""" - self.assertEqual(result, expected) - - def to_datetime_depr(self): - # see gh-8254 - ts = Timestamp('2011-01-01') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - expected = datetime.datetime(2011, 1, 1) - result = ts.to_datetime() - self.assertEqual(result, expected) - - def to_pydatetime_nonzero_nano(self): - ts = Timestamp('2011-01-01 9:00:00.123456789') - - # Warn the user of data loss (nanoseconds). 
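The removed to_pydatetime test above is about nanosecond truncation; as a rough standalone sketch of that data loss (my own variable names, assuming only `import pandas as pd`):

    import pandas as pd

    ts = pd.Timestamp('2011-01-01 09:00:00.123456789')
    # The Timestamp itself keeps nanosecond precision (ts.nanosecond == 789),
    # but a stdlib datetime only stores microseconds, so the trailing
    # nanoseconds are dropped and pandas emits a UserWarning about the loss.
    dt = ts.to_pydatetime()  # datetime.datetime(2011, 1, 1, 9, 0, 0, 123456)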
- with tm.assert_produces_warning(UserWarning, - check_stacklevel=False): - expected = datetime.datetime(2011, 1, 1, 9, 0, 0, 123456) - result = ts.to_pydatetime() - self.assertEqual(result, expected) - - class TestDatetimeParsingWrappers(tm.TestCase): def test_does_not_convert_mixed_integer(self): bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') @@ -1117,181 +523,6 @@ def test_parsing_timezone_offsets(self): ) -class TestTimestampNsOperations(tm.TestCase): - def setUp(self): - self.timestamp = Timestamp(datetime.datetime.utcnow()) - - def assert_ns_timedelta(self, modified_timestamp, expected_value): - value = self.timestamp.value - modified_value = modified_timestamp.value - - self.assertEqual(modified_value - value, expected_value) - - def test_timedelta_ns_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'ns'), - -123) - - def test_timedelta_ns_based_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64( - 1234567898, 'ns'), 1234567898) - - def test_timedelta_us_arithmetic(self): - self.assert_ns_timedelta(self.timestamp + np.timedelta64(-123, 'us'), - -123000) - - def test_timedelta_ms_arithmetic(self): - time = self.timestamp + np.timedelta64(-123, 'ms') - self.assert_ns_timedelta(time, -123000000) - - def test_nanosecond_string_parsing(self): - ts = Timestamp('2013-05-01 07:15:45.123456789') - # GH 7878 - expected_repr = '2013-05-01 07:15:45.123456789' - expected_value = 1367392545123456789 - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789+09:00', tz='Asia/Tokyo') - self.assertEqual(ts.value, expected_value - 9 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789', tz='UTC') - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - ts = Timestamp('2013-05-01 07:15:45.123456789', tz='US/Eastern') - self.assertEqual(ts.value, expected_value + 4 * 3600 * 1000000000) - self.assertIn(expected_repr, repr(ts)) - - # GH 10041 - ts = Timestamp('20130501T071545.123456789') - self.assertEqual(ts.value, expected_value) - self.assertIn(expected_repr, repr(ts)) - - def test_nanosecond_timestamp(self): - # GH 7610 - expected = 1293840000000000005 - t = Timestamp('2011-01-01') + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000005Z')) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000005')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 5) - - expected = 1293840000000000010 - t = t + offsets.Nano(5) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - t = Timestamp(t) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - t = Timestamp(np_datetime64_compat('2011-01-01 00:00:00.000000010Z')) - self.assertEqual(repr(t), "Timestamp('2011-01-01 00:00:00.000000010')") - self.assertEqual(t.value, expected) - self.assertEqual(t.nanosecond, 10) - - def test_nat_arithmetic(self): - # GH 
6873 - i = 2 - f = 1.5 - - for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: - self.assertIs(left / right, pd.NaT) - self.assertIs(left * right, pd.NaT) - self.assertIs(right * left, pd.NaT) - with tm.assertRaises(TypeError): - right / left - - # Timestamp / datetime - t = Timestamp('2014-01-01') - dt = datetime.datetime(2014, 1, 1) - for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # timedelta-like - # offsets are tested in test_offsets.py - - delta = datetime.timedelta(3600) - td = Timedelta('5s') - - for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: - # NaT + timedelta-like returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(right - left, pd.NaT) - self.assertIs(left - right, pd.NaT) - - # GH 11718 - tm._skip_if_no_pytz() - import pytz - - t_utc = Timestamp('2014-01-01', tz='UTC') - t_tz = Timestamp('2014-01-01', tz='US/Eastern') - dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) - - for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz), - (pd.NaT, dt_tz)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # int addition / subtraction - for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - def test_nat_arithmetic_index(self): - # GH 11718 - - # datetime - tm._skip_if_no_pytz() - - dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - self.assert_index_equal(dti + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti, exp) - - dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], - tz='US/Eastern', name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') - self.assert_index_equal(dti_tz + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti_tz, exp) - - exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - # timedelta - tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, tdi)]: - self.assert_index_equal(left + right, exp) - self.assert_index_equal(right + left, exp) - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - class TestTslib(tm.TestCase): def test_intraday_conversion_factors(self): self.assertEqual(period_asfreq( @@ -1461,86 +692,6 @@ def _check_round(freq, expected): stamp.round('foo') -class TestTimestampOps(tm.TestCase): - def test_timestamp_and_datetime(self): - self.assertEqual((Timestamp(datetime.datetime( - 2013, 10, 13)) - datetime.datetime(2013, 10, 12)).days, 1) - self.assertEqual((datetime.datetime(2013, 10, 12) - - Timestamp(datetime.datetime(2013, 10, 13))).days, -1) - - def test_timestamp_and_series(self): - timestamp_series = Series(date_range('2014-03-17', periods=2, freq='D', - tz='US/Eastern')) - first_timestamp = timestamp_series[0] - - delta_series = 
Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')]) - assert_series_equal(timestamp_series - first_timestamp, delta_series) - assert_series_equal(first_timestamp - timestamp_series, -delta_series) - - def test_addition_subtraction_types(self): - # Assert on the types resulting from Timestamp +/- various date/time - # objects - datetime_instance = datetime.datetime(2014, 3, 4) - timedelta_instance = datetime.timedelta(seconds=1) - # build a timestamp with a frequency, since then it supports - # addition/subtraction of integers - timestamp_instance = date_range(datetime_instance, periods=1, - freq='D')[0] - - self.assertEqual(type(timestamp_instance + 1), Timestamp) - self.assertEqual(type(timestamp_instance - 1), Timestamp) - - # Timestamp + datetime not supported, though subtraction is supported - # and yields timedelta more tests in tseries/base/tests/test_base.py - self.assertEqual( - type(timestamp_instance - datetime_instance), Timedelta) - self.assertEqual( - type(timestamp_instance + timedelta_instance), Timestamp) - self.assertEqual( - type(timestamp_instance - timedelta_instance), Timestamp) - - # Timestamp +/- datetime64 not supported, so not tested (could possibly - # assert error raised?) - timedelta64_instance = np.timedelta64(1, 'D') - self.assertEqual( - type(timestamp_instance + timedelta64_instance), Timestamp) - self.assertEqual( - type(timestamp_instance - timedelta64_instance), Timestamp) - - def test_addition_subtraction_preserve_frequency(self): - timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0] - timedelta_instance = datetime.timedelta(days=1) - original_freq = timestamp_instance.freq - self.assertEqual((timestamp_instance + 1).freq, original_freq) - self.assertEqual((timestamp_instance - 1).freq, original_freq) - self.assertEqual( - (timestamp_instance + timedelta_instance).freq, original_freq) - self.assertEqual( - (timestamp_instance - timedelta_instance).freq, original_freq) - - timedelta64_instance = np.timedelta64(1, 'D') - self.assertEqual( - (timestamp_instance + timedelta64_instance).freq, original_freq) - self.assertEqual( - (timestamp_instance - timedelta64_instance).freq, original_freq) - - def test_resolution(self): - - for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', - 'S', 'L', 'U'], - [RESO_DAY, RESO_DAY, - RESO_DAY, RESO_DAY, - RESO_HR, RESO_MIN, - RESO_SEC, RESO_MS, - RESO_US]): - for tz in [None, 'Asia/Tokyo', 'US/Eastern', - 'dateutil/US/Eastern']: - idx = date_range(start='2013-04-01', periods=30, freq=freq, - tz=tz) - result = period.resolution(idx.asi8, idx.tz) - self.assertEqual(result, expected) - - if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From a7f7127eb5dc0db51475a5eeb68d45c7590b74b6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Feb 2017 11:31:12 -0500 Subject: [PATCH 012/353] TST: making test files a bit more balanced TST: move parts of test_datetimelike.py to indexes/datetimes --- pandas/sparse/tests/test_frame.py | 44 ++ pandas/tests/frame/test_missing.py | 34 ++ pandas/tests/frame/test_timeseries.py | 58 ++- pandas/tests/indexes/datetimelike.py | 40 ++ pandas/tests/indexes/datetimes/test_astype.py | 62 +++ .../indexes/datetimes/test_construction.py | 17 + .../tests/indexes/datetimes/test_datetime.py | 8 + .../indexes/datetimes/test_datetimelike.py | 76 +++ pandas/tests/indexes/datetimes/test_misc.py | 62 ++- pandas/tests/indexes/test_datetimelike.py | 111 +--- pandas/tests/scalar/test_timestamp.py | 59 +++ 
pandas/tests/series/test_indexing.py | 129 +++++ pandas/tests/series/test_missing.py | 53 +- pandas/tests/series/test_timeseries.py | 489 ------------------ 14 files changed, 638 insertions(+), 604 deletions(-) create mode 100644 pandas/tests/indexes/datetimelike.py create mode 100644 pandas/tests/indexes/datetimes/test_datetimelike.py diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index b9e8a31393931..23bb827974c61 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -705,6 +705,50 @@ def test_fillna_fill_value(self): tm.assert_frame_equal(sparse.fillna(-1).to_dense(), df.fillna(-1), check_dtype=False) + def test_sparse_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + sdf = df.to_sparse() + + result = sdf[:2].reindex(index, method='pad', limit=5) + + expected = sdf[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected.values[-3:] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + result = sdf[-2:].reindex(index, method='backfill', limit=5) + + expected = sdf[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected.values[:3] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + def test_sparse_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + sdf = df.to_sparse() + + result = sdf[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = sdf[:2].reindex(index).fillna(method='pad') + expected = expected.to_dense() + expected.values[-3:] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + + result = sdf[-2:].reindex(index) + result = result.fillna(method='backfill', limit=5) + + expected = sdf[-2:].reindex(index).fillna(method='backfill') + expected = expected.to_dense() + expected.values[:3] = np.nan + expected = expected.to_sparse() + tm.assert_frame_equal(result, expected) + def test_rename(self): # just check this works renamed = self.frame.rename(index=str) # noqa diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index c4f037e85edf6..a8c9c72956463 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -322,6 +322,40 @@ def test_bfill(self): assert_frame_equal(self.tsframe.bfill(), self.tsframe.fillna(method='bfill')) + def test_frame_pad_backfill_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index, method='pad', limit=5) + + expected = df[:2].reindex(index).fillna(method='pad') + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index, method='backfill', limit=5) + + expected = df[-2:].reindex(index).fillna(method='backfill') + expected.values[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index) + result = result.fillna(method='pad', limit=5) + + expected = df[:2].reindex(index).fillna(method='pad') + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index) + result = result.fillna(method='backfill', limit=5) + + expected = df[-2:].reindex(index).fillna(method='backfill') + expected.values[:3] = np.nan + 
tm.assert_frame_equal(result, expected) + def test_fillna_skip_certain_blocks(self): # don't try to fill boolean, int blocks diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 85967e9eda0d6..934aafc500611 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -8,7 +8,9 @@ from numpy.random import randn import numpy as np -from pandas import DataFrame, Series, Index, Timestamp, DatetimeIndex +from pandas import (DataFrame, Series, Index, + Timestamp, DatetimeIndex, + to_datetime, date_range) import pandas as pd import pandas.tseries.offsets as offsets @@ -117,6 +119,60 @@ def test_pct_change_shift_over_nas(self): edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf) + def test_frame_ctor_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + dates = np.asarray(rng) + + df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) + self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) + + def test_frame_add_datetime64_column(self): + rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') + df = DataFrame(index=np.arange(len(rng))) + + df['A'] = rng + self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) + + def test_frame_datetime64_pre1900_repr(self): + df = DataFrame({'year': date_range('1/1/1700', periods=50, + freq='A-DEC')}) + # it works! + repr(df) + + def test_frame_add_datetime64_col_other_units(self): + n = 100 + + units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] + + ns_dtype = np.dtype('M8[ns]') + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + ex_vals = to_datetime(vals.astype('O')).values + + self.assertEqual(df[unit].dtype, ns_dtype) + self.assertTrue((df[unit].values == ex_vals).all()) + + # Test insertion into existing datetime64 column + df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) + df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) + + for unit in units: + dtype = np.dtype('M8[%s]' % unit) + vals = np.arange(n, dtype=np.int64).view(dtype) + + tmp = df.copy() + + tmp['dates'] = vals + ex_vals = to_datetime(vals.astype('O')).values + + self.assertTrue((tmp['dates'].values == ex_vals).all()) + def test_shift(self): # naive shift shiftedFrame = self.tsframe.shift(5) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py new file mode 100644 index 0000000000000..964511a2e9d5b --- /dev/null +++ b/pandas/tests/indexes/datetimelike.py @@ -0,0 +1,40 @@ +""" generic datetimelike tests """ + +from .common import Base +import pandas.util.testing as tm + + +class DatetimeLike(Base): + + def test_shift_identity(self): + + idx = self.create_index() + self.assert_index_equal(idx, idx.shift(0)) + + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = 'foo' + self.assertFalse("length=%s" % len(idx) in str(idx)) + self.assertTrue("'foo'" in str(idx)) + self.assertTrue(idx.__class__.__name__ in str(idx)) + + if hasattr(idx, 'tz'): + if idx.tz is not None: + self.assertTrue(idx.tz in str(idx)) + if hasattr(idx, 'freq'): + self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) + + def test_view(self): + super(DatetimeLike, self).test_view() + + i = self.create_index() + + i_view = i.view('i8') + result = self._holder(i) + tm.assert_index_equal(result, i) + + i_view = 
i.view(self._holder) + result = self._holder(i) + tm.assert_index_equal(result, i_view) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index f64d18a69a093..d452a7e1840d7 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -1,5 +1,6 @@ import numpy as np +from datetime import datetime import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, @@ -120,3 +121,64 @@ def test_astype_raises(self): self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') self.assertRaises(ValueError, idx.astype, 'datetime64') self.assertRaises(ValueError, idx.astype, 'datetime64[D]') + + def test_index_convert_to_datetime_array(self): + tm._skip_if_no_pytz() + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') + rng_utc = date_range('20090415', '20090519', tz='utc') + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz=pytz.timezone('US/Eastern')) + rng_utc = date_range('20090415', '20090519', tz=pytz.utc) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + + def _check_rng(rng): + converted = rng.to_pydatetime() + tm.assertIsInstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + tm.assertIsInstance(x, datetime) + self.assertEqual(x, stamp.to_pydatetime()) + self.assertEqual(x.tzinfo, stamp.tzinfo) + + rng = date_range('20090415', '20090519') + rng_eastern = date_range('20090415', '20090519', + tz='dateutil/US/Eastern') + rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index f8eca0f0d91d0..03bc0e0c554b0 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -2,6 +2,7 @@ from datetime import timedelta import pandas as pd +from pandas import tslib import pandas.util.testing as tm from pandas.tslib import OutOfBoundsDatetime from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, @@ -477,6 +478,22 @@ def test_dti_constructor_numpy_timeunits(self): tm.assert_index_equal(DatetimeIndex(values), base) tm.assert_index_equal(to_datetime(values), base) + def test_ctor_str_intraday(self): + rng = DatetimeIndex(['1-1-2000 00:00:01']) + self.assertEqual(rng[0].second, 1) + + def test_is_(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + self.assertTrue(dti.is_(dti)) + 
self.assertTrue(dti.is_(dti.view())) + self.assertFalse(dti.is_(dti.copy())) + + def test_index_cast_datetime64_other_units(self): + arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') + idx = Index(arr) + + self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) + def test_constructor_int64_nocopy(self): # #1624 arr = np.arange(1000, dtype=np.int64) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index f92fca6ecfa14..628cb9df94e39 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -94,6 +94,14 @@ def test_get_indexer(self): with tm.assertRaises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') + def test_reasonable_keyerror(self): + # GH #1062 + index = DatetimeIndex(['1/3/2000']) + try: + index.get_loc('1/1/2000') + except KeyError as e: + self.assertIn('2000', str(e)) + def test_roundtrip_pickle_with_tz(self): # GH 8367 diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py new file mode 100644 index 0000000000000..b32801a8bcf25 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -0,0 +1,76 @@ +""" generic tests from the Datetimelike class """ + +import numpy as np +import pandas as pd +from pandas.util import testing as tm +from pandas import Series, Index, DatetimeIndex, date_range + +from ..datetimelike import DatetimeLike + +class TestDatetimeIndex(DatetimeLike, tm.TestCase): + _holder = DatetimeIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeDateIndex(10)) + self.setup_indices() + + def create_index(self): + return date_range('20130101', periods=5) + + def test_shift(self): + + # test shift for datetimeIndex and non datetimeIndex + # GH8083 + + drange = self.create_index() + result = drange.shift(1) + expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', + '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', + '2013-01-03', '2013-01-04'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D') + expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', + '2013-01-10', + '2013-01-11'], freq='D') + self.assert_index_equal(result, expected) + + def test_pickle_compat_construction(self): + pass + + def test_intersection(self): + first = self.index + second = self.index[5:] + intersect = first.intersection(second) + self.assertTrue(tm.equalContents(intersect, second)) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + self.assertTrue(tm.equalContents(result, second)) + + third = Index(['a', 'b', 'c']) + result = first.intersection(third) + expected = pd.Index([], dtype=object) + self.assert_index_equal(result, expected) + + def test_union(self): + first = self.index[:5] + second = self.index[5:] + everything = self.index + union = first.union(second) + self.assertTrue(tm.equalContents(union, everything)) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.union(case) + self.assertTrue(tm.equalContents(result, everything)) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 
4685df580190b..92aad5a0b1997 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -4,8 +4,9 @@ import pandas.lib as lib import pandas.util.testing as tm from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, - Series, DataFrame, Float64Index, date_range, Timestamp) - + Series, DataFrame, Float64Index, date_range, + Timestamp, isnull) +from pandas import tslib from pandas.util.testing import assert_series_equal @@ -143,6 +144,63 @@ def test_datetimeindex_integers_shift(self): expected = rng.shift(-5) tm.assert_index_equal(result, expected) + def test_string_na_nat_conversion(self): + # GH #999, #858 + + from pandas.compat import parse_date + + strings = np.array(['1/1/2000', '1/2/2000', np.nan, + '1/4/2000, 12:34:56'], dtype=object) + + expected = np.empty(4, dtype='M8[ns]') + for i, val in enumerate(strings): + if isnull(val): + expected[i] = tslib.iNaT + else: + expected[i] = parse_date(val) + + result = tslib.array_to_datetime(strings) + tm.assert_almost_equal(result, expected) + + result2 = to_datetime(strings) + tm.assertIsInstance(result2, DatetimeIndex) + tm.assert_numpy_array_equal(result, result2.values) + + malformed = np.array(['1/100/2000', np.nan], dtype=object) + + # GH 10636, default is now 'raise' + self.assertRaises(ValueError, + lambda: to_datetime(malformed, errors='raise')) + + result = to_datetime(malformed, errors='ignore') + tm.assert_numpy_array_equal(result, malformed) + + self.assertRaises(ValueError, to_datetime, malformed, errors='raise') + + idx = ['a', 'b', 'c', 'd', 'e'] + series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, + '1/5/2000'], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000'), np.nan, + to_datetime('1/3/2000'), np.nan, + to_datetime('1/5/2000')], index=idx, name='foo') + + result = to_datetime(series) + dresult = to_datetime(dseries) + + expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) + for i in range(5): + x = series[i] + if isnull(x): + expected[i] = tslib.iNaT + else: + expected[i] = to_datetime(x) + + assert_series_equal(result, expected, check_names=False) + self.assertEqual(result.name, 'foo') + + assert_series_equal(dresult, expected, check_names=False) + self.assertEqual(dresult.name, 'foo') + def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=1) repr(dr) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 32e4029a57fe9..e5a4ced4ced4d 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -4,119 +4,14 @@ from datetime import timedelta import pandas as pd -import pandas.util.testing as tm +from pandas.util import testing as tm from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, NaT, Period, PeriodIndex, Series, Timedelta, - TimedeltaIndex, date_range, period_range, + TimedeltaIndex, period_range, timedelta_range, notnull) -from .common import Base - - -class DatetimeLike(Base): - - def test_shift_identity(self): - - idx = self.create_index() - self.assert_index_equal(idx, idx.shift(0)) - - def test_str(self): - - # test the string repr - idx = self.create_index() - idx.name = 'foo' - self.assertFalse("length=%s" % len(idx) in str(idx)) - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) - - if hasattr(idx, 'tz'): - if idx.tz is not None: - self.assertTrue(idx.tz in str(idx)) - if hasattr(idx, 'freq'): - self.assertTrue("freq='%s'" % idx.freqstr 
in str(idx)) - - def test_view(self): - super(DatetimeLike, self).test_view() - - i = self.create_index() - - i_view = i.view('i8') - result = self._holder(i) - tm.assert_index_equal(result, i) - - i_view = i.view(self._holder) - result = self._holder(i) - tm.assert_index_equal(result, i_view) - - -class TestDatetimeIndex(DatetimeLike, tm.TestCase): - _holder = DatetimeIndex - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=tm.makeDateIndex(10)) - self.setup_indices() - - def create_index(self): - return date_range('20130101', periods=5) - - def test_shift(self): - - # test shift for datetimeIndex and non datetimeIndex - # GH8083 - - drange = self.create_index() - result = drange.shift(1) - expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', - '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(-1) - expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', - '2013-01-03', '2013-01-04'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D') - expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', - '2013-01-10', - '2013-01-11'], freq='D') - self.assert_index_equal(result, expected) - - def test_pickle_compat_construction(self): - pass - - def test_intersection(self): - first = self.index - second = self.index[5:] - intersect = first.intersection(second) - self.assertTrue(tm.equalContents(intersect, second)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) - - third = Index(['a', 'b', 'c']) - result = first.intersection(third) - expected = pd.Index([], dtype=object) - self.assert_index_equal(result, expected) - - def test_union(self): - first = self.index[:5] - second = self.index[5:] - everything = self.index - union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) +from .datetimelike import DatetimeLike class TestPeriodIndex(DatetimeLike, tm.TestCase): diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 94369ebbd0a19..f686f1aa6dc47 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -566,6 +566,65 @@ def test_nat_fields(self): self.assertTrue(np.isnan(ts.daysinmonth)) self.assertTrue(np.isnan(ts.days_in_month)) + def test_nat_vector_field_access(self): + idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'is_leap_year'] + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_numpy_array_equal(result, np.array(expected)) + + s = pd.Series(idx) + + for field in fields: + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + self.assert_series_equal(result, pd.Series(expected)) + + def test_nat_scalar_field_access(self): + fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', + 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] + for field in fields: + result 
= getattr(NaT, field) + self.assertTrue(np.isnan(result)) + + def test_NaT_methods(self): + # GH 9513 + raise_methods = ['astimezone', 'combine', 'ctime', 'dst', + 'fromordinal', 'fromtimestamp', 'isocalendar', + 'strftime', 'strptime', 'time', 'timestamp', + 'timetuple', 'timetz', 'toordinal', 'tzname', + 'utcfromtimestamp', 'utcnow', 'utcoffset', + 'utctimetuple'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] + nan_methods = ['weekday', 'isoweekday'] + + for method in raise_methods: + if hasattr(NaT, method): + self.assertRaises(ValueError, getattr(NaT, method)) + + for method in nan_methods: + if hasattr(NaT, method): + self.assertTrue(np.isnan(getattr(NaT, method)())) + + for method in nat_methods: + if hasattr(NaT, method): + # see gh-8254 + exp_warning = None + if method == 'to_datetime': + exp_warning = FutureWarning + with tm.assert_produces_warning( + exp_warning, check_stacklevel=False): + self.assertIs(getattr(NaT, method)(), NaT) + + # GH 12300 + self.assertEqual(NaT.isoformat(), 'NaT') + def test_pprint(self): # GH12622 import pprint diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index e6209a853e958..d4b6e7dd5349f 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -346,6 +346,135 @@ def test_getitem_setitem_slice_integers(self): self.assertTrue((s[:4] == 0).all()) self.assertTrue(not (s[4:] == 0).any()) + def test_getitem_setitem_datetime_tz_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone as tz + + from pandas import date_range + + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + + # comparison dates with datetime MUST be localized! 
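A hedged sketch of why that comment insists on localized comparison dates: a tz-aware index lines up with tz-aware keys (assuming pytz is installed, as the test itself requires):

    from datetime import datetime
    import numpy as np
    import pandas as pd
    import pytz

    rng = pd.date_range('1/1/1990', periods=50, freq='H', tz='US/Eastern')
    ts = pd.Series(np.random.randn(50), index=rng)

    # 03:00 US/Central and 04:00 US/Eastern are the same UTC instant, so a
    # properly localized key resolves to ts.index[4] and selects that element.
    key = pytz.timezone('US/Central').localize(datetime(1990, 1, 1, 3))
    value = ts[key]  # equals ts[4]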
+ date = tz('US/Central').localize(datetime(1990, 1, 1, 3)) + result[date] = 0 + result[date] = ts[4] + assert_series_equal(result, ts) + + def test_getitem_setitem_datetime_tz_dateutil(self): + tm._skip_if_no_dateutil() + from dateutil.tz import tzutc + from pandas.tslib import _dateutil_gettz as gettz + + tz = lambda x: tzutc() if x == 'UTC' else gettz( + x) # handle special case for utc in dateutil + + from pandas import date_range + + N = 50 + + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', + tz='America/New_York') + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0 + result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] + assert_series_equal(result, ts) + + def test_getitem_setitem_periodindex(self): + from pandas import period_range + + N = 50 + rng = period_range('1/1/1990', periods=N, freq='H') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04"] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result["1990-01-01 04"] = 0 + result["1990-01-01 04"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04":"1990-01-01 07"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04":"1990-01-01 07"] = 0 + result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04" + rb = "1990-01-01 07" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # GH 2782 + result = ts[ts.index[4]] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + def test_getitem_median_slice_bug(self): + index = date_range('20090415', '20090519', freq='2B') + s = Series(np.random.randn(13), index=index) + + indexer = [slice(6, 7, None)] + result = s[indexer] + expected = s[indexer[0]] + assert_series_equal(result, expected) + def test_getitem_out_of_bounds(self): # don't segfault, GH #495 self.assertRaises(IndexError, self.ts.__getitem__, len(self.ts)) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 91da36161e188..8cf0d190a95cc 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -8,11 +8,11 @@ import numpy as np import pandas as pd -from pandas import (Series, isnull, date_range, - MultiIndex, Index) -from pandas.tseries.index import Timestamp +from pandas import (Series, DataFrame, isnull, date_range, + MultiIndex, Index, Timestamp) from pandas.compat import range -from pandas.util.testing import assert_series_equal +from pandas import tslib +from pandas.util.testing import assert_series_equal, assert_frame_equal import 
pandas.util.testing as tm from .common import TestData @@ -283,6 +283,43 @@ def test_fillna_raise(self): self.assertRaises(TypeError, s.fillna, [1, 2]) self.assertRaises(TypeError, s.fillna, (1, 2)) + def test_fillna_nat(self): + series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') + + filled = series.fillna(method='pad') + filled2 = series.fillna(value=series.values[2]) + + expected = series.copy() + expected.values[3] = expected.values[2] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='pad') + filled2 = df.fillna(value=series.values[2]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + + series = Series([tslib.iNaT, 0, 1, 2], dtype='M8[ns]') + + filled = series.fillna(method='bfill') + filled2 = series.fillna(value=series[1]) + + expected = series.copy() + expected[0] = expected[1] + + assert_series_equal(filled, expected) + assert_series_equal(filled2, expected) + + df = DataFrame({'A': series}) + filled = df.fillna(method='bfill') + filled2 = df.fillna(value=series[1]) + expected = DataFrame({'A': expected}) + assert_frame_equal(filled, expected) + assert_frame_equal(filled2, expected) + def test_isnull_for_inf(self): s = Series(['a', np.inf, np.nan, 1.0]) with pd.option_context('mode.use_inf_as_null', True): @@ -518,6 +555,14 @@ def test_pad_nan(self): assert_series_equal(x[1:], expected[1:]) self.assertTrue(np.isnan(x[0]), np.isnan(expected[0])) + def test_pad_require_monotonicity(self): + rng = date_range('1/1/2000', '3/1/2000', freq='B') + + # neither monotonic increasing or decreasing + rng2 = rng[[1, 0, 2]] + + self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') + def test_dropna_preserve_name(self): self.ts[:5] = np.nan result = self.ts.dropna() diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 073b8bfeee131..571a802e37211 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -230,126 +230,6 @@ def test_truncate(self): before=self.ts.index[-1] + offset, after=self.ts.index[0] - offset) - def test_getitem_setitem_datetime_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone as tz - - from pandas import date_range - - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - # also test Timestamp tz handling, GH #2789 - result = ts.copy() - result["1990-01-01 09:00:00+00:00"] = 0 - result["1990-01-01 09:00:00+00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result["1990-01-01 03:00:00-06:00"] = 0 - result["1990-01-01 03:00:00-06:00"] = ts[4] - assert_series_equal(result, ts) - - # repeat with datetimes - result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - - # comparison dates with datetime MUST be localized! 
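The test_fillna_nat hunk in this same diff asserts that missing datetimes fill like any other value; a minimal sketch using ffill(), the method-call equivalent of the fillna(method='pad') spelling in the test:

    import pandas as pd

    s = pd.Series(pd.to_datetime(['2000-01-01', '2000-01-02', None]))
    # The third entry is NaT; forward-filling replaces it with the last
    # valid timestamp while the dtype stays datetime64[ns].
    filled = s.ffill()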
- date = tz('US/Central').localize(datetime(1990, 1, 1, 3)) - result[date] = 0 - result[date] = ts[4] - assert_series_equal(result, ts) - - def test_getitem_setitem_datetime_tz_dateutil(self): - tm._skip_if_no_dateutil() - from dateutil.tz import tzutc - from pandas.tslib import _dateutil_gettz as gettz - - tz = lambda x: tzutc() if x == 'UTC' else gettz( - x) # handle special case for utc in dateutil - - from pandas import date_range - - N = 50 - - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', - tz='America/New_York') - ts = Series(np.random.randn(N), index=rng) - - # also test Timestamp tz handling, GH #2789 - result = ts.copy() - result["1990-01-01 09:00:00+00:00"] = 0 - result["1990-01-01 09:00:00+00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result["1990-01-01 03:00:00-06:00"] = 0 - result["1990-01-01 03:00:00-06:00"] = ts[4] - assert_series_equal(result, ts) - - # repeat with datetimes - result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] - assert_series_equal(result, ts) - - result = ts.copy() - result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = 0 - result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] - assert_series_equal(result, ts) - - def test_getitem_setitem_periodindex(self): - from pandas import period_range - - N = 50 - rng = period_range('1/1/1990', periods=N, freq='H') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04"] = 0 - result["1990-01-01 04"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 04":"1990-01-01 07"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04":"1990-01-01 07"] = 0 - result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04" - rb = "1990-01-01 07" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # GH 2782 - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - def test_asfreq(self): ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime( 2009, 11, 30), datetime(2009, 12, 31)]) @@ -513,12 +393,6 @@ def test_empty_series_ops(self): assert_series_equal(a, b + a) self.assertRaises(TypeError, lambda x, y: x - y, b, a) - def test_is_(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - self.assertTrue(dti.is_(dti)) - self.assertTrue(dti.is_(dti.view())) - self.assertFalse(dti.is_(dti.copy())) - def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') @@ -534,159 +408,6 @@ def test_contiguous_boolean_preserve_freq(self): masked = rng[mask] self.assertIsNone(masked.freq) - def test_getitem_median_slice_bug(self): - index = date_range('20090415', '20090519', freq='2B') - s = Series(np.random.randn(13), index=index) - - indexer = [slice(6, 7, None)] - result = s[indexer] - expected = s[indexer[0]] - assert_series_equal(result, expected) - - def test_ctor_str_intraday(self): - rng = DatetimeIndex(['1-1-2000 00:00:01']) - self.assertEqual(rng[0].second, 1) - - 
def test_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index, method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index, method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def test_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - - result = df[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = df[:2].reindex(index).fillna(method='pad') - expected.values[-3:] = np.nan - tm.assert_frame_equal(result, expected) - - result = df[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = df[-2:].reindex(index).fillna(method='backfill') - expected.values[:3] = np.nan - tm.assert_frame_equal(result, expected) - - def test_sparse_frame_pad_backfill_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index, method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index, method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_sparse_frame_fillna_limit(self): - index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) - sdf = df.to_sparse() - - result = sdf[:2].reindex(index) - result = result.fillna(method='pad', limit=5) - - expected = sdf[:2].reindex(index).fillna(method='pad') - expected = expected.to_dense() - expected.values[-3:] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - result = sdf[-2:].reindex(index) - result = result.fillna(method='backfill', limit=5) - - expected = sdf[-2:].reindex(index).fillna(method='backfill') - expected = expected.to_dense() - expected.values[:3] = np.nan - expected = expected.to_sparse() - tm.assert_frame_equal(result, expected) - - def test_pad_require_monotonicity(self): - rng = date_range('1/1/2000', '3/1/2000', freq='B') - - # neither monotonic increasing or decreasing - rng2 = rng[[1, 0, 2]] - - self.assertRaises(ValueError, rng2.get_indexer, rng, method='pad') - - def test_frame_ctor_datetime64_column(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - dates = np.asarray(rng) - - df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates}) - self.assertTrue(np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))) - - def test_frame_add_datetime64_column(self): - rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') - df = DataFrame(index=np.arange(len(rng))) - - df['A'] = rng - self.assertTrue(np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))) - - def test_frame_datetime64_pre1900_repr(self): - df = DataFrame({'year': date_range('1/1/1700', periods=50, - freq='A-DEC')}) - # it works! 
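As a self-contained sketch of the pad/backfill limit behaviour the relocated frame tests above pin down (illustrative names; assumes only numpy and pandas):

    import numpy as np
    import pandas as pd

    index = np.arange(10)
    df = pd.DataFrame(np.random.randn(10, 4), index=index)

    # Only rows 0 and 1 exist before the reindex; forward-filling is capped
    # at 5 positions, so rows 2-6 take row 1's values and rows 7-9 stay NaN.
    result = df[:2].reindex(index, method='pad', limit=5)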
- repr(df) - - def test_frame_add_datetime64_col_other_units(self): - n = 100 - - units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y'] - - ns_dtype = np.dtype('M8[ns]') - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df[unit] = vals - - ex_vals = to_datetime(vals.astype('O')).values - - self.assertEqual(df[unit].dtype, ns_dtype) - self.assertTrue((df[unit].values == ex_vals).all()) - - # Test insertion into existing datetime64 column - df = DataFrame({'ints': np.arange(n)}, index=np.arange(n)) - df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype) - - for unit in units: - dtype = np.dtype('M8[%s]' % unit) - vals = np.arange(n, dtype=np.int64).view(dtype) - - tmp = df.copy() - - tmp['dates'] = vals - ex_vals = to_datetime(vals.astype('O')).values - - self.assertTrue((tmp['dates'].values == ex_vals).all()) - def test_to_datetime_unit(self): epoch = 1370745748 @@ -756,13 +477,6 @@ def test_series_ctor_datetime64(self): series = Series(dates) self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) - def test_index_cast_datetime64_other_units(self): - arr = np.arange(0, 100, 10, dtype=np.int64).view('M8[D]') - - idx = Index(arr) - - self.assertTrue((idx.values == tslib.cast_to_nanoseconds(arr)).all()) - def test_reindex_series_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') series = Series(rng) @@ -796,159 +510,6 @@ def test_series_repr_nat(self): 'dtype: datetime64[ns]') self.assertEqual(result, expected) - def test_fillna_nat(self): - series = Series([0, 1, 2, iNaT], dtype='M8[ns]') - - filled = series.fillna(method='pad') - filled2 = series.fillna(value=series.values[2]) - - expected = series.copy() - expected.values[3] = expected.values[2] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='pad') - filled2 = df.fillna(value=series.values[2]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') - - filled = series.fillna(method='bfill') - filled2 = series.fillna(value=series[1]) - - expected = series.copy() - expected[0] = expected[1] - - assert_series_equal(filled, expected) - assert_series_equal(filled2, expected) - - df = DataFrame({'A': series}) - filled = df.fillna(method='bfill') - filled2 = df.fillna(value=series[1]) - expected = DataFrame({'A': expected}) - assert_frame_equal(filled, expected) - assert_frame_equal(filled2, expected) - - def test_string_na_nat_conversion(self): - # GH #999, #858 - - from pandas.compat import parse_date - - strings = np.array(['1/1/2000', '1/2/2000', np.nan, - '1/4/2000, 12:34:56'], dtype=object) - - expected = np.empty(4, dtype='M8[ns]') - for i, val in enumerate(strings): - if com.isnull(val): - expected[i] = iNaT - else: - expected[i] = parse_date(val) - - result = tslib.array_to_datetime(strings) - assert_almost_equal(result, expected) - - result2 = to_datetime(strings) - tm.assertIsInstance(result2, DatetimeIndex) - tm.assert_numpy_array_equal(result, result2.values) - - malformed = np.array(['1/100/2000', np.nan], dtype=object) - - # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) - - result = to_datetime(malformed, errors='ignore') - tm.assert_numpy_array_equal(result, malformed) - - self.assertRaises(ValueError, 
to_datetime, malformed, errors='raise') - - idx = ['a', 'b', 'c', 'd', 'e'] - series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, - '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') - - result = to_datetime(series) - dresult = to_datetime(dseries) - - expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) - for i in range(5): - x = series[i] - if isnull(x): - expected[i] = iNaT - else: - expected[i] = to_datetime(x) - - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'foo') - - assert_series_equal(dresult, expected, check_names=False) - self.assertEqual(dresult.name, 'foo') - - def test_nat_vector_field_access(self): - idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) - - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) - - s = pd.Series(idx) - - for field in fields: - result = getattr(s.dt, field) - expected = [getattr(x, field) for x in idx] - self.assert_series_equal(result, pd.Series(expected)) - - def test_nat_scalar_field_access(self): - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] - for field in fields: - result = getattr(NaT, field) - self.assertTrue(np.isnan(result)) - - def test_NaT_methods(self): - # GH 9513 - raise_methods = ['astimezone', 'combine', 'ctime', 'dst', - 'fromordinal', 'fromtimestamp', 'isocalendar', - 'strftime', 'strptime', 'time', 'timestamp', - 'timetuple', 'timetz', 'toordinal', 'tzname', - 'utcfromtimestamp', 'utcnow', 'utcoffset', - 'utctimetuple'] - nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] - nan_methods = ['weekday', 'isoweekday'] - - for method in raise_methods: - if hasattr(NaT, method): - self.assertRaises(ValueError, getattr(NaT, method)) - - for method in nan_methods: - if hasattr(NaT, method): - self.assertTrue(np.isnan(getattr(NaT, method)())) - - for method in nat_methods: - if hasattr(NaT, method): - # see gh-8254 - exp_warning = None - if method == 'to_datetime': - exp_warning = FutureWarning - with tm.assert_produces_warning( - exp_warning, check_stacklevel=False): - self.assertIs(getattr(NaT, method)(), NaT) - - # GH 12300 - self.assertEqual(NaT.isoformat(), 'NaT') - def test_index_convert_to_datetime_array(self): tm._skip_if_no_pytz() @@ -968,56 +529,6 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) - def test_index_convert_to_datetime_array_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', - tz=pytz.timezone('US/Eastern')) - rng_utc = date_range('20090415', '20090519', tz=pytz.utc) - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_index_convert_to_datetime_array_dateutil(self): - tm._skip_if_no_dateutil() - import 
dateutil - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', - tz='dateutil/US/Eastern') - rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_reasonable_keyerror(self): - # GH #1062 - index = DatetimeIndex(['1/3/2000']) - try: - index.get_loc('1/1/2000') - except KeyError as e: - self.assertIn('2000', str(e)) - def test_reindex_with_datetimes(self): rng = date_range('1/1/2000', periods=20) ts = Series(np.random.randn(20), index=rng) From 72992df66854465a15f18f7d6445ae5e1a3e0c3d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Feb 2017 12:21:28 -0500 Subject: [PATCH 013/353] TST: more test moving from series/test_timeseries.py --- pandas/tests/frame/test_alter_axes.py | 28 +- pandas/tests/frame/test_apply.py | 9 + pandas/tests/frame/test_combine_concat.py | 16 +- pandas/tests/frame/test_constructors.py | 27 + pandas/tests/frame/test_indexing.py | 15 + pandas/tests/frame/test_timeseries.py | 121 +- pandas/tests/groupby/test_groupby.py | 8 + pandas/tests/indexes/datetimes/test_astype.py | 130 +- .../indexes/datetimes/test_construction.py | 90 +- .../indexes/datetimes/test_date_range.py | 18 + .../indexes/datetimes/test_datetimelike.py | 1 + pandas/tests/indexes/datetimes/test_misc.py | 293 +-- .../indexes/datetimes/test_partial_slcing.py | 256 ++ pandas/tests/indexes/datetimes/test_setops.py | 22 +- pandas/tests/indexes/datetimes/test_tools.py | 1019 ++++++++ pandas/tests/series/test_combine_concat.py | 102 +- pandas/tests/series/test_constructors.py | 45 +- pandas/tests/series/test_dtypes.py | 25 +- pandas/tests/series/test_indexing.py | 540 +++- pandas/tests/series/test_operators.py | 13 + pandas/tests/series/test_timeseries.py | 2173 +---------------- 21 files changed, 2485 insertions(+), 2466 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_partial_slcing.py create mode 100644 pandas/tests/indexes/datetimes/test_tools.py diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index edeca0a664a87..cab627dec63cb 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -8,7 +8,7 @@ from pandas.compat import lrange from pandas import (DataFrame, Series, Index, MultiIndex, - RangeIndex) + RangeIndex, date_range) import pandas as pd from pandas.util.testing import (assert_series_equal, @@ -325,6 +325,32 @@ def test_set_columns(self): with assertRaisesRegexp(ValueError, 'Length mismatch'): self.mixed_frame.columns = cols[::2] + def test_dti_set_index_reindex(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern') + idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo') + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + # 11314 + # with tz + index = date_range(datetime(2015, 10, 1), + datetime(2015, 10, 1, 23), + freq='H', tz='US/Eastern') + df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index) + new_index = date_range(datetime(2015, 10, 2), + datetime(2015, 10, 2, 23), + freq='H', tz='US/Eastern') + + # TODO: unused? 
+ result = df.set_index(new_index) # noqa + + self.assertEqual(new_index.freq, index.freq) + # Renaming def test_rename(self): diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index fe04d1005e003..19fa98afd2163 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -433,6 +433,15 @@ def test_applymap_box(self): 'd': ['Period', 'Period']}) tm.assert_frame_equal(res, exp) + def test_frame_apply_dont_convert_datetime64(self): + from pandas.tseries.offsets import BDay + df = DataFrame({'x1': [datetime(1996, 1, 1)]}) + + df = df.applymap(lambda x: x + BDay()) + df = df.applymap(lambda x: x + BDay()) + + self.assertTrue(df.x1.dtype == 'M8[ns]') + # See gh-12244 def test_apply_non_numpy_dtype(self): df = DataFrame({'dt': pd.date_range( diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 71b6500e7184a..1167662b69375 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -9,7 +9,7 @@ import pandas as pd -from pandas import DataFrame, Index, Series, Timestamp +from pandas import DataFrame, Index, Series, Timestamp, date_range from pandas.compat import lrange from pandas.tests.frame.common import TestData @@ -735,3 +735,17 @@ def test_combine_first_int(self): res = df1.combine_first(df2) tm.assert_frame_equal(res, df1) self.assertEqual(res['a'].dtype, 'int64') + + def test_concat_datetime_datetime64_frame(self): + # #2624 + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), 'hi']) + + df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) + + ind = date_range(start="2000/1/1", freq="D", periods=10) + df1 = DataFrame({'date': ind, 'test': lrange(10)}) + + # it works! + pd.concat([df1, df2_obj]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 07cf6816330bc..fe6a12fcca28a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1920,6 +1920,33 @@ def test_from_index(self): df2 = DataFrame(Series(idx2)) tm.assert_series_equal(df2[0], Series(idx2, name=0)) + def test_frame_dict_constructor_datetime64_1680(self): + dr = date_range('1/1/2012', periods=10) + s = Series(dr, index=dr) + + # it works! + DataFrame({'a': 'foo', 'b': s}, index=dr) + DataFrame({'a': 'foo', 'b': s.values}, index=dr) + + def test_frame_datetime64_mixed_index_ctor_1681(self): + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') + ts = Series(dr) + + # it works! 
+ d = DataFrame({'A': 'foo', 'B': ts}, index=dr) + self.assertTrue(d['B'].isnull().all()) + + def test_frame_timeseries_to_records(self): + index = date_range('1/1/2000', periods=10) + df = DataFrame(np.random.randn(10, 3), index=index, + columns=['a', 'b', 'c']) + + result = df.to_records() + result['index'].dtype == 'M8[ns]' + + result = df.to_records(index=False) + + if __name__ == '__main__': import nose # noqa diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index bc0a68f765903..7d68eac47766e 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1945,6 +1945,21 @@ def test_reindex_methods(self): actual = df.reindex(target, method='nearest', tolerance=0.2) assert_frame_equal(expected, actual) + def test_reindex_frame_add_nat(self): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) + + result = df.reindex(lrange(15)) + self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) + + mask = com.isnull(result)['B'] + self.assertTrue(mask[-5:].all()) + self.assertFalse(mask[:-5].any()) + + def test_set_dataframe_column_ns_dtype(self): + x = DataFrame([datetime.now(), datetime.now()]) + self.assertEqual(x[0].dtype, np.dtype('M8[ns]')) + def test_non_monotonic_reindex_methods(self): dr = pd.date_range('2013-08-01', periods=6, freq='B') data = np.random.randn(6, 1) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 934aafc500611..9a9f0ee67fb89 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -2,7 +2,7 @@ from __future__ import print_function -from datetime import datetime +from datetime import datetime, time from numpy import nan from numpy.random import randn @@ -20,6 +20,7 @@ assertRaisesRegexp) import pandas.util.testing as tm +from pandas.compat import product from pandas.tests.frame.common import TestData @@ -418,6 +419,96 @@ def test_first_last_valid(self): self.assertIsNone(empty.last_valid_index()) self.assertIsNone(empty.first_valid_index()) + def test_at_time_frame(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + rs = ts.at_time(rng[1]) + self.assertTrue((rs.index.hour == rng[1].hour).all()) + self.assertTrue((rs.index.minute == rng[1].minute).all()) + self.assertTrue((rs.index.second == rng[1].second).all()) + + result = ts.at_time('9:30') + expected = ts.at_time(time(9, 30)) + assert_frame_equal(result, expected) + + result = ts.loc[time(9, 30)] + expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] + + assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range('1/1/2000', '1/31/2000') + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts.at_time(time(0, 0)) + assert_frame_equal(result, ts) + + # time doesn't exist + rng = date_range('1/1/2012', freq='23Min', periods=384) + ts = DataFrame(np.random.randn(len(rng), 2), rng) + rs = ts.at_time('16:00') + self.assertEqual(len(rs), 0) + + def test_between_time_frame(self): + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + 
exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue(t >= stime) + else: + self.assertTrue(t > stime) + + if inc_end: + self.assertTrue(t <= etime) + else: + self.assertTrue(t < etime) + + result = ts.between_time('00:00', '01:00') + expected = ts.between_time(stime, etime) + assert_frame_equal(result, expected) + + # across midnight + rng = date_range('1/1/2000', '1/5/2000', freq='5min') + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + self.assertEqual(len(filtered), exp_len) + for rs in filtered.index: + t = rs.time() + if inc_start: + self.assertTrue((t >= stime) or (t <= etime)) + else: + self.assertTrue((t > stime) or (t <= etime)) + + if inc_end: + self.assertTrue((t <= etime) or (t >= stime)) + else: + self.assertTrue((t < etime) or (t >= stime)) + def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. df = pd.DataFrame({'foo': [pd.NaT, pd.NaT, @@ -457,6 +548,34 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): 'new': [1e9, None]}, dtype='datetime64[ns]') tm.assert_frame_equal(result, expected) + def test_frame_to_period(self): + K = 5 + from pandas.tseries.period import period_range + + dr = date_range('1/1/2000', '1/1/2001') + pr = period_range('1/1/2000', '1/1/2001') + df = DataFrame(randn(len(dr), K), index=dr) + df['mix'] = 'a' + + pts = df.to_period() + exp = df.copy() + exp.index = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M') + tm.assert_index_equal(pts.index, exp.index.asfreq('M')) + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = pr + assert_frame_equal(pts, exp) + + pts = df.to_period('M', axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) + + self.assertRaises(ValueError, df.to_period, axis=2) + if __name__ == '__main__': import nose diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ffb6025163a6b..bf61f5ef83859 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -4167,6 +4167,14 @@ def test_groupby_groups_datetimeindex_tz(self): result = df.groupby(level=0).sum() assert_frame_equal(result, expected) + def test_frame_datetime64_handling_groupby(self): + # it works! 
+ df = DataFrame([(3, np.datetime64('2012-07-03')), + (3, np.datetime64('2012-07-04'))], + columns=['a', 'date']) + result = df.groupby('a').first() + self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + def test_groupby_multi_timezone(self): # combining multiple / different timezones yields UTC diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index d452a7e1840d7..edb044a3cb2d7 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -4,7 +4,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, NaT, Index, Timestamp, - Int64Index) + Int64Index, Period) class TestDatetimeIndex(tm.TestCase): @@ -182,3 +182,131 @@ def _check_rng(rng): _check_rng(rng) _check_rng(rng_eastern) _check_rng(rng_utc) + + +class TestToPeriod(tm.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + data = [Timestamp('2007-01-01 10:11:12.123456Z'), + Timestamp('2007-01-01 10:11:13.789123Z')] + self.index = DatetimeIndex(data) + + def test_to_period_millisecond(self): + index = self.index + + period = index.to_period(freq='L') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) + + def test_to_period_microsecond(self): + index = self.index + + period = index.to_period(freq='U') + self.assertEqual(2, len(period)) + self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) + self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) + + def test_to_period_tz_pytz(self): + tm._skip_if_no_pytz() + from dateutil.tz import tzlocal + from pytz import utc as UTC + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=UTC) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertEqual(result, expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_explicit_pytz(self): + tm._skip_if_no_pytz() + import pytz + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + from dateutil.tz import tzlocal + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') + + result = ts.to_period()[0] + expected 
= ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assertTrue(result == expected) + tm.assert_index_equal(ts.to_period(), xp) + + def test_astype_object(self): + # NumPy 1.6.1 weak ns support + rng = date_range('1/1/2000', periods=20) + + casted = rng.astype('O') + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + self.assertEqual(casted.tolist(), exp_values) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 03bc0e0c554b0..e54ebe3d93bc6 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -2,7 +2,7 @@ from datetime import timedelta import pandas as pd -from pandas import tslib +from pandas import tslib, offsets, lib import pandas.util.testing as tm from pandas.tslib import OutOfBoundsDatetime from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, @@ -467,17 +467,6 @@ def test_dti_constructor_small_int(self): arr = np.array([0, 10, 20], dtype=dtype) tm.assert_index_equal(DatetimeIndex(arr), exp) - def test_dti_constructor_numpy_timeunits(self): - # GH 9114 - base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) - - for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', - 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: - values = base.values.astype(dtype) - - tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values), base) - def test_ctor_str_intraday(self): rng = DatetimeIndex(['1-1-2000 00:00:01']) self.assertEqual(rng[0].second, 1) @@ -507,3 +496,80 @@ def test_constructor_int64_nocopy(self): arr[50:100] = -1 self.assertTrue((index.asi8[50:100] != -1).all()) + + def test_from_freq_recreate_from_data(self): + freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', + 'C'] + + for f in freqs: + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) + idx = DatetimeIndex(org, freq=f) + tm.assert_index_equal(idx, org) + + org = DatetimeIndex(start='2001/02/01 09:00', freq=f, + tz='US/Pacific', periods=1) + idx = DatetimeIndex(org, freq=f, tz='US/Pacific') + tm.assert_index_equal(idx, org) + + def test_datetimeindex_constructor_misc(self): + arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] + self.assertRaises(Exception, DatetimeIndex, arr) + + arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] + idx1 = DatetimeIndex(arr) + + arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] + idx2 = DatetimeIndex(arr) + + arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', + '2005-01-04'] + idx3 = DatetimeIndex(arr) + + arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', + '2005-01-04'], dtype='O') + idx4 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) + idx5 = DatetimeIndex(arr) + + arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' + ]) + idx6 = DatetimeIndex(arr) + + idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) + idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, + 
yearfirst=True) + tm.assert_index_equal(idx7, idx8) + + for other in [idx2, idx3, idx4, idx5, idx6]: + self.assertTrue((idx1.values == other.values).all()) + + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = DatetimeIndex(start=sdate, freq='1B', periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) + self.assertEqual(idx.freq, 'B') + + idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) + self.assertEqual(len(idx), 20) + self.assertEqual(idx[-1], edate) + self.assertEqual(idx.freq, '5D') + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.Week(weekday=6)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.QuarterBegin(startingMonth=1)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) + + idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') + idx2 = DatetimeIndex(start=sdate, end=edate, + freq=offsets.BQuarterEnd(startingMonth=12)) + self.assertEqual(len(idx1), len(idx2)) + self.assertEqual(idx1.offset, idx2.offset) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b3d6c41573ab8..b2161aa5c75c6 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -3,6 +3,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import date_range, offsets, DatetimeIndex, Timestamp +from pandas import compat from pandas.tests.series.common import TestData @@ -110,3 +111,20 @@ def test_range_misspecified(self): self.assertRaises(ValueError, date_range, '1/1/2000', freq='H') self.assertRaises(ValueError, date_range, end='1/1/2000', freq='H') self.assertRaises(ValueError, date_range, periods=10, freq='H') + + def test_compat_replace(self): + # https://github.com/statsmodels/statsmodels/issues/3349 + # replace should take ints/longs for compat + + for f in [compat.long, int]: + result = date_range(Timestamp('1960-04-01 00:00:00', + freq='QS-JAN'), + periods=f(76), + freq='QS-JAN') + self.assertEqual(len(result), 76) + + def test_catch_infinite_loop(self): + offset = offsets.DateOffset(minute=5) + # blow up, don't loop forever + self.assertRaises(Exception, date_range, datetime(2011, 11, 11), + datetime(2011, 11, 12), freq=offset) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index b32801a8bcf25..eea08febc86e6 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -7,6 +7,7 @@ from ..datetimelike import DatetimeLike + class TestDatetimeIndex(DatetimeLike, tm.TestCase): _holder = DatetimeIndex _multiprocess_can_split_ = True diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 92aad5a0b1997..dda2785d2b0ae 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,13 +1,9 @@ import numpy as np import pandas as pd -import pandas.lib as lib import pandas.util.testing as tm -from pandas import (Index, DatetimeIndex, datetime, offsets, to_datetime, - Series, DataFrame, Float64Index, date_range, - Timestamp, isnull) -from pandas import tslib -from pandas.util.testing import 
assert_series_equal +from pandas import (Index, DatetimeIndex, datetime, offsets, + Float64Index, date_range, Timestamp) class TestDateTimeIndexToJulianDate(tm.TestCase): @@ -144,63 +140,6 @@ def test_datetimeindex_integers_shift(self): expected = rng.shift(-5) tm.assert_index_equal(result, expected) - def test_string_na_nat_conversion(self): - # GH #999, #858 - - from pandas.compat import parse_date - - strings = np.array(['1/1/2000', '1/2/2000', np.nan, - '1/4/2000, 12:34:56'], dtype=object) - - expected = np.empty(4, dtype='M8[ns]') - for i, val in enumerate(strings): - if isnull(val): - expected[i] = tslib.iNaT - else: - expected[i] = parse_date(val) - - result = tslib.array_to_datetime(strings) - tm.assert_almost_equal(result, expected) - - result2 = to_datetime(strings) - tm.assertIsInstance(result2, DatetimeIndex) - tm.assert_numpy_array_equal(result, result2.values) - - malformed = np.array(['1/100/2000', np.nan], dtype=object) - - # GH 10636, default is now 'raise' - self.assertRaises(ValueError, - lambda: to_datetime(malformed, errors='raise')) - - result = to_datetime(malformed, errors='ignore') - tm.assert_numpy_array_equal(result, malformed) - - self.assertRaises(ValueError, to_datetime, malformed, errors='raise') - - idx = ['a', 'b', 'c', 'd', 'e'] - series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, - '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') - - result = to_datetime(series) - dresult = to_datetime(dseries) - - expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) - for i in range(5): - x = series[i] - if isnull(x): - expected[i] = tslib.iNaT - else: - expected[i] = to_datetime(x) - - assert_series_equal(result, expected, check_names=False) - self.assertEqual(result.name, 'foo') - - assert_series_equal(dresult, expected, check_names=False) - self.assertEqual(dresult.name, 'foo') - def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=1) repr(dr) @@ -211,84 +150,6 @@ def test_datetimeindex_repr_short(self): dr = date_range(start='1/1/2012', periods=3) repr(dr) - def test_getitem_setitem_datetimeindex(self): - N = 50 - # testing with timezone, GH #2785 - rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') - ts = Series(np.random.randn(N), index=rng) - - result = ts["1990-01-01 04:00:00"] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00"] = 0 - result["1990-01-01 04:00:00"] = ts[4] - assert_series_equal(result, ts) - - result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 - result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] - assert_series_equal(result, ts) - - lb = "1990-01-01 04:00:00" - rb = "1990-01-01 07:00:00" - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - # repeat all the above with naive datetimes - result = ts[datetime(1990, 1, 1, 4)] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 4)] = 0 - result[datetime(1990, 1, 1, 4)] = ts[4] - assert_series_equal(result, ts) - - result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[datetime(1990, 1, 1, 
4):datetime(1990, 1, 1, 7)] = 0 - result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] - assert_series_equal(result, ts) - - lb = datetime(1990, 1, 1, 4) - rb = datetime(1990, 1, 1, 7) - result = ts[(ts.index >= lb) & (ts.index <= rb)] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts[ts.index[4]] - expected = ts[4] - self.assertEqual(result, expected) - - result = ts[ts.index[4:8]] - expected = ts[4:8] - assert_series_equal(result, expected) - - result = ts.copy() - result[ts.index[4:8]] = 0 - result[4:8] = ts[4:8] - assert_series_equal(result, ts) - - # also test partial date slicing - result = ts["1990-01-02"] - expected = ts[24:48] - assert_series_equal(result, expected) - - result = ts.copy() - result["1990-01-02"] = 0 - result["1990-01-02"] = ts[24:48] - assert_series_equal(result, ts) - def test_normalize(self): rng = date_range('1/1/2000 9:30', periods=10, freq='D') @@ -308,13 +169,6 @@ def test_normalize(self): self.assertTrue(result.is_normalized) self.assertFalse(rng.is_normalized) - def test_series_ctor_plus_datetimeindex(self): - rng = date_range('20090415', '20090519', freq='B') - data = dict((k, 1) for k in rng) - - result = Series(data, index=rng) - self.assertIs(result.index, rng) - class TestDatetime64(tm.TestCase): @@ -451,151 +305,8 @@ def test_datetimeindex_accessors(self): for ts, value in tests: self.assertEqual(ts, value) - def test_datetimeindex_diff(self): - dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=100) - dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), - periods=98) - self.assertEqual(len(dti1.difference(dti2)), 2) - def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) self.assert_numpy_array_equal(dti.nanosecond, np.arange(10, dtype=np.int32)) - - def test_datetimeindex_constructor(self): - arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04'] - self.assertRaises(Exception, DatetimeIndex, arr) - - arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'] - idx1 = DatetimeIndex(arr) - - arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04'] - idx2 = DatetimeIndex(arr) - - arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005', - '2005-01-04'] - idx3 = DatetimeIndex(arr) - - arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005', - '2005-01-04'], dtype='O') - idx4 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']) - idx5 = DatetimeIndex(arr) - - arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04' - ]) - idx6 = DatetimeIndex(arr) - - idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True) - idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False, - yearfirst=True) - tm.assert_index_equal(idx7, idx8) - - for other in [idx2, idx3, idx4, idx5, idx6]: - self.assertTrue((idx1.values == other.values).all()) - - sdate = datetime(1999, 12, 25) - edate = datetime(2000, 1, 1) - idx = DatetimeIndex(start=sdate, freq='1B', periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[0], sdate + 0 * offsets.BDay()) - self.assertEqual(idx.freq, 'B') - - idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20) - self.assertEqual(len(idx), 20) - self.assertEqual(idx[-1], edate) - self.assertEqual(idx.freq, '5D') - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.Week(weekday=6)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = 
DatetimeIndex(start=sdate, end=edate, freq='QS') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.QuarterBegin(startingMonth=1)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ') - idx2 = DatetimeIndex(start=sdate, end=edate, - freq=offsets.BQuarterEnd(startingMonth=12)) - self.assertEqual(len(idx1), len(idx2)) - self.assertEqual(idx1.offset, idx2.offset) - - def test_dayfirst(self): - # GH 5917 - arr = ['10/02/2014', '11/02/2014', '12/02/2014'] - expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), - datetime(2014, 2, 12)]) - idx1 = DatetimeIndex(arr, dayfirst=True) - idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) - idx5 = DatetimeIndex(Index(arr), dayfirst=True) - idx6 = DatetimeIndex(Series(arr), dayfirst=True) - tm.assert_index_equal(expected, idx1) - tm.assert_index_equal(expected, idx2) - tm.assert_index_equal(expected, idx3) - tm.assert_index_equal(expected, idx4) - tm.assert_index_equal(expected, idx5) - tm.assert_index_equal(expected, idx6) - - def test_dti_set_index_reindex(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern') - idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo') - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.reindex(idx2) - tm.assert_index_equal(df.index, idx2) - - # 11314 - # with tz - index = date_range(datetime(2015, 10, 1), - datetime(2015, 10, 1, 23), - freq='H', tz='US/Eastern') - df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index) - new_index = date_range(datetime(2015, 10, 2), - datetime(2015, 10, 2, 23), - freq='H', tz='US/Eastern') - - # TODO: unused? 
- result = df.set_index(new_index) # noqa - - self.assertEqual(new_index.freq, index.freq) - - def test_datetimeindex_union_join_empty(self): - dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') - empty = Index([]) - - result = dti.union(empty) - tm.assertIsInstance(result, DatetimeIndex) - self.assertIs(result, result) - - result = dti.join(empty) - tm.assertIsInstance(result, DatetimeIndex) - - -class TestTimeSeriesDuplicates(tm.TestCase): - _multiprocess_can_split_ = True - - def test_recreate_from_data(self): - freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', - 'C'] - - for f in freqs: - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1) - idx = DatetimeIndex(org, freq=f) - tm.assert_index_equal(idx, org) - - org = DatetimeIndex(start='2001/02/01 09:00', freq=f, - tz='US/Pacific', periods=1) - idx = DatetimeIndex(org, freq=f, tz='US/Pacific') - tm.assert_index_equal(idx, org) diff --git a/pandas/tests/indexes/datetimes/test_partial_slcing.py b/pandas/tests/indexes/datetimes/test_partial_slcing.py new file mode 100644 index 0000000000000..a960f5cf9235a --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_partial_slcing.py @@ -0,0 +1,256 @@ +""" test partial slicing on Series/Frame """ +from datetime import datetime +import numpy as np +import pandas as pd + +from pandas import (DatetimeIndex, Series, DataFrame, + date_range, Index, Timedelta, Timestamp) +from pandas.util import testing as tm + + +class TestSlicing(tm.TestCase): + + def test_slice_year(self): + dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + result = s['2005'] + expected = s[s.index.year == 2005] + tm.assert_series_equal(result, expected) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + result = df.loc['2005'] + expected = df[df.index.year == 2005] + tm.assert_frame_equal(result, expected) + + rng = date_range('1/1/2000', '1/1/2010') + + result = rng.get_loc('2009') + expected = slice(3288, 3653) + self.assertEqual(result, expected) + + def test_slice_quarter(self): + dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2001Q1']), 90) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['1Q01']), 90) + + def test_slice_month(self): + dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(dti)), index=dti) + self.assertEqual(len(s['2005-11']), 30) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + self.assertEqual(len(df.loc['2005-11']), 30) + + tm.assert_series_equal(s['2005-11'], s['11-2005']) + + def test_partial_slice(self): + rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-05':'2006-02'] + expected = s['20050501':'20060228'] + tm.assert_series_equal(result, expected) + + result = s['2005-05':] + expected = s['20050501':] + tm.assert_series_equal(result, expected) + + result = s[:'2006-02'] + expected = s[:'20060228'] + tm.assert_series_equal(result, expected) + + result = s['2005-1-1'] + self.assertEqual(result, s.iloc[0]) + + self.assertRaises(Exception, s.__getitem__, '2004-12-31') + + def test_partial_slice_daily(self): + rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-31'] + tm.assert_series_equal(result, s.iloc[:24]) + + 
self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') + + def test_partial_slice_hourly(self): + rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1'] + tm.assert_series_equal(result, s.iloc[:60 * 4]) + + result = s['2005-1-1 20'] + tm.assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') + + def test_partial_slice_minutely(self): + rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), + periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['2005-1-1 23:59'] + tm.assert_series_equal(result, s.iloc[:60]) + + result = s['2005-1-1'] + tm.assert_series_equal(result, s.iloc[:60]) + + self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) + self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') + + def test_partial_slice_second_precision(self): + rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, + microsecond=999990), + periods=20, freq='US') + s = Series(np.arange(20), rng) + + tm.assert_series_equal(s['2005-1-1 00:00'], s.iloc[:10]) + tm.assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) + + tm.assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) + tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) + + self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) + self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', + lambda: s['2005-1-1 00:00:00']) + + def test_partial_slicing_dataframe(self): + # GH14856 + # Test various combinations of string slicing resolution vs. + # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match + formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] + resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] + for rnum, resolution in enumerate(resolutions[2:], 2): + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, + middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({'a': values}, index, dtype=np.int64) + self.assertEqual(df.index.resolution, resolution) + + # Timestamp with the same resolution as index + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], + [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + + # Series should return slice + result = df['a'][ts_string] + expected = df['a'][theslice] + tm.assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts_string] + expected = df[theslice] + tm.assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than index + # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame + for 
fmt in formats[rnum + 1:]: + ts_string = index[1].strftime(fmt) + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Not compatible with existing key + # Should raise KeyError + for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: + ts = index[1] + Timedelta("1 " + res) + ts_string = ts.strftime(fmt) + self.assertRaises(KeyError, df['a'].__getitem__, ts_string) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + def test_partial_slicing_with_multiindex(self): + + # GH 4758 + # partial string indexing with a multi-index buggy + df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], + 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], + 'val': [1, 2, 3, 4]}, + index=date_range("2013-06-19 09:30:00", + periods=4, freq='5T')) + df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) + + expected = DataFrame([ + [1] + ], index=Index(['ABC'], name='TICKER'), columns=['val']) + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] + tm.assert_frame_equal(result, expected) + + expected = df_multi.loc[ + (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] + result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] + tm.assert_series_equal(result, expected) + + # this is a KeyError as we don't do partial string selection on + # multi-levels + def f(): + df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] + + self.assertRaises(KeyError, f) + + # GH 4294 + # partial slice on a series mi + s = pd.DataFrame(np.random.rand(1000, 1000), index=pd.date_range( + '2000-1-1', periods=1000)).stack() + + s2 = s[:-1].copy() + expected = s2['2000-1-4'] + result = s2[pd.Timestamp('2000-1-4')] + tm.assert_series_equal(result, expected) + + result = s[pd.Timestamp('2000-1-4')] + expected = s['2000-1-4'] + tm.assert_series_equal(result, expected) + + df2 = pd.DataFrame(s) + expected = df2.xs('2000-1-4') + result = df2.loc[pd.Timestamp('2000-1-4')] + tm.assert_frame_equal(result, expected) + + def test_partial_slice_doesnt_require_monotonicity(self): + # For historical reasons. 
+ s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) + + nonmonotonic = s[[3, 5, 4]] + expected = nonmonotonic.iloc[:0] + timestamp = pd.Timestamp('2014-01-10') + + tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) + self.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic[timestamp:]) + + tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) + self.assertRaisesRegexp(KeyError, + r"Timestamp\('2014-01-10 00:00:00'\)", + lambda: nonmonotonic.loc[timestamp:]) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index ba6beb03c7f24..229ae803aa2ff 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -1,9 +1,11 @@ +from datetime import datetime + import numpy as np import pandas as pd import pandas.util.testing as tm from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, - Int64Index) + Int64Index, Index) class TestDatetimeIndex(tm.TestCase): @@ -166,3 +168,21 @@ def test_difference_freq(self): expected = DatetimeIndex(["20160920", "20160921"], freq=None) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) + + def test_datetimeindex_diff(self): + dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=100) + dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), + periods=98) + self.assertEqual(len(dti1.difference(dti2)), 2) + + def test_datetimeindex_union_join_empty(self): + dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') + empty = Index([]) + + result = dti.union(empty) + tm.assertIsInstance(result, DatetimeIndex) + self.assertIs(result, result) + + result = dti.join(empty) + tm.assertIsInstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py new file mode 100644 index 0000000000000..42d135f634298 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -0,0 +1,1019 @@ +""" test to_datetime """ + +import nose + +import sys +import calendar +import locale +from datetime import datetime + +import numpy as np +from pandas.types.common import is_datetime64_ns_dtype +from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, + Index, DatetimeIndex, NaT, date_range, bdate_range) +from pandas import tslib +from pandas.compat import lmap +import pandas as pd +from pandas.tseries import tools +from pandas.util import testing as tm +from pandas.util.testing import assert_series_equal + + +class TimeConversionFormats(tm.TestCase): + + def test_to_datetime_format(self): + values = ['1/1/2000', '1/2/2000', '1/3/2000'] + + results1 = [Timestamp('20000101'), Timestamp('20000201'), + Timestamp('20000301')] + results2 = [Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')] + for vals, expecteds in [(values, (Index(results1), Index(results2))), + (Series(values), + (Series(results1), Series(results2))), + (values[0], (results1[0], results2[0])), + (values[1], (results1[1], results2[1])), + (values[2], (results1[2], results2[2]))]: + + for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): + result = to_datetime(vals, format=fmt) + expected = expecteds[i] + + if isinstance(expected, Series): + assert_series_equal(result, Series(expected)) + elif isinstance(expected, Timestamp): + self.assertEqual(result, expected) + else: + tm.assert_index_equal(result, expected) + + def 
test_to_datetime_format_YYYYMMDD(self): + s = Series([19801222, 19801222] + [19810105] * 5) + expected = Series([Timestamp(x) for x in s.apply(str)]) + + result = to_datetime(s, format='%Y%m%d') + assert_series_equal(result, expected) + + result = to_datetime(s.apply(str), format='%Y%m%d') + assert_series_equal(result, expected) + + # with NaT + expected = Series([Timestamp("19801222"), Timestamp("19801222")] + + [Timestamp("19810105")] * 5) + expected[2] = np.nan + s[2] = np.nan + + result = to_datetime(s, format='%Y%m%d') + assert_series_equal(result, expected) + + # string with NaT + s = s.apply(str) + s[2] = 'nat' + result = to_datetime(s, format='%Y%m%d') + assert_series_equal(result, expected) + + # coercion + # GH 7930 + s = Series([20121231, 20141231, 99991231]) + result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') + expected = Series([datetime(2012, 12, 31), + datetime(2014, 12, 31), datetime(9999, 12, 31)], + dtype=object) + self.assert_series_equal(result, expected) + + result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') + expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') + assert_series_equal(result, expected) + + # GH 10178 + def test_to_datetime_format_integer(self): + s = Series([2000, 2001, 2002]) + expected = Series([Timestamp(x) for x in s.apply(str)]) + + result = to_datetime(s, format='%Y') + assert_series_equal(result, expected) + + s = Series([200001, 200105, 200206]) + expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) + ]) + + result = to_datetime(s, format='%Y%m') + assert_series_equal(result, expected) + + def test_to_datetime_format_microsecond(self): + + # these are locale dependent + lang, _ = locale.getlocale() + month_abbr = calendar.month_abbr[4] + val = '01-{}-2011 00:00:01.978'.format(month_abbr) + + format = '%d-%b-%Y %H:%M:%S.%f' + result = to_datetime(val, format=format) + exp = datetime.strptime(val, format) + self.assertEqual(result, exp) + + def test_to_datetime_format_time(self): + data = [ + ['01/10/2010 15:20', '%m/%d/%Y %H:%M', + Timestamp('2010-01-10 15:20')], + ['01/10/2010 05:43', '%m/%d/%Y %I:%M', + Timestamp('2010-01-10 05:43')], + ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S', + Timestamp('2010-01-10 13:56:01')] # , + # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', + # Timestamp('2010-01-10 20:14')], + # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', + # Timestamp('2010-01-10 07:40')], + # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', + # Timestamp('2010-01-10 09:12:56')] + ] + for s, format, dt in data: + self.assertEqual(to_datetime(s, format=format), dt) + + def test_to_datetime_with_non_exact(self): + # GH 10834 + tm._skip_if_has_locale() + + # 8904 + # exact kw + if sys.version_info < (2, 7): + raise nose.SkipTest('on python version < 2.7') + + s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', + '19MAY11 00:00:00Z']) + result = to_datetime(s, format='%d%b%y', exact=False) + expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), + format='%d%b%y') + assert_series_equal(result, expected) + + def test_parse_nanoseconds_with_formula(self): + + # GH8989 + # trunctaing the nanoseconds when a format was provided + for v in ["2012-01-01 09:00:00.000000001", + "2012-01-01 09:00:00.000001", + "2012-01-01 09:00:00.001", + "2012-01-01 09:00:00.001000", + "2012-01-01 09:00:00.001000000", ]: + expected = pd.to_datetime(v) + result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") + self.assertEqual(result, expected) + + def test_to_datetime_format_weeks(self): + data = [ 
+ ['2009324', '%Y%W%w', Timestamp('2009-08-13')], + ['2013020', '%Y%U%w', Timestamp('2013-01-13')] + ] + for s, format, dt in data: + self.assertEqual(to_datetime(s, format=format), dt) + + +class TestToDatetime(tm.TestCase): + _multiprocess_can_split_ = True + + def test_to_datetime_dt64s(self): + in_bound_dts = [ + np.datetime64('2000-01-01'), + np.datetime64('2000-01-02'), + ] + + for dt in in_bound_dts: + self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) + + oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] + + for dt in oob_dts: + self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') + self.assertRaises(ValueError, Timestamp, dt) + self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) + + def test_to_datetime_array_of_dt64s(self): + dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] + + # Assuming all datetimes are in bounds, to_datetime() returns + # an array that is equal to Timestamp() parsing + self.assert_numpy_array_equal( + pd.to_datetime(dts, box=False), + np.array([Timestamp(x).asm8 for x in dts]) + ) + + # A list of datetimes where the last one is out of bounds + dts_with_oob = dts + [np.datetime64('9999-01-01')] + + self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, + errors='raise') + + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + np.array( + [ + Timestamp(dts_with_oob[0]).asm8, + Timestamp(dts_with_oob[1]).asm8, + tslib.iNaT, + ], + dtype='M8' + ) + ) + + # With errors='ignore', out of bounds datetime64s + # are converted to their .item(), which depending on the version of + # numpy is either a python datetime.datetime or datetime.date + self.assert_numpy_array_equal( + pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + np.array( + [dt.item() for dt in dts_with_oob], + dtype='O' + ) + ) + + def test_to_datetime_tz(self): + + # xref 8260 + # uniform returns a DatetimeIndex + arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] + result = pd.to_datetime(arr) + expected = DatetimeIndex( + ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') + tm.assert_index_equal(result, expected) + + # mixed tzs will raise + arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] + self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) + + def test_to_datetime_tz_pytz(self): + + # xref 8260 + tm._skip_if_no_pytz() + import pytz + + us_eastern = pytz.timezone('US/Eastern') + arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, + hour=3, minute=0)), + us_eastern.localize(datetime(year=2000, month=6, day=1, + hour=3, minute=0))], + dtype=object) + result = pd.to_datetime(arr, utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + def test_to_datetime_utc_is_true(self): + # See gh-11934 + start = pd.Timestamp('2014-01-01', tz='utc') + end = pd.Timestamp('2014-01-03', tz='utc') + date_range = pd.bdate_range(start, end) + + result = pd.to_datetime(date_range, utc=True) + expected = pd.DatetimeIndex(data=date_range) + tm.assert_index_equal(result, expected) + + def test_to_datetime_tz_psycopg2(self): + + # xref 8260 + try: + import psycopg2 + except ImportError: + raise nose.SkipTest("no psycopg2 installed") + + # misc cases + tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, 
name=None) + tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) + arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), + datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], + dtype=object) + + result = pd.to_datetime(arr, errors='coerce', utc=True) + expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', + '2000-06-01 07:00:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + tm.assert_index_equal(result, expected) + + # dtype coercion + i = pd.DatetimeIndex([ + '2000-01-01 08:00:00+00:00' + ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) + self.assertTrue(is_datetime64_ns_dtype(i)) + + # tz coerceion + result = pd.to_datetime(i, errors='coerce') + tm.assert_index_equal(result, i) + + result = pd.to_datetime(i, errors='coerce', utc=True) + expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], + dtype='datetime64[ns, UTC]') + tm.assert_index_equal(result, expected) + + def test_datetime_bool(self): + # GH13176 + with self.assertRaises(TypeError): + to_datetime(False) + self.assertTrue(to_datetime(False, errors="coerce") is NaT) + self.assertEqual(to_datetime(False, errors="ignore"), False) + with self.assertRaises(TypeError): + to_datetime(True) + self.assertTrue(to_datetime(True, errors="coerce") is NaT) + self.assertEqual(to_datetime(True, errors="ignore"), True) + with self.assertRaises(TypeError): + to_datetime([False, datetime.today()]) + with self.assertRaises(TypeError): + to_datetime(['20130101', True]) + tm.assert_index_equal(to_datetime([0, False, NaT, 0.0], + errors="coerce"), + DatetimeIndex([to_datetime(0), NaT, + NaT, to_datetime(0)])) + + def test_datetime_invalid_datatype(self): + # GH13176 + + with self.assertRaises(TypeError): + pd.to_datetime(bool) + with self.assertRaises(TypeError): + pd.to_datetime(pd.to_datetime) + + +class ToDatetimeUnit(tm.TestCase): + + def test_unit(self): + # GH 11758 + # test proper behavior with erros + + with self.assertRaises(ValueError): + to_datetime([1], unit='D', format='%Y%m%d') + + values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan, + 'NaT', ''] + result = to_datetime(values, unit='D', errors='ignore') + expected = Index([11111111, Timestamp('1970-01-02'), + Timestamp('1970-01-02'), NaT, + NaT, NaT, NaT, NaT], + dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, unit='D', errors='coerce') + expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', + 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) + tm.assert_index_equal(result, expected) + + with self.assertRaises(tslib.OutOfBoundsDatetime): + to_datetime(values, unit='D', errors='raise') + + values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT'] + + result = to_datetime(values, errors='ignore', unit='s') + expected = Index([1420043460000, NaT, NaT, + NaT, NaT], dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, errors='coerce', unit='s') + expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) + tm.assert_index_equal(result, expected) + + with self.assertRaises(tslib.OutOfBoundsDatetime): + to_datetime(values, errors='raise', unit='s') + + # if we have a string, then we raise a ValueError + # and NOT an OutOfBoundsDatetime + for val in ['foo', Timestamp('20130101')]: + try: + to_datetime(val, errors='raise', unit='s') + except tslib.OutOfBoundsDatetime: + raise AssertionError("incorrect exception raised") + except ValueError: + pass + + def test_unit_consistency(self): + + # consistency of conversions + expected = Timestamp('1970-05-09 14:25:11') + result = pd.to_datetime(11111111, 
unit='s', errors='raise') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='coerce') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit='s', errors='ignore') + self.assertEqual(result, expected) + self.assertIsInstance(result, Timestamp) + + def test_unit_with_numeric(self): + + # GH 13180 + # coercions from floats/ints are ok + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr1 = [1.434692e+18, 1.432766e+18] + arr2 = np.array(arr1).astype('int64') + for errors in ['ignore', 'raise', 'coerce']: + result = pd.to_datetime(arr1, errors=errors) + tm.assert_index_equal(result, expected) + + result = pd.to_datetime(arr2, errors=errors) + tm.assert_index_equal(result, expected) + + # but we want to make sure that we are coercing + # if we have ints/strings + expected = DatetimeIndex(['NaT', + '2015-06-19 05:33:20', + '2015-05-27 22:33:20']) + arr = ['foo', 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + expected = DatetimeIndex(['2015-06-19 05:33:20', + '2015-05-27 22:33:20', + 'NaT', + 'NaT']) + arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + def test_unit_mixed(self): + + # mixed integers/datetimes + expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) + arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + expected = DatetimeIndex(['NaT', + 'NaT', + '2013-01-01']) + arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] + result = pd.to_datetime(arr, errors='coerce') + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + pd.to_datetime(arr, errors='raise') + + def test_dataframe(self): + + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [6, 7], + 'minute': [58, 59], + 'second': [10, 11], + 'ms': [1, 1], + 'us': [2, 2], + 'ns': [3, 3]}) + + result = to_datetime({'year': df['year'], + 'month': df['month'], + 'day': df['day']}) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:0:00')]) + assert_series_equal(result, expected) + + # dict-like + result = to_datetime(df[['year', 'month', 'day']].to_dict()) + assert_series_equal(result, expected) + + # dict but with constructable + df2 = df[['year', 'month', 'day']].to_dict() + df2['month'] = 2 + result = to_datetime(df2) + expected2 = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160205 00:0:00')]) + assert_series_equal(result, expected2) + + # unit mappings + units = [{'year': 'years', + 'month': 'months', + 'day': 'days', + 'hour': 'hours', + 'minute': 'minutes', + 'second': 'seconds'}, + {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second'}, + ] + + for d in units: + result = to_datetime(df[list(d.keys())].rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10'), + Timestamp('20160305 07:59:11')]) + assert_series_equal(result, expected) + + d = {'year': 'year', + 'month': 'month', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second', + 'ms': 'ms', + 'us': 'us', + 'ns': 'ns'} + + result = 
to_datetime(df.rename(columns=d)) + expected = Series([Timestamp('20150204 06:58:10.001002003'), + Timestamp('20160305 07:59:11.001002003')]) + assert_series_equal(result, expected) + + # coerce back to int + result = to_datetime(df.astype(str)) + assert_series_equal(result, expected) + + # passing coerce + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + with self.assertRaises(ValueError): + to_datetime(df2) + result = to_datetime(df2, errors='coerce') + expected = Series([Timestamp('20150204 00:00:00'), + NaT]) + assert_series_equal(result, expected) + + # extra columns + with self.assertRaises(ValueError): + df2 = df.copy() + df2['foo'] = 1 + to_datetime(df2) + + # not enough + for c in [['year'], + ['year', 'month'], + ['year', 'month', 'second'], + ['month', 'day'], + ['year', 'day', 'second']]: + with self.assertRaises(ValueError): + to_datetime(df[c]) + + # duplicates + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5]}) + df2.columns = ['year', 'year', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + + df2 = DataFrame({'year': [2015, 2016], + 'month': [2, 20], + 'day': [4, 5], + 'hour': [4, 5]}) + df2.columns = ['year', 'month', 'day', 'day'] + with self.assertRaises(ValueError): + to_datetime(df2) + + def test_dataframe_dtypes(self): + # #13451 + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5]}) + + # int16 + result = to_datetime(df.astype('int16')) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # mixed dtypes + df['month'] = df['month'].astype('int8') + df['day'] = df['day'].astype('int8') + result = to_datetime(df) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # float + df = DataFrame({'year': [2000, 2001], + 'month': [1.5, 1], + 'day': [1, 1]}) + with self.assertRaises(ValueError): + to_datetime(df) + + +class ToDatetimeMisc(tm.TestCase): + + def test_index_to_datetime(self): + idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = idx.to_datetime() + expected = DatetimeIndex(pd.to_datetime(idx.values)) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + today = datetime.today() + idx = Index([today], dtype=object) + result = idx.to_datetime() + expected = DatetimeIndex([today]) + tm.assert_index_equal(result, expected) + + def test_to_datetime_iso8601(self): + result = to_datetime(["2012-01-01 00:00:00"]) + exp = Timestamp("2012-01-01 00:00:00") + self.assertEqual(result[0], exp) + + result = to_datetime(['20121001']) # bad iso 8601 + exp = Timestamp('2012-10-01') + self.assertEqual(result[0], exp) + + def test_to_datetime_default(self): + rs = to_datetime('2001') + xp = datetime(2001, 1, 1) + self.assertTrue(rs, xp) + + # dayfirst is essentially broken + + # to_datetime('01-13-2012', dayfirst=True) + # self.assertRaises(ValueError, to_datetime('01-13-2012', + # dayfirst=True)) + + def test_to_datetime_on_datetime64_series(self): + # #2699 + s = Series(date_range('1/1/2000', periods=10)) + + result = to_datetime(s) + self.assertEqual(result[0], s[0]) + + def test_to_datetime_with_space_in_series(self): + # GH 6428 + s = Series(['10/18/2006', '10/18/2008', ' ']) + tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) + result_coerce = to_datetime(s, 
errors='coerce') + expected_coerce = Series([datetime(2006, 10, 18), + datetime(2008, 10, 18), + NaT]) + tm.assert_series_equal(result_coerce, expected_coerce) + result_ignore = to_datetime(s, errors='ignore') + tm.assert_series_equal(result_ignore, s) + + def test_to_datetime_with_apply(self): + # this is only locale tested with US/None locales + tm._skip_if_has_locale() + + # GH 5195 + # with a format and coerce a single item to_datetime fails + td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) + expected = pd.to_datetime(td, format='%b %y') + result = td.apply(pd.to_datetime, format='%b %y') + assert_series_equal(result, expected) + + td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) + self.assertRaises(ValueError, + lambda: pd.to_datetime(td, format='%b %y', + errors='raise')) + self.assertRaises(ValueError, + lambda: td.apply(pd.to_datetime, format='%b %y', + errors='raise')) + expected = pd.to_datetime(td, format='%b %y', errors='coerce') + + result = td.apply( + lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) + assert_series_equal(result, expected) + + def test_to_datetime_types(self): + + # empty string + result = to_datetime('') + self.assertIs(result, NaT) + + result = to_datetime(['', '']) + self.assertTrue(isnull(result).all()) + + # ints + result = Timestamp(0) + expected = to_datetime(0) + self.assertEqual(result, expected) + + # GH 3888 (strings) + expected = to_datetime(['2012'])[0] + result = to_datetime('2012') + self.assertEqual(result, expected) + + # array = ['2012','20120101','20120101 12:01:01'] + array = ['20120101', '20120101 12:01:01'] + expected = list(to_datetime(array)) + result = lmap(Timestamp, array) + tm.assert_almost_equal(result, expected) + + # currently fails ### + # result = Timestamp('2012') + # expected = to_datetime('2012') + # self.assertEqual(result, expected) + + def test_to_datetime_unprocessable_input(self): + # GH 4928 + self.assert_numpy_array_equal( + to_datetime([1, '1'], errors='ignore'), + np.array([1, '1'], dtype='O') + ) + self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') + + def test_to_datetime_other_datetime64_units(self): + # 5/25/2012 + scalar = np.int64(1337904000000000).view('M8[us]') + as_obj = scalar.astype('O') + + index = DatetimeIndex([scalar]) + self.assertEqual(index[0], scalar.astype('O')) + + value = Timestamp(scalar) + self.assertEqual(value, as_obj) + + def test_to_datetime_list_of_integers(self): + rng = date_range('1/1/2000', periods=20) + rng = DatetimeIndex(rng.values) + + ints = list(rng.asi8) + + result = DatetimeIndex(ints) + + tm.assert_index_equal(rng, result) + + def test_to_datetime_freq(self): + xp = bdate_range('2000-1-1', periods=10, tz='UTC') + rs = xp.to_datetime() + self.assertEqual(xp.freq, rs.freq) + self.assertEqual(xp.tzinfo, rs.tzinfo) + + def test_string_na_nat_conversion(self): + # GH #999, #858 + + from pandas.compat import parse_date + + strings = np.array(['1/1/2000', '1/2/2000', np.nan, + '1/4/2000, 12:34:56'], dtype=object) + + expected = np.empty(4, dtype='M8[ns]') + for i, val in enumerate(strings): + if isnull(val): + expected[i] = tslib.iNaT + else: + expected[i] = parse_date(val) + + result = tslib.array_to_datetime(strings) + tm.assert_almost_equal(result, expected) + + result2 = to_datetime(strings) + tm.assertIsInstance(result2, DatetimeIndex) + tm.assert_numpy_array_equal(result, result2.values) + + malformed = np.array(['1/100/2000', np.nan], dtype=object) + + # GH 10636, default is now 'raise' + self.assertRaises(ValueError, + 
                          lambda: to_datetime(malformed, errors='raise'))
+
+        result = to_datetime(malformed, errors='ignore')
+        tm.assert_numpy_array_equal(result, malformed)
+
+        self.assertRaises(ValueError, to_datetime, malformed, errors='raise')
+
+        idx = ['a', 'b', 'c', 'd', 'e']
+        series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
+                         '1/5/2000'], index=idx, name='foo')
+        dseries = Series([to_datetime('1/1/2000'), np.nan,
+                          to_datetime('1/3/2000'), np.nan,
+                          to_datetime('1/5/2000')], index=idx, name='foo')
+
+        result = to_datetime(series)
+        dresult = to_datetime(dseries)
+
+        expected = Series(np.empty(5, dtype='M8[ns]'), index=idx)
+        for i in range(5):
+            x = series[i]
+            if isnull(x):
+                expected[i] = tslib.iNaT
+            else:
+                expected[i] = to_datetime(x)
+
+        assert_series_equal(result, expected, check_names=False)
+        self.assertEqual(result.name, 'foo')
+
+        assert_series_equal(dresult, expected, check_names=False)
+        self.assertEqual(dresult.name, 'foo')
+
+    def test_dti_constructor_numpy_timeunits(self):
+        # GH 9114
+        base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'])
+
+        for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]',
+                      'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']:
+            values = base.values.astype(dtype)
+
+            tm.assert_index_equal(DatetimeIndex(values), base)
+            tm.assert_index_equal(to_datetime(values), base)
+
+    def test_dayfirst(self):
+        # GH 5917
+        arr = ['10/02/2014', '11/02/2014', '12/02/2014']
+        expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11),
+                                  datetime(2014, 2, 12)])
+        idx1 = DatetimeIndex(arr, dayfirst=True)
+        idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
+        idx3 = to_datetime(arr, dayfirst=True)
+        idx4 = to_datetime(np.array(arr), dayfirst=True)
+        idx5 = DatetimeIndex(Index(arr), dayfirst=True)
+        idx6 = DatetimeIndex(Series(arr), dayfirst=True)
+        tm.assert_index_equal(expected, idx1)
+        tm.assert_index_equal(expected, idx2)
+        tm.assert_index_equal(expected, idx3)
+        tm.assert_index_equal(expected, idx4)
+        tm.assert_index_equal(expected, idx5)
+        tm.assert_index_equal(expected, idx6)
+
+
+class TestGuessDatetimeFormat(tm.TestCase):
+
+    def test_guess_datetime_format_with_parseable_formats(self):
+        tm._skip_if_not_us_locale()
+        dt_string_to_format = (('20111230', '%Y%m%d'),
+                               ('2011-12-30', '%Y-%m-%d'),
+                               ('30-12-2011', '%d-%m-%Y'),
+                               ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
+                               ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
+                               ('2011-12-30 00:00:00.000000',
+                                '%Y-%m-%d %H:%M:%S.%f'), )
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
+
+    def test_guess_datetime_format_with_dayfirst(self):
+        ambiguous_string = '01/01/2011'
+        self.assertEqual(
+            tools._guess_datetime_format(ambiguous_string, dayfirst=True),
+            '%d/%m/%Y'
+        )
+        self.assertEqual(
+            tools._guess_datetime_format(ambiguous_string, dayfirst=False),
+            '%m/%d/%Y'
+        )
+
+    def test_guess_datetime_format_with_locale_specific_formats(self):
+        # The month names will vary depending on the locale, in which
+        # case these won't be parsed properly (dateutil can't parse them)
+        tm._skip_if_has_locale()
+
+        dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'),
+                               ('30/December/2011', '%d/%B/%Y'),
+                               ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), )
+
+        for dt_string, dt_format in dt_string_to_format:
+            self.assertEqual(
+                tools._guess_datetime_format(dt_string),
+                dt_format
+            )
+
+    def test_guess_datetime_format_invalid_inputs(self):
+        # A datetime string must include a year, month and a day for it
+        # to be
guessable, in addition to being a string that looks like + # a datetime + invalid_dts = [ + '2013', + '01/2013', + '12:00:00', + '1/1/1/1', + 'this_is_not_a_datetime', + '51a', + 9, + datetime(2011, 1, 1), + ] + + for invalid_dt in invalid_dts: + self.assertTrue(tools._guess_datetime_format(invalid_dt) is None) + + def test_guess_datetime_format_nopadding(self): + # GH 11142 + dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), + ('30-1-2011', '%d-%m-%Y'), + ('1/1/2011', '%m/%d/%Y'), + ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), + ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) + + for dt_string, dt_format in dt_string_to_format: + self.assertEqual( + tools._guess_datetime_format(dt_string), + dt_format + ) + + def test_guess_datetime_format_for_array(self): + tm._skip_if_not_us_locale() + expected_format = '%Y-%m-%d %H:%M:%S.%f' + dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) + + test_arrays = [ + np.array([dt_string, dt_string, dt_string], dtype='O'), + np.array([np.nan, np.nan, dt_string], dtype='O'), + np.array([dt_string, 'random_string'], dtype='O'), + ] + + for test_array in test_arrays: + self.assertEqual( + tools._guess_datetime_format_for_array(test_array), + expected_format + ) + + format_for_string_of_nans = tools._guess_datetime_format_for_array( + np.array( + [np.nan, np.nan, np.nan], dtype='O')) + self.assertTrue(format_for_string_of_nans is None) + + +class TestToDatetimeInferFormat(tm.TestCase): + + def test_to_datetime_infer_datetime_format_consistent_format(self): + s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) + + test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', + '%Y-%m-%dT%H:%M:%S.%f'] + + for test_format in test_formats: + s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) + + with_format = pd.to_datetime(s_as_dt_strings, format=test_format) + no_infer = pd.to_datetime(s_as_dt_strings, + infer_datetime_format=False) + yes_infer = pd.to_datetime(s_as_dt_strings, + infer_datetime_format=True) + + # Whether the format is explicitly passed, it is inferred, or + # it is not inferred, the results should all be the same + self.assert_series_equal(with_format, no_infer) + self.assert_series_equal(no_infer, yes_infer) + + def test_to_datetime_infer_datetime_format_inconsistent_format(self): + s = pd.Series(np.array(['01/01/2011 00:00:00', + '01-02-2011 00:00:00', + '2011-01-03T00:00:00'])) + + # When the format is inconsistent, infer_datetime_format should just + # fallback to the default parsing + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) + + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_infer_datetime_format_series_with_nans(self): + s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, + '01/03/2011 00:00:00', np.nan])) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', + '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) + + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), + pd.to_datetime(s, infer_datetime_format=True)) + + def test_to_datetime_iso8601_noleading_0s(self): + # GH 11871 + s = 
pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) + expected = pd.Series([pd.Timestamp('2014-01-01'), + pd.Timestamp('2014-02-02'), + pd.Timestamp('2015-03-03')]) + tm.assert_series_equal(pd.to_datetime(s), expected) + tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) + + +class TestDaysInMonth(tm.TestCase): + # tests for issue #10154 + def test_day_not_in_month_coerce(self): + self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", + errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", + errors='coerce'))) + self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", + errors='coerce'))) + + def test_day_not_in_month_raise(self): + self.assertRaises(ValueError, to_datetime, '2015-02-29', + errors='raise') + self.assertRaises(ValueError, to_datetime, '2015-02-29', + errors='raise', format="%Y-%m-%d") + self.assertRaises(ValueError, to_datetime, '2015-02-32', + errors='raise', format="%Y-%m-%d") + self.assertRaises(ValueError, to_datetime, '2015-04-31', + errors='raise', format="%Y-%m-%d") + + def test_day_not_in_month_ignore(self): + self.assertEqual(to_datetime( + '2015-02-29', errors='ignore'), '2015-02-29') + self.assertEqual(to_datetime( + '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') + self.assertEqual(to_datetime( + '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') + self.assertEqual(to_datetime( + '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 23261c2ef79e2..7bcd1763537dc 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from pandas import Series, DataFrame +from pandas import Series, DataFrame, date_range, DatetimeIndex from pandas import compat from pandas.util.testing import assert_series_equal @@ -218,3 +218,103 @@ def test_combine_first_dt64(self): rs = s0.combine_first(s1) xp = Series([datetime(2010, 1, 1), '2011']) assert_series_equal(rs, xp) + + +class TestTimeseries(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_append_concat(self): + rng = date_range('5/8/2012 1:45', periods=10, freq='5T') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + + result = ts.append(ts) + result_df = df.append(df) + ex_index = DatetimeIndex(np.tile(rng.values, 2)) + tm.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result_df.index, ex_index) + + appended = rng.append(rng) + tm.assert_index_equal(appended, ex_index) + + appended = rng.append([rng, rng]) + ex_index = DatetimeIndex(np.tile(rng.values, 3)) + tm.assert_index_equal(appended, ex_index) + + # different index names + rng1 = rng.copy() + rng2 = rng.copy() + rng1.name = 'foo' + rng2.name = 'bar' + self.assertEqual(rng1.append(rng1).name, 'foo') + self.assertIsNone(rng1.append(rng2).name) + + def test_append_concat_tz(self): + # GH 2938 + tm._skip_if_no_pytz() + + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz='US/Eastern') + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz='US/Eastern') + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz='US/Eastern') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) 
+ df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_concat_tz_explicit_pytz(self): + # GH 2938 + tm._skip_if_no_pytz() + from pytz import timezone as timezone + + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz=timezone('US/Eastern')) + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz=timezone('US/Eastern')) + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz=timezone('US/Eastern')) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_concat_tz_dateutil(self): + # GH 2938 + tm._skip_if_no_dateutil() + rng = date_range('5/8/2012 1:45', periods=10, freq='5T', + tz='dateutil/US/Eastern') + rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', + tz='dateutil/US/Eastern') + rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', + tz='dateutil/US/Eastern') + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 05818b013ac52..777b188b8fdd9 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -9,11 +9,12 @@ import pandas as pd from pandas.types.common import is_categorical_dtype, is_datetime64tz_dtype -from pandas import Index, Series, isnull, date_range, period_range +from pandas import (Index, Series, isnull, date_range, + period_range, NaT) from pandas.core.index import MultiIndex from pandas.tseries.index import Timestamp, DatetimeIndex -import pandas.lib as lib +from pandas import lib, tslib from pandas.compat import lrange, range, zip, OrderedDict, long from pandas import compat @@ -214,7 +215,6 @@ def test_constructor_maskedarray(self): expected = Series([True, True, False], index=index, dtype=bool) assert_series_equal(result, expected) - from pandas import tslib data = ma.masked_all((3, ), dtype='M8[ns]') result = Series(data) expected = Series([tslib.iNaT, tslib.iNaT, tslib.iNaT], dtype='M8[ns]') @@ -234,6 +234,13 @@ def test_constructor_maskedarray(self): datetime(2001, 1, 3)], index=index, dtype='M8[ns]') assert_series_equal(result, expected) + def test_series_ctor_plus_datetimeindex(self): + rng = date_range('20090415', '20090519', freq='B') + data = dict((k, 1) for k in rng) + + result = Series(data, index=rng) + self.assertIs(result.index, rng) + def test_constructor_default_index(self): s = Series([0, 1, 2]) tm.assert_index_equal(s.index, pd.Index(np.arange(3))) @@ -800,6 +807,21 @@ def f(): s = Series([pd.NaT, np.nan, '1 Day']) self.assertEqual(s.dtype, 'timedelta64[ns]') + 
def test_NaT_scalar(self): + series = Series([0, 1000, 2000, tslib.iNaT], dtype='M8[ns]') + + val = series[3] + self.assertTrue(isnull(val)) + + series[2] = val + self.assertTrue(isnull(series[2])) + + def test_NaT_cast(self): + # GH10747 + result = Series([np.nan]).astype('M8[ns]') + expected = Series([NaT]) + assert_series_equal(result, expected) + def test_constructor_name_hashable(self): for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]: for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]: @@ -810,3 +832,20 @@ def test_constructor_name_unhashable(self): for n in [['name_list'], np.ones(2), {1: 2}]: for data in [['name_list'], np.ones(2), {1: 2}]: self.assertRaises(TypeError, Series, data, name=n) + + def test_auto_conversion(self): + series = Series(list(date_range('1/1/2000', periods=10))) + self.assertEqual(series.dtype, 'M8[ns]') + + def test_constructor_cant_cast_datetime64(self): + msg = "Cannot cast datetime64 to " + with tm.assertRaisesRegexp(TypeError, msg): + Series(date_range('1/1/2000', periods=10), dtype=float) + + with tm.assertRaisesRegexp(TypeError, msg): + Series(date_range('1/1/2000', periods=10), dtype=int) + + def test_constructor_cast_object(self): + s = Series(date_range('1/1/2000', periods=10), dtype=object) + exp = Series(date_range('1/1/2000', periods=10)) + tm.assert_series_equal(s, exp) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 1a1ff28bbb398..127a410f66fdb 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -8,9 +8,7 @@ from numpy import nan import numpy as np -from pandas import Series -from pandas.tseries.index import Timestamp -from pandas.tseries.tdi import Timedelta +from pandas import Series, Timestamp, Timedelta, DataFrame, date_range from pandas.compat import lrange, range, u from pandas import compat @@ -181,3 +179,24 @@ def test_arg_for_errors_in_astype(self): sr.astype(np.int8, raise_on_error=True) sr.astype(np.int8, errors='raise') + + def test_intercept_astype_object(self): + series = Series(date_range('1/1/2000', periods=10)) + + # this test no longer makes sense as series is by default already + # M8[ns] + expected = series.astype('object') + + df = DataFrame({'a': series, + 'b': np.random.randn(len(series))}) + exp_dtypes = Series([np.dtype('datetime64[ns]'), + np.dtype('float64')], index=['a', 'b']) + tm.assert_series_equal(df.dtypes, exp_dtypes) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + df = DataFrame({'a': series, 'b': ['foo'] * len(series)}) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index d4b6e7dd5349f..bdae11770de65 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -7,17 +7,21 @@ import numpy as np import pandas as pd +import pandas.index as _index from pandas.types.common import is_integer, is_scalar -from pandas import Index, Series, DataFrame, isnull, date_range -from pandas.core.index import MultiIndex +from pandas import (Index, Series, DataFrame, isnull, + date_range, NaT, MultiIndex, + Timestamp, DatetimeIndex, Timedelta) from pandas.core.indexing import IndexingError -from pandas.tseries.index import Timestamp from pandas.tseries.offsets import BDay -from pandas.tseries.tdi import Timedelta +from pandas import lib, tslib from pandas.compat import lrange, range from pandas import compat 
-from pandas.util.testing import assert_series_equal, assert_almost_equal +from pandas.util.testing import (slow, + assert_series_equal, + assert_almost_equal, + assert_frame_equal) import pandas.util.testing as tm from pandas.tests.series.common import TestData @@ -421,6 +425,84 @@ def test_getitem_setitem_datetime_tz_dateutil(self): result[datetime(1990, 1, 1, 3, tzinfo=tz('America/Chicago'))] = ts[4] assert_series_equal(result, ts) + def test_getitem_setitem_datetimeindex(self): + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # repeat all the above with naive datetimes + result = ts[datetime(1990, 1, 1, 4)] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4)] = 0 + result[datetime(1990, 1, 1, 4)] = ts[4] + assert_series_equal(result, ts) + + result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] + assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + self.assertEqual(result, expected) + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected = ts[24:48] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = ts[24:48] + assert_series_equal(result, ts) + def test_getitem_setitem_periodindex(self): from pandas import period_range @@ -1835,6 +1917,28 @@ def test_reindex_nan(self): # reindex coerces index.dtype to float, loc/iloc doesn't assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) + def test_reindex_series_add_nat(self): + rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') + series = Series(rng) + + result = series.reindex(lrange(15)) + self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) + + mask = result.isnull() + self.assertTrue(mask[-5:].all()) + self.assertFalse(mask[:-5].any()) + + def test_reindex_with_datetimes(self): + rng = date_range('1/1/2000', periods=20) + ts = Series(np.random.randn(20), index=rng) + + result = ts.reindex(list(ts.index[5:10])) + expected = ts[5:10] + tm.assert_series_equal(result, expected) + + result = ts[list(ts.index[5:10])] + tm.assert_series_equal(result, 
expected) + def test_reindex_corner(self): # (don't forget to fix this) I think it's fixed self.empty.reindex(self.ts.index, method='pad') # it works @@ -2110,6 +2214,432 @@ def test_setitem_slice_into_readonly_backing_data(self): ' array was still mutated!', ) + +class TestTimeSeriesDuplicates(tm.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), + datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 1, 3), datetime(2000, 1, 3), + datetime(2000, 1, 4), datetime(2000, 1, 4), + datetime(2000, 1, 4), datetime(2000, 1, 5)] + + self.dups = Series(np.random.randn(len(dates)), index=dates) + + def test_constructor(self): + tm.assertIsInstance(self.dups, Series) + tm.assertIsInstance(self.dups.index, DatetimeIndex) + + def test_is_unique_monotonic(self): + self.assertFalse(self.dups.index.is_unique) + + def test_index_unique(self): + uniques = self.dups.index.unique() + expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 1, 4), datetime(2000, 1, 5)]) + self.assertEqual(uniques.dtype, 'M8[ns]') # sanity + tm.assert_index_equal(uniques, expected) + self.assertEqual(self.dups.index.nunique(), 4) + + # #2563 + self.assertTrue(isinstance(uniques, DatetimeIndex)) + + dups_local = self.dups.index.tz_localize('US/Eastern') + dups_local.name = 'foo' + result = dups_local.unique() + expected = DatetimeIndex(expected, name='foo') + expected = expected.tz_localize('US/Eastern') + self.assertTrue(result.tz is not None) + self.assertEqual(result.name, 'foo') + tm.assert_index_equal(result, expected) + + # NaT, note this is excluded + arr = [1370745748 + t for t in range(20)] + [tslib.iNaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + self.assertEqual(idx.nunique(), 20) + self.assertEqual(idx.nunique(dropna=False), 21) + + arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) + for t in range(20)] + [NaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + self.assertEqual(idx.nunique(), 20) + self.assertEqual(idx.nunique(dropna=False), 21) + + def test_index_dupes_contains(self): + d = datetime(2011, 12, 5, 20, 30) + ix = DatetimeIndex([d, d]) + self.assertTrue(d in ix) + + def test_duplicate_dates_indexing(self): + ts = self.dups + + uniques = ts.index.unique() + for date in uniques: + result = ts[date] + + mask = ts.index == date + total = (ts.index == date).sum() + expected = ts[mask] + if total > 1: + assert_series_equal(result, expected) + else: + assert_almost_equal(result, expected[0]) + + cp = ts.copy() + cp[date] = 0 + expected = Series(np.where(mask, 0, ts), index=ts.index) + assert_series_equal(cp, expected) + + self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) + + # new index + ts[datetime(2000, 1, 6)] = 0 + self.assertEqual(ts[datetime(2000, 1, 6)], 0) + + def test_range_slice(self): + idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', + '1/4/2000']) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts['1/2/2000':] + expected = ts[1:] + assert_series_equal(result, expected) + + result = ts['1/2/2000':'1/3/2000'] + expected = ts[1:4] + assert_series_equal(result, expected) + + def test_groupby_average_dup_values(self): + result = self.dups.groupby(level=0).mean() + expected = self.dups.groupby(self.dups.index).mean() + assert_series_equal(result, expected) + + def test_indexing_over_size_cutoff(self): + import datetime + # #1821 + + old_cutoff = 
_index._SIZE_CUTOFF + try: + _index._SIZE_CUTOFF = 1000 + + # create large list of non periodic datetime + dates = [] + sec = datetime.timedelta(seconds=1) + half_sec = datetime.timedelta(microseconds=500000) + d = datetime.datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame(np.random.randn(len(dates), 4), + index=dates, + columns=list('ABCD')) + + pos = n * 3 + timestamp = df.index[pos] + self.assertIn(timestamp, df.index) + + # it works! + df.loc[timestamp] + self.assertTrue(len(df.loc[[timestamp]]) > 0) + finally: + _index._SIZE_CUTOFF = old_cutoff + + def test_indexing_unordered(self): + # GH 2437 + rng = date_range(start='2011-01-01', end='2011-01-15') + ts = Series(np.random.rand(len(rng)), index=rng) + ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]]) + + for t in ts.index: + # TODO: unused? + s = str(t) # noqa + + expected = ts[t] + result = ts2[t] + self.assertTrue(expected == result) + + # GH 3448 (ranges) + def compare(slobj): + result = ts2[slobj].copy() + result = result.sort_index() + expected = ts[slobj] + assert_series_equal(result, expected) + + compare(slice('2011-01-01', '2011-01-15')) + compare(slice('2010-12-30', '2011-01-15')) + compare(slice('2011-01-01', '2011-01-16')) + + # partial ranges + compare(slice('2011-01-01', '2011-01-6')) + compare(slice('2011-01-06', '2011-01-8')) + compare(slice('2011-01-06', '2011-01-12')) + + # single values + result = ts2['2011'].sort_index() + expected = ts['2011'] + assert_series_equal(result, expected) + + # diff freq + rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') + ts = Series(np.arange(len(rng)), index=rng) + ts = ts.take(np.random.permutation(20)) + + result = ts['2005'] + for t in result.index: + self.assertTrue(t.year == 2005) + + def test_indexing(self): + + idx = date_range("2001-1-1", periods=20, freq='M') + ts = Series(np.random.rand(len(idx)), index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts['2001'] + expected.name = 'A' + + df = DataFrame(dict(A=ts)) + result = df['2001']['A'] + assert_series_equal(expected, result) + + # setting + ts['2001'] = 1 + expected = ts['2001'] + expected.name = 'A' + + df.loc['2001', 'A'] = 1 + + result = df['2001']['A'] + assert_series_equal(expected, result) + + # GH3546 (not including times on the last day) + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', + freq='H') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + assert_series_equal(expected, ts) + + idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', + freq='S') + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013-05'] + assert_series_equal(expected, ts) + + idx = [Timestamp('2013-05-31 00:00'), + Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] + ts = Series(lrange(len(idx)), index=idx) + expected = ts['2013'] + assert_series_equal(expected, ts) + + # GH14826, indexing with a seconds resolution string / datetime object + df = DataFrame(np.random.rand(5, 5), + columns=['open', 'high', 'low', 'close', 'volume'], + index=date_range('2012-01-02 18:01:00', + periods=5, tz='US/Central', freq='s')) + expected = df.loc[[df.index[2]]] + + # this is a single date, so will raise + 
self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) + self.assertRaises(KeyError, df.__getitem__, df.index[2], ) + + +class TestDatetimeIndexing(tm.TestCase): + """ + Also test support for datetime64[ns] in Series / DataFrame + """ + + def setUp(self): + dti = DatetimeIndex(start=datetime(2005, 1, 1), + end=datetime(2005, 1, 10), freq='Min') + self.series = Series(np.random.rand(len(dti)), dti) + + def test_fancy_getitem(self): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + + self.assertEqual(s[48], 48) + self.assertEqual(s['1/2/2009'], 48) + self.assertEqual(s['2009-1-2'], 48) + self.assertEqual(s[datetime(2009, 1, 2)], 48) + self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) + self.assertRaises(KeyError, s.__getitem__, '2009-1-3') + + assert_series_equal(s['3/6/2009':'2009-06-05'], + s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) + + def test_fancy_setitem(self): + dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), + end=datetime(2010, 1, 1)) + + s = Series(np.arange(len(dti)), index=dti) + s[48] = -1 + self.assertEqual(s[48], -1) + s['1/2/2009'] = -2 + self.assertEqual(s[48], -2) + s['1/2/2009':'2009-06-05'] = -3 + self.assertTrue((s[48:54] == -3).all()) + + def test_dti_snap(self): + dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', + '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') + + res = dti.snap(freq='W-MON') + exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') + exp = exp.repeat([3, 4]) + self.assertTrue((res == exp).all()) + + res = dti.snap(freq='B') + + exp = date_range('1/1/2002', '1/7/2002', freq='b') + exp = exp.repeat([1, 1, 1, 2, 2]) + self.assertTrue((res == exp).all()) + + def test_dti_reset_index_round_trip(self): + dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') + d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]')) + d3 = d2.set_index('index') + assert_frame_equal(d1, d3, check_names=False) + + # #2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) + df = df.set_index('Date') + + self.assertEqual(df.index[0], stamp) + self.assertEqual(df.reset_index()['Date'][0], stamp) + + def test_series_set_value(self): + # #1561 + + dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] + index = DatetimeIndex(dates) + + s = Series().set_value(dates[0], 1.) 
+ s2 = s.set_value(dates[1], np.nan) + + exp = Series([1., np.nan], index=index) + + assert_series_equal(s2, exp) + + # s = Series(index[:1], index[:1]) + # s2 = s.set_value(dates[1], index[1]) + # self.assertEqual(s2.values.dtype, 'M8[ns]') + + @slow + def test_slice_locs_indexerror(self): + times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) + for i in range(100000)] + s = Series(lrange(100000), times) + s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] + + def test_slicing_datetimes(self): + + # GH 7523 + + # unique + df = DataFrame(np.arange(4., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in [1, 2, 3, 4]]) + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + # duplicates + df = pd.DataFrame(np.arange(5., dtype='float64'), + index=[datetime(2001, 1, i, 10, 00) + for i in [1, 2, 2, 3, 4]]) + + result = df.loc[datetime(2001, 1, 1, 10):] + assert_frame_equal(result, df) + result = df.loc[:datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] + assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11):] + expected = df.iloc[1:] + assert_frame_equal(result, expected) + result = df.loc['20010101 11':] + assert_frame_equal(result, expected) + + def test_frame_datetime64_duplicated(self): + dates = date_range('2010-07-01', end='2010-08-05') + + tst = DataFrame({'symbol': 'AAA', 'date': dates}) + result = tst.duplicated(['date', 'symbol']) + self.assertTrue((-result).all()) + + tst = DataFrame({'date': dates}) + result = tst.duplicated() + self.assertTrue((-result).all()) + + +class TestNatIndexing(tm.TestCase): + def setUp(self): + self.series = Series(date_range('1/1/2000', periods=10)) + + # --------------------------------------------------------------------- + # NaT support + + def test_set_none_nan(self): + self.series[3] = None + self.assertIs(self.series[3], NaT) + + self.series[3:5] = None + self.assertIs(self.series[4], NaT) + + self.series[5] = np.nan + self.assertIs(self.series[5], NaT) + + self.series[5:7] = np.nan + self.assertIs(self.series[6], NaT) + + def test_nat_operations(self): + # GH 8617 + s = Series([0, pd.NaT], dtype='m8[ns]') + exp = s[0] + self.assertEqual(s.median(), exp) + self.assertEqual(s.min(), exp) + self.assertEqual(s.max(), exp) + + def test_round_nat(self): + # GH14940 + s = Series([pd.NaT]) + expected = Series(pd.NaT) + for method in ["round", "floor", "ceil"]: + round_method = getattr(s.dt, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + assert_series_equal(round_method(freq), expected) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index b013e1a6f1c10..7b1201b971c71 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -30,6 +30,19 @@ class TestSeriesOperators(TestData, tm.TestCase): _multiprocess_can_split_ = True + def test_series_comparison_scalars(self): + series = Series(date_range('1/1/2000', periods=10)) + + val = datetime(2000, 1, 4) 
+ result = series > val + expected = Series([x > val for x in series]) + self.assert_series_equal(result, expected) + + val = series[5] + result = series > val + expected = Series([x > val for x in series]) + self.assert_series_equal(result, expected) + def test_comparisons(self): left = np.random.randn(10) right = np.random.randn(10) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 571a802e37211..9754a9d3737e3 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,35 +1,24 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import sys -import nose -import locale -import calendar import numpy as np -from numpy.random import rand from datetime import datetime, timedelta, time import pandas as pd -import pandas.index as _index -import pandas.tseries.tools as tools -import pandas.core.common as com import pandas.util.testing as tm from pandas.tslib import iNaT -from pandas.compat import lrange, lmap, StringIO, product +from pandas.compat import lrange, StringIO, product from pandas.tseries.tdi import TimedeltaIndex from pandas.tseries.index import DatetimeIndex from pandas.tseries.offsets import BDay, BMonthEnd -from pandas.types.common import is_datetime64_ns_dtype from pandas import (Index, Series, date_range, NaT, concat, DataFrame, - Timestamp, lib, isnull, to_datetime, offsets, Timedelta, - tslib, bdate_range, Period, timedelta_range, compat) + Timestamp, to_datetime, offsets, + timedelta_range) from pandas.util.testing import (assert_series_equal, assert_almost_equal, - slow, assert_frame_equal, _skip_if_has_locale) + assert_frame_equal, _skip_if_has_locale) from pandas.tests.series.common import TestData -randn = np.random.randn - def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) @@ -118,6 +107,22 @@ def test_shift(self): tz='CET'), name='foo') self.assertRaises(ValueError, lambda: s - s2) + def test_shift2(self): + ts = Series(np.random.randn(5), + index=date_range('1/1/2000', periods=5, freq='H')) + + result = ts.shift(1, freq='5T') + exp_index = ts.index.shift(1, freq='5T') + tm.assert_index_equal(result.index, exp_index) + + # GH #1063, multiple of same base + result = ts.shift(1, freq='4H') + exp_index = ts.index + offsets.Hour(4) + tm.assert_index_equal(result.index, exp_index) + + idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) + self.assertRaises(ValueError, idx.shift, 1) + def test_shift_dst(self): # GH 13926 dates = date_range('2016-11-06', freq='H', periods=10, tz='US/Eastern') @@ -477,28 +482,6 @@ def test_series_ctor_datetime64(self): series = Series(dates) self.assertTrue(np.issubdtype(series.dtype, np.dtype('M8[ns]'))) - def test_reindex_series_add_nat(self): - rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') - series = Series(rng) - - result = series.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) - - mask = result.isnull() - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) - - def test_reindex_frame_add_nat(self): - rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') - df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) - - result = df.reindex(lrange(15)) - self.assertTrue(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) - - mask = com.isnull(result)['B'] - self.assertTrue(mask[-5:].all()) - self.assertFalse(mask[:-5].any()) - def test_series_repr_nat(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') @@ -510,36 +493,6 @@ def 
test_series_repr_nat(self): 'dtype: datetime64[ns]') self.assertEqual(result, expected) - def test_index_convert_to_datetime_array(self): - tm._skip_if_no_pytz() - - def _check_rng(rng): - converted = rng.to_pydatetime() - tm.assertIsInstance(converted, np.ndarray) - for x, stamp in zip(converted, rng): - tm.assertIsInstance(x, datetime) - self.assertEqual(x, stamp.to_pydatetime()) - self.assertEqual(x.tzinfo, stamp.tzinfo) - - rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', tz='US/Eastern') - rng_utc = date_range('20090415', '20090519', tz='utc') - - _check_rng(rng) - _check_rng(rng_eastern) - _check_rng(rng_utc) - - def test_reindex_with_datetimes(self): - rng = date_range('1/1/2000', periods=20) - ts = Series(np.random.randn(20), index=rng) - - result = ts.reindex(list(ts.index[5:10])) - expected = ts[5:10] - tm.assert_series_equal(result, expected) - - result = ts[list(ts.index[5:10])] - tm.assert_series_equal(result, expected) - def test_asfreq_keep_index_name(self): # GH #9854 index_name = 'bar' @@ -680,35 +633,13 @@ def test_at_time(self): rs = ts.at_time('16:00') self.assertEqual(len(rs), 0) - def test_at_time_frame(self): - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - rs = ts.at_time(rng[1]) - self.assertTrue((rs.index.hour == rng[1].hour).all()) - self.assertTrue((rs.index.minute == rng[1].minute).all()) - self.assertTrue((rs.index.second == rng[1].second).all()) - - result = ts.at_time('9:30') - expected = ts.at_time(time(9, 30)) - assert_frame_equal(result, expected) - - result = ts.loc[time(9, 30)] - expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] - - assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range('1/1/2000', '1/31/2000') - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts.at_time(time(0, 0)) - assert_frame_equal(result, ts) + def test_between(self): + series = Series(date_range('1/1/2000', periods=10)) + left, right = series[[2, 7]] - # time doesn't exist - rng = date_range('1/1/2012', freq='23Min', periods=384) - ts = DataFrame(np.random.randn(len(rng), 2), rng) - rs = ts.at_time('16:00') - self.assertEqual(len(rs), 0) + result = series.between(left, right) + expected = (series >= left) & (series <= right) + assert_series_equal(result, expected) def test_between_time(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') @@ -770,66 +701,6 @@ def test_between_time(self): else: self.assertTrue((t < etime) or (t >= stime)) - def test_between_time_frame(self): - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(0, 0) - etime = time(1, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - self.assertEqual(len(filtered), exp_len) - for rs in filtered.index: - t = rs.time() - if inc_start: - self.assertTrue(t >= stime) - else: - self.assertTrue(t > stime) - - if inc_end: - self.assertTrue(t <= etime) - else: - self.assertTrue(t < etime) - - result = ts.between_time('00:00', '01:00') - expected = ts.between_time(stime, etime) - assert_frame_equal(result, expected) - - # across midnight - rng = date_range('1/1/2000', '1/5/2000', freq='5min') - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - stime = time(22, 0) - etime 
= time(9, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - self.assertEqual(len(filtered), exp_len) - for rs in filtered.index: - t = rs.time() - if inc_start: - self.assertTrue((t >= stime) or (t <= etime)) - else: - self.assertTrue((t > stime) or (t <= etime)) - - if inc_end: - self.assertTrue((t <= etime) or (t >= stime)) - else: - self.assertTrue((t < etime) or (t >= stime)) - def test_between_time_types(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min') @@ -897,275 +768,6 @@ def test_to_period(self): expected.columns = exp_idx assert_frame_equal(df.to_period(axis=1), expected) - def create_dt64_based_index(self): - data = [Timestamp('2007-01-01 10:11:12.123456Z'), - Timestamp('2007-01-01 10:11:13.789123Z')] - index = DatetimeIndex(data) - return index - - def test_to_period_millisecond(self): - index = self.create_dt64_based_index() - - period = index.to_period(freq='L') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123Z', 'L')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789Z', 'L')) - - def test_to_period_microsecond(self): - index = self.create_dt64_based_index() - - period = index.to_period(freq='U') - self.assertEqual(2, len(period)) - self.assertEqual(period[0], Period('2007-01-01 10:11:12.123456Z', 'U')) - self.assertEqual(period[1], Period('2007-01-01 10:11:13.789123Z', 'U')) - - def test_to_period_tz_pytz(self): - tm._skip_if_no_pytz() - from dateutil.tz import tzlocal - from pytz import utc as UTC - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertEqual(result, expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=UTC) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertEqual(result, expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertEqual(result, expected) - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_explicit_pytz(self): - tm._skip_if_no_pytz() - import pytz - from dateutil.tz import tzlocal - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - from dateutil.tz import tzlocal - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - 
tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - self.assertTrue(result == expected) - tm.assert_index_equal(ts.to_period(), xp) - - def test_frame_to_period(self): - K = 5 - from pandas.tseries.period import period_range - - dr = date_range('1/1/2000', '1/1/2001') - pr = period_range('1/1/2000', '1/1/2001') - df = DataFrame(randn(len(dr), K), index=dr) - df['mix'] = 'a' - - pts = df.to_period() - exp = df.copy() - exp.index = pr - assert_frame_equal(pts, exp) - - pts = df.to_period('M') - tm.assert_index_equal(pts.index, exp.index.asfreq('M')) - - df = df.T - pts = df.to_period(axis=1) - exp = df.copy() - exp.columns = pr - assert_frame_equal(pts, exp) - - pts = df.to_period('M', axis=1) - tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) - - self.assertRaises(ValueError, df.to_period, axis=2) - - def test_compat_replace(self): - # https://github.com/statsmodels/statsmodels/issues/3349 - # replace should take ints/longs for compat - - for f in [compat.long, int]: - result = date_range(Timestamp('1960-04-01 00:00:00', - freq='QS-JAN'), - periods=f(76), - freq='QS-JAN') - self.assertEqual(len(result), 76) - - def test_astype_object(self): - # NumPy 1.6.1 weak ns support - rng = date_range('1/1/2000', periods=20) - - casted = rng.astype('O') - exp_values = list(rng) - - tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) - self.assertEqual(casted.tolist(), exp_values) - - def test_catch_infinite_loop(self): - offset = offsets.DateOffset(minute=5) - # blow up, don't loop forever - self.assertRaises(Exception, date_range, datetime(2011, 11, 11), - datetime(2011, 11, 12), freq=offset) - - def test_append_concat(self): - rng = date_range('5/8/2012 1:45', periods=10, freq='5T') - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - - result = ts.append(ts) - result_df = df.append(df) - ex_index = DatetimeIndex(np.tile(rng.values, 2)) - tm.assert_index_equal(result.index, ex_index) - tm.assert_index_equal(result_df.index, ex_index) - - appended = rng.append(rng) - tm.assert_index_equal(appended, ex_index) - - appended = rng.append([rng, rng]) - ex_index = DatetimeIndex(np.tile(rng.values, 3)) - tm.assert_index_equal(appended, ex_index) - - # different index names - rng1 = rng.copy() - rng2 = rng.copy() - rng1.name = 'foo' - rng2.name = 'bar' - self.assertEqual(rng1.append(rng1).name, 'foo') - self.assertIsNone(rng1.append(rng2).name) - - def test_append_concat_tz(self): - # GH 2938 - tm._skip_if_no_pytz() - - rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz='US/Eastern') - rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', - tz='US/Eastern') - rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz='US/Eastern') - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def 
test_append_concat_tz_explicit_pytz(self): - # GH 2938 - tm._skip_if_no_pytz() - from pytz import timezone as timezone - - rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz=timezone('US/Eastern')) - rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', - tz=timezone('US/Eastern')) - rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz=timezone('US/Eastern')) - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_append_concat_tz_dateutil(self): - # GH 2938 - tm._skip_if_no_dateutil() - rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz='dateutil/US/Eastern') - rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', - tz='dateutil/US/Eastern') - rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz='dateutil/US/Eastern') - ts = Series(np.random.randn(len(rng)), rng) - df = DataFrame(np.random.randn(len(rng), 4), index=rng) - ts2 = Series(np.random.randn(len(rng2)), rng2) - df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) - - result = ts.append(ts2) - result_df = df.append(df2) - tm.assert_index_equal(result.index, rng3) - tm.assert_index_equal(result_df.index, rng3) - - appended = rng.append(rng2) - tm.assert_index_equal(appended, rng3) - - def test_set_dataframe_column_ns_dtype(self): - x = DataFrame([datetime.now(), datetime.now()]) - self.assertEqual(x[0].dtype, np.dtype('M8[ns]')) - def test_groupby_count_dateparseerror(self): dr = date_range(start='1/1/2012', freq='5min', periods=10) @@ -1180,40 +782,6 @@ def test_groupby_count_dateparseerror(self): assert_series_equal(result, expected) - def test_frame_datetime64_handling_groupby(self): - # it works! - df = DataFrame([(3, np.datetime64('2012-07-03')), - (3, np.datetime64('2012-07-04'))], - columns=['a', 'date']) - result = df.groupby('a').first() - self.assertEqual(result['date'][3], Timestamp('2012-07-03')) - - def test_frame_dict_constructor_datetime64_1680(self): - dr = date_range('1/1/2012', periods=10) - s = Series(dr, index=dr) - - # it works! - DataFrame({'a': 'foo', 'b': s}, index=dr) - DataFrame({'a': 'foo', 'b': s.values}, index=dr) - - def test_frame_datetime64_mixed_index_ctor_1681(self): - dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') - ts = Series(dr) - - # it works! - d = DataFrame({'A': 'foo', 'B': ts}, index=dr) - self.assertTrue(d['B'].isnull().all()) - - def test_frame_timeseries_to_records(self): - index = date_range('1/1/2000', periods=10) - df = DataFrame(np.random.randn(10, 3), index=index, - columns=['a', 'b', 'c']) - - result = df.to_records() - result['index'].dtype == 'M8[ns]' - - result = df.to_records(index=False) - def test_to_csv_numpy_16_bug(self): frame = DataFrame({'a': date_range('1/1/2000', periods=10)}) @@ -1234,20 +802,6 @@ def f(x): s.apply(f) DataFrame(s).applymap(f) - def test_concat_datetime_datetime64_frame(self): - # #2624 - rows = [] - rows.append([datetime(2010, 1, 1), 1]) - rows.append([datetime(2010, 1, 2), 'hi']) - - df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) - - ind = date_range(start="2000/1/1", freq="D", periods=10) - df1 = DataFrame({'date': ind, 'test': lrange(10)}) - - # it works! 
- pd.concat([df1, df2_obj]) - def test_asfreq_resample_set_correct_freq(self): # GH5613 # we test if .asfreq() and .resample() set the correct value for .freq @@ -1283,1085 +837,6 @@ def test_pickle(self): idx_p = self.round_trip_pickle(idx) tm.assert_index_equal(idx, idx_p) - -class TestTimeSeriesDuplicates(tm.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), - datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 3), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 4), - datetime(2000, 1, 4), datetime(2000, 1, 5)] - - self.dups = Series(np.random.randn(len(dates)), index=dates) - - def test_constructor(self): - tm.assertIsInstance(self.dups, Series) - tm.assertIsInstance(self.dups.index, DatetimeIndex) - - def test_is_unique_monotonic(self): - self.assertFalse(self.dups.index.is_unique) - - def test_index_unique(self): - uniques = self.dups.index.unique() - expected = DatetimeIndex([datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 4), datetime(2000, 1, 5)]) - self.assertEqual(uniques.dtype, 'M8[ns]') # sanity - tm.assert_index_equal(uniques, expected) - self.assertEqual(self.dups.index.nunique(), 4) - - # #2563 - self.assertTrue(isinstance(uniques, DatetimeIndex)) - - dups_local = self.dups.index.tz_localize('US/Eastern') - dups_local.name = 'foo' - result = dups_local.unique() - expected = DatetimeIndex(expected, name='foo') - expected = expected.tz_localize('US/Eastern') - self.assertTrue(result.tz is not None) - self.assertEqual(result.name, 'foo') - tm.assert_index_equal(result, expected) - - # NaT, note this is excluded - arr = [1370745748 + t for t in range(20)] + [iNaT] - idx = DatetimeIndex(arr * 3) - tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) - for t in range(20)] + [NaT] - idx = DatetimeIndex(arr * 3) - tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) - self.assertEqual(idx.nunique(), 20) - self.assertEqual(idx.nunique(dropna=False), 21) - - def test_index_dupes_contains(self): - d = datetime(2011, 12, 5, 20, 30) - ix = DatetimeIndex([d, d]) - self.assertTrue(d in ix) - - def test_duplicate_dates_indexing(self): - ts = self.dups - - uniques = ts.index.unique() - for date in uniques: - result = ts[date] - - mask = ts.index == date - total = (ts.index == date).sum() - expected = ts[mask] - if total > 1: - assert_series_equal(result, expected) - else: - assert_almost_equal(result, expected[0]) - - cp = ts.copy() - cp[date] = 0 - expected = Series(np.where(mask, 0, ts), index=ts.index) - assert_series_equal(cp, expected) - - self.assertRaises(KeyError, ts.__getitem__, datetime(2000, 1, 6)) - - # new index - ts[datetime(2000, 1, 6)] = 0 - self.assertEqual(ts[datetime(2000, 1, 6)], 0) - - def test_range_slice(self): - idx = DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000', - '1/4/2000']) - - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts['1/2/2000':] - expected = ts[1:] - assert_series_equal(result, expected) - - result = ts['1/2/2000':'1/3/2000'] - expected = ts[1:4] - assert_series_equal(result, expected) - - def test_groupby_average_dup_values(self): - result = self.dups.groupby(level=0).mean() - expected = self.dups.groupby(self.dups.index).mean() - assert_series_equal(result, expected) - - def test_indexing_over_size_cutoff(self): - import datetime - # #1821 - - 
old_cutoff = _index._SIZE_CUTOFF - try: - _index._SIZE_CUTOFF = 1000 - - # create large list of non periodic datetime - dates = [] - sec = datetime.timedelta(seconds=1) - half_sec = datetime.timedelta(microseconds=500000) - d = datetime.datetime(2011, 12, 5, 20, 30) - n = 1100 - for i in range(n): - dates.append(d) - dates.append(d + sec) - dates.append(d + sec + half_sec) - dates.append(d + sec + sec + half_sec) - d += 3 * sec - - # duplicate some values in the list - duplicate_positions = np.random.randint(0, len(dates) - 1, 20) - for p in duplicate_positions: - dates[p + 1] = dates[p] - - df = DataFrame(np.random.randn(len(dates), 4), - index=dates, - columns=list('ABCD')) - - pos = n * 3 - timestamp = df.index[pos] - self.assertIn(timestamp, df.index) - - # it works! - df.loc[timestamp] - self.assertTrue(len(df.loc[[timestamp]]) > 0) - finally: - _index._SIZE_CUTOFF = old_cutoff - - def test_indexing_unordered(self): - # GH 2437 - rng = date_range(start='2011-01-01', end='2011-01-15') - ts = Series(randn(len(rng)), index=rng) - ts2 = concat([ts[0:4], ts[-4:], ts[4:-4]]) - - for t in ts.index: - # TODO: unused? - s = str(t) # noqa - - expected = ts[t] - result = ts2[t] - self.assertTrue(expected == result) - - # GH 3448 (ranges) - def compare(slobj): - result = ts2[slobj].copy() - result = result.sort_index() - expected = ts[slobj] - assert_series_equal(result, expected) - - compare(slice('2011-01-01', '2011-01-15')) - compare(slice('2010-12-30', '2011-01-15')) - compare(slice('2011-01-01', '2011-01-16')) - - # partial ranges - compare(slice('2011-01-01', '2011-01-6')) - compare(slice('2011-01-06', '2011-01-8')) - compare(slice('2011-01-06', '2011-01-12')) - - # single values - result = ts2['2011'].sort_index() - expected = ts['2011'] - assert_series_equal(result, expected) - - # diff freq - rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') - ts = Series(np.arange(len(rng)), index=rng) - ts = ts.take(np.random.permutation(20)) - - result = ts['2005'] - for t in result.index: - self.assertTrue(t.year == 2005) - - def test_indexing(self): - - idx = date_range("2001-1-1", periods=20, freq='M') - ts = Series(np.random.rand(len(idx)), index=idx) - - # getting - - # GH 3070, make sure semantics work on Series/Frame - expected = ts['2001'] - expected.name = 'A' - - df = DataFrame(dict(A=ts)) - result = df['2001']['A'] - assert_series_equal(expected, result) - - # setting - ts['2001'] = 1 - expected = ts['2001'] - expected.name = 'A' - - df.loc['2001', 'A'] = 1 - - result = df['2001']['A'] - assert_series_equal(expected, result) - - # GH3546 (not including times on the last day) - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', - freq='H') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', - freq='S') - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013-05'] - assert_series_equal(expected, ts) - - idx = [Timestamp('2013-05-31 00:00'), - Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))] - ts = Series(lrange(len(idx)), index=idx) - expected = ts['2013'] - assert_series_equal(expected, ts) - - # GH14826, indexing with a seconds resolution string / datetime object - df = DataFrame(randn(5, 5), - columns=['open', 'high', 'low', 'close', 'volume'], - index=date_range('2012-01-02 18:01:00', - periods=5, tz='US/Central', freq='s')) - expected = df.loc[[df.index[2]]] - - # this is a single date, so will raise - 
self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) - self.assertRaises(KeyError, df.__getitem__, df.index[2], ) - - -class TestDatetime64(tm.TestCase): - """ - Also test support for datetime64[ns] in Series / DataFrame - """ - - def setUp(self): - dti = DatetimeIndex(start=datetime(2005, 1, 1), - end=datetime(2005, 1, 10), freq='Min') - self.series = Series(rand(len(dti)), dti) - - def test_fancy_getitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), - end=datetime(2010, 1, 1)) - - s = Series(np.arange(len(dti)), index=dti) - - self.assertEqual(s[48], 48) - self.assertEqual(s['1/2/2009'], 48) - self.assertEqual(s['2009-1-2'], 48) - self.assertEqual(s[datetime(2009, 1, 2)], 48) - self.assertEqual(s[lib.Timestamp(datetime(2009, 1, 2))], 48) - self.assertRaises(KeyError, s.__getitem__, '2009-1-3') - - assert_series_equal(s['3/6/2009':'2009-06-05'], - s[datetime(2009, 3, 6):datetime(2009, 6, 5)]) - - def test_fancy_setitem(self): - dti = DatetimeIndex(freq='WOM-1FRI', start=datetime(2005, 1, 1), - end=datetime(2010, 1, 1)) - - s = Series(np.arange(len(dti)), index=dti) - s[48] = -1 - self.assertEqual(s[48], -1) - s['1/2/2009'] = -2 - self.assertEqual(s[48], -2) - s['1/2/2009':'2009-06-05'] = -3 - self.assertTrue((s[48:54] == -3).all()) - - def test_dti_snap(self): - dti = DatetimeIndex(['1/1/2002', '1/2/2002', '1/3/2002', '1/4/2002', - '1/5/2002', '1/6/2002', '1/7/2002'], freq='D') - - res = dti.snap(freq='W-MON') - exp = date_range('12/31/2001', '1/7/2002', freq='w-mon') - exp = exp.repeat([3, 4]) - self.assertTrue((res == exp).all()) - - res = dti.snap(freq='B') - - exp = date_range('1/1/2002', '1/7/2002', freq='b') - exp = exp.repeat([1, 1, 1, 2, 2]) - self.assertTrue((res == exp).all()) - - def test_dti_reset_index_round_trip(self): - dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D') - d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti) - d2 = d1.reset_index() - self.assertEqual(d2.dtypes[0], np.dtype('M8[ns]')) - d3 = d2.set_index('index') - assert_frame_equal(d1, d3, check_names=False) - - # #2329 - stamp = datetime(2012, 11, 22) - df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value']) - df = df.set_index('Date') - - self.assertEqual(df.index[0], stamp) - self.assertEqual(df.reset_index()['Date'][0], stamp) - - def test_series_set_value(self): - # #1561 - - dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] - index = DatetimeIndex(dates) - - s = Series().set_value(dates[0], 1.) 
- s2 = s.set_value(dates[1], np.nan) - - exp = Series([1., np.nan], index=index) - - assert_series_equal(s2, exp) - - # s = Series(index[:1], index[:1]) - # s2 = s.set_value(dates[1], index[1]) - # self.assertEqual(s2.values.dtype, 'M8[ns]') - - @slow - def test_slice_locs_indexerror(self): - times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) - for i in range(100000)] - s = Series(lrange(100000), times) - s.loc[datetime(1900, 1, 1):datetime(2100, 1, 1)] - - def test_slicing_datetimes(self): - - # GH 7523 - - # unique - df = DataFrame(np.arange(4., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 3, 4]]) - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - # duplicates - df = pd.DataFrame(np.arange(5., dtype='float64'), - index=[datetime(2001, 1, i, 10, 00) - for i in [1, 2, 2, 3, 4]]) - - result = df.loc[datetime(2001, 1, 1, 10):] - assert_frame_equal(result, df) - result = df.loc[:datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10):datetime(2001, 1, 4, 10)] - assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11):] - expected = df.iloc[1:] - assert_frame_equal(result, expected) - result = df.loc['20010101 11':] - assert_frame_equal(result, expected) - - def test_frame_datetime64_duplicated(self): - dates = date_range('2010-07-01', end='2010-08-05') - - tst = DataFrame({'symbol': 'AAA', 'date': dates}) - result = tst.duplicated(['date', 'symbol']) - self.assertTrue((-result).all()) - - tst = DataFrame({'date': dates}) - result = tst.duplicated() - self.assertTrue((-result).all()) - - -class TestSeriesDatetime64(tm.TestCase): - def setUp(self): - self.series = Series(date_range('1/1/2000', periods=10)) - - def test_auto_conversion(self): - series = Series(list(date_range('1/1/2000', periods=10))) - self.assertEqual(series.dtype, 'M8[ns]') - - def test_constructor_cant_cast_datetime64(self): - msg = "Cannot cast datetime64 to " - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=float) - - with tm.assertRaisesRegexp(TypeError, msg): - Series(date_range('1/1/2000', periods=10), dtype=int) - - def test_constructor_cast_object(self): - s = Series(date_range('1/1/2000', periods=10), dtype=object) - exp = Series(date_range('1/1/2000', periods=10)) - tm.assert_series_equal(s, exp) - - def test_series_comparison_scalars(self): - val = datetime(2000, 1, 4) - result = self.series > val - expected = Series([x > val for x in self.series]) - self.assert_series_equal(result, expected) - - val = self.series[5] - result = self.series > val - expected = Series([x > val for x in self.series]) - self.assert_series_equal(result, expected) - - def test_between(self): - left, right = self.series[[2, 7]] - - result = self.series.between(left, right) - expected = (self.series >= left) & (self.series <= right) - assert_series_equal(result, expected) - - # --------------------------------------------------------------------- - # NaT support - - def test_NaT_scalar(self): - series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') - - val = series[3] - self.assertTrue(com.isnull(val)) - - 
series[2] = val - self.assertTrue(com.isnull(series[2])) - - def test_NaT_cast(self): - # GH10747 - result = Series([np.nan]).astype('M8[ns]') - expected = Series([NaT]) - assert_series_equal(result, expected) - - def test_set_none_nan(self): - self.series[3] = None - self.assertIs(self.series[3], NaT) - - self.series[3:5] = None - self.assertIs(self.series[4], NaT) - - self.series[5] = np.nan - self.assertIs(self.series[5], NaT) - - self.series[5:7] = np.nan - self.assertIs(self.series[6], NaT) - - def test_intercept_astype_object(self): - - # this test no longer makes sense as series is by default already - # M8[ns] - expected = self.series.astype('object') - - df = DataFrame({'a': self.series, - 'b': np.random.randn(len(self.series))}) - exp_dtypes = pd.Series([np.dtype('datetime64[ns]'), - np.dtype('float64')], index=['a', 'b']) - tm.assert_series_equal(df.dtypes, exp_dtypes) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - def test_nat_operations(self): - # GH 8617 - s = Series([0, pd.NaT], dtype='m8[ns]') - exp = s[0] - self.assertEqual(s.median(), exp) - self.assertEqual(s.min(), exp) - self.assertEqual(s.max(), exp) - - def test_round_nat(self): - # GH14940 - s = Series([pd.NaT]) - expected = Series(pd.NaT) - for method in ["round", "floor", "ceil"]: - round_method = getattr(s.dt, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - assert_series_equal(round_method(freq), expected) - - -class TestDaysInMonth(tm.TestCase): - # tests for issue #10154 - def test_day_not_in_month_coerce(self): - self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce'))) - self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce'))) - - def test_day_not_in_month_raise(self): - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise') - self.assertRaises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") - self.assertRaises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") - - def test_day_not_in_month_ignore(self): - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore'), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-29', errors='ignore', format="%Y-%m-%d"), '2015-02-29') - self.assertEqual(to_datetime( - '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') - self.assertEqual(to_datetime( - '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') - - -class TestGuessDatetimeFormat(tm.TestCase): - - def test_guess_datetime_format_with_parseable_formats(self): - tm._skip_if_not_us_locale() - dt_string_to_format = (('20111230', '%Y%m%d'), - ('2011-12-30', '%Y-%m-%d'), - ('30-12-2011', '%d-%m-%Y'), - ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), - ('2011-12-30 00:00:00.000000', - '%Y-%m-%d %H:%M:%S.%f'), ) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_with_dayfirst(self): - ambiguous_string = '01/01/2011' - 
self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=True), - '%d/%m/%Y' - ) - self.assertEqual( - tools._guess_datetime_format(ambiguous_string, dayfirst=False), - '%m/%d/%Y' - ) - - def test_guess_datetime_format_with_locale_specific_formats(self): - # The month names will vary depending on the locale, in which - # case these wont be parsed properly (dateutil can't parse them) - _skip_if_has_locale() - - dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), - ('30/December/2011', '%d/%B/%Y'), - ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_invalid_inputs(self): - # A datetime string must include a year, month and a day for it - # to be guessable, in addition to being a string that looks like - # a datetime - invalid_dts = [ - '2013', - '01/2013', - '12:00:00', - '1/1/1/1', - 'this_is_not_a_datetime', - '51a', - 9, - datetime(2011, 1, 1), - ] - - for invalid_dt in invalid_dts: - self.assertTrue(tools._guess_datetime_format(invalid_dt) is None) - - def test_guess_datetime_format_nopadding(self): - # GH 11142 - dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), - ('30-1-2011', '%d-%m-%Y'), - ('1/1/2011', '%m/%d/%Y'), - ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), - ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) - - for dt_string, dt_format in dt_string_to_format: - self.assertEqual( - tools._guess_datetime_format(dt_string), - dt_format - ) - - def test_guess_datetime_format_for_array(self): - tm._skip_if_not_us_locale() - expected_format = '%Y-%m-%d %H:%M:%S.%f' - dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) - - test_arrays = [ - np.array([dt_string, dt_string, dt_string], dtype='O'), - np.array([np.nan, np.nan, dt_string], dtype='O'), - np.array([dt_string, 'random_string'], dtype='O'), - ] - - for test_array in test_arrays: - self.assertEqual( - tools._guess_datetime_format_for_array(test_array), - expected_format - ) - - format_for_string_of_nans = tools._guess_datetime_format_for_array( - np.array( - [np.nan, np.nan, np.nan], dtype='O')) - self.assertTrue(format_for_string_of_nans is None) - - -class TestToDatetimeInferFormat(tm.TestCase): - - def test_to_datetime_infer_datetime_format_consistent_format(self): - s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) - - test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', - '%Y-%m-%dT%H:%M:%S.%f'] - - for test_format in test_formats: - s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) - - with_format = pd.to_datetime(s_as_dt_strings, format=test_format) - no_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=False) - yes_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=True) - - # Whether the format is explicitly passed, it is inferred, or - # it is not inferred, the results should all be the same - self.assert_series_equal(with_format, no_infer) - self.assert_series_equal(no_infer, yes_infer) - - def test_to_datetime_infer_datetime_format_inconsistent_format(self): - s = pd.Series(np.array(['01/01/2011 00:00:00', - '01-02-2011 00:00:00', - '2011-01-03T00:00:00'])) - - # When the format is inconsistent, infer_datetime_format should just - # fallback to the default parsing - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - s = pd.Series(np.array(['Jan/01/2011', 
'Feb/01/2011', 'Mar/01/2011'])) - - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_with_nans(self): - s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, - '01/03/2011 00:00:00', np.nan])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): - s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', - '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) - - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_iso8601_noleading_0s(self): - # GH 11871 - s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) - expected = pd.Series([pd.Timestamp('2014-01-01'), - pd.Timestamp('2014-02-02'), - pd.Timestamp('2015-03-03')]) - tm.assert_series_equal(pd.to_datetime(s), expected) - tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) - - -class TimeConversionFormats(tm.TestCase): - def test_to_datetime_format(self): - values = ['1/1/2000', '1/2/2000', '1/3/2000'] - - results1 = [Timestamp('20000101'), Timestamp('20000201'), - Timestamp('20000301')] - results2 = [Timestamp('20000101'), Timestamp('20000102'), - Timestamp('20000103')] - for vals, expecteds in [(values, (Index(results1), Index(results2))), - (Series(values), - (Series(results1), Series(results2))), - (values[0], (results1[0], results2[0])), - (values[1], (results1[1], results2[1])), - (values[2], (results1[2], results2[2]))]: - - for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): - result = to_datetime(vals, format=fmt) - expected = expecteds[i] - - if isinstance(expected, Series): - assert_series_equal(result, Series(expected)) - elif isinstance(expected, Timestamp): - self.assertEqual(result, expected) - else: - tm.assert_index_equal(result, expected) - - def test_to_datetime_format_YYYYMMDD(self): - s = Series([19801222, 19801222] + [19810105] * 5) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - result = to_datetime(s.apply(str), format='%Y%m%d') - assert_series_equal(result, expected) - - # with NaT - expected = Series([Timestamp("19801222"), Timestamp("19801222")] + - [Timestamp("19810105")] * 5) - expected[2] = np.nan - s[2] = np.nan - - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # string with NaT - s = s.apply(str) - s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') - assert_series_equal(result, expected) - - # coercion - # GH 7930 - s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') - expected = Series([datetime(2012, 12, 31), - datetime(2014, 12, 31), datetime(9999, 12, 31)], - dtype=object) - self.assert_series_equal(result, expected) - - result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') - expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') - assert_series_equal(result, expected) - - # GH 10178 - def test_to_datetime_format_integer(self): - s = Series([2000, 2001, 2002]) - expected = Series([Timestamp(x) for x in s.apply(str)]) - - result = to_datetime(s, format='%Y') - assert_series_equal(result, expected) - - s = Series([200001, 200105, 200206]) - expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) - ]) 
- - result = to_datetime(s, format='%Y%m') - assert_series_equal(result, expected) - - def test_to_datetime_format_microsecond(self): - - # these are locale dependent - lang, _ = locale.getlocale() - month_abbr = calendar.month_abbr[4] - val = '01-{}-2011 00:00:01.978'.format(month_abbr) - - format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) - exp = datetime.strptime(val, format) - self.assertEqual(result, exp) - - def test_to_datetime_format_time(self): - data = [ - ['01/10/2010 15:20', '%m/%d/%Y %H:%M', - Timestamp('2010-01-10 15:20')], - ['01/10/2010 05:43', '%m/%d/%Y %I:%M', - Timestamp('2010-01-10 05:43')], - ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S', - Timestamp('2010-01-10 13:56:01')] # , - # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', - # Timestamp('2010-01-10 20:14')], - # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', - # Timestamp('2010-01-10 07:40')], - # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', - # Timestamp('2010-01-10 09:12:56')] - ] - for s, format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) - - def test_to_datetime_with_non_exact(self): - # GH 10834 - _skip_if_has_locale() - - # 8904 - # exact kw - if sys.version_info < (2, 7): - raise nose.SkipTest('on python version < 2.7') - - s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', - '19MAY11 00:00:00Z']) - result = to_datetime(s, format='%d%b%y', exact=False) - expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), - format='%d%b%y') - assert_series_equal(result, expected) - - def test_parse_nanoseconds_with_formula(self): - - # GH8989 - # trunctaing the nanoseconds when a format was provided - for v in ["2012-01-01 09:00:00.000000001", - "2012-01-01 09:00:00.000001", - "2012-01-01 09:00:00.001", - "2012-01-01 09:00:00.001000", - "2012-01-01 09:00:00.001000000", ]: - expected = pd.to_datetime(v) - result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") - self.assertEqual(result, expected) - - def test_to_datetime_format_weeks(self): - data = [ - ['2009324', '%Y%W%w', Timestamp('2009-08-13')], - ['2013020', '%Y%U%w', Timestamp('2013-01-13')] - ] - for s, format, dt in data: - self.assertEqual(to_datetime(s, format=format), dt) - - -class TestSlicing(tm.TestCase): - def test_slice_year(self): - dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) - - s = Series(np.arange(len(dti)), index=dti) - result = s['2005'] - expected = s[s.index.year == 2005] - assert_series_equal(result, expected) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - result = df.loc['2005'] - expected = df[df.index.year == 2005] - assert_frame_equal(result, expected) - - rng = date_range('1/1/2000', '1/1/2010') - - result = rng.get_loc('2009') - expected = slice(3288, 3653) - self.assertEqual(result, expected) - - def test_slice_quarter(self): - dti = DatetimeIndex(freq='D', start=datetime(2000, 6, 1), periods=500) - - s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2001Q1']), 90) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['1Q01']), 90) - - def test_slice_month(self): - dti = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500) - s = Series(np.arange(len(dti)), index=dti) - self.assertEqual(len(s['2005-11']), 30) - - df = DataFrame(np.random.rand(len(dti), 5), index=dti) - self.assertEqual(len(df.loc['2005-11']), 30) - - assert_series_equal(s['2005-11'], s['11-2005']) - - def test_partial_slice(self): - rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), 
periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-05':'2006-02'] - expected = s['20050501':'20060228'] - assert_series_equal(result, expected) - - result = s['2005-05':] - expected = s['20050501':] - assert_series_equal(result, expected) - - result = s[:'2006-02'] - expected = s[:'20060228'] - assert_series_equal(result, expected) - - result = s['2005-1-1'] - self.assertEqual(result, s.iloc[0]) - - self.assertRaises(Exception, s.__getitem__, '2004-12-31') - - def test_partial_slice_daily(self): - rng = DatetimeIndex(freq='H', start=datetime(2005, 1, 31), periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-31'] - assert_series_equal(result, s.iloc[:24]) - - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') - - def test_partial_slice_hourly(self): - rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), - periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-1'] - assert_series_equal(result, s.iloc[:60 * 4]) - - result = s['2005-1-1 20'] - assert_series_equal(result, s.iloc[:60]) - - self.assertEqual(s['2005-1-1 20:00'], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') - - def test_partial_slice_minutely(self): - rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), - periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['2005-1-1 23:59'] - assert_series_equal(result, s.iloc[:60]) - - result = s['2005-1-1'] - assert_series_equal(result, s.iloc[:60]) - - self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.iloc[0]) - self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') - - def test_partial_slice_second_precision(self): - rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, - microsecond=999990), - periods=20, freq='US') - s = Series(np.arange(20), rng) - - assert_series_equal(s['2005-1-1 00:00'], s.iloc[:10]) - assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) - - assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) - assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) - - self.assertEqual(s[Timestamp('2005-1-1 00:00:59.999990')], s.iloc[0]) - self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) - - def test_partial_slicing_dataframe(self): - # GH14856 - # Test various combinations of string slicing resolution vs. 
- # index resolution - # - If string resolution is less precise than index resolution, - # string is considered a slice - # - If string resolution is equal to or more precise than index - # resolution, string is considered an exact match - formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', - '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] - resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] - for rnum, resolution in enumerate(resolutions[2:], 2): - # we check only 'day', 'hour', 'minute' and 'second' - unit = Timedelta("1 " + resolution) - middate = datetime(2012, 1, 1, 0, 0, 0) - index = DatetimeIndex([middate - unit, - middate, middate + unit]) - values = [1, 2, 3] - df = DataFrame({'a': values}, index, dtype=np.int64) - self.assertEqual(df.index.resolution, resolution) - - # Timestamp with the same resolution as index - # Should be exact match for Series (return scalar) - # and raise KeyError for Frame - for timestamp, expected in zip(index, values): - ts_string = timestamp.strftime(formats[rnum]) - # make ts_string as precise as index - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Timestamp with resolution less precise than index - for fmt in formats[:rnum]: - for element, theslice in [[0, slice(None, 1)], - [1, slice(1, None)]]: - ts_string = index[element].strftime(fmt) - - # Series should return slice - result = df['a'][ts_string] - expected = df['a'][theslice] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts_string] - expected = df[theslice] - assert_frame_equal(result, expected) - - # Timestamp with resolution more precise than index - # Compatible with existing key - # Should return scalar for Series - # and raise KeyError for Frame - for fmt in formats[rnum + 1:]: - ts_string = index[1].strftime(fmt) - result = df['a'][ts_string] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - # Not compatible with existing key - # Should raise KeyError - for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: - ts = index[1] + Timedelta("1 " + res) - ts_string = ts.strftime(fmt) - self.assertRaises(KeyError, df['a'].__getitem__, ts_string) - self.assertRaises(KeyError, df.__getitem__, ts_string) - - def test_partial_slicing_with_multiindex(self): - - # GH 4758 - # partial string indexing with a multi-index buggy - df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], - 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], - 'val': [1, 2, 3, 4]}, - index=date_range("2013-06-19 09:30:00", - periods=4, freq='5T')) - df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) - - expected = DataFrame([ - [1] - ], index=Index(['ABC'], name='TICKER'), columns=['val']) - result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] - assert_frame_equal(result, expected) - - expected = df_multi.loc[ - (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] - result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] - assert_series_equal(result, expected) - - # this is a KeyError as we don't do partial string selection on - # multi-levels - def f(): - df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] - - self.assertRaises(KeyError, f) - - # GH 4294 - # partial slice on a series mi - s = pd.DataFrame(randn(1000, 1000), index=pd.date_range( - '2000-1-1', periods=1000)).stack() - - s2 = s[:-1].copy() - expected = s2['2000-1-4'] - result = 
s2[pd.Timestamp('2000-1-4')] - assert_series_equal(result, expected) - - result = s[pd.Timestamp('2000-1-4')] - expected = s['2000-1-4'] - assert_series_equal(result, expected) - - df2 = pd.DataFrame(s) - expected = df2.xs('2000-1-4') - result = df2.loc[pd.Timestamp('2000-1-4')] - assert_frame_equal(result, expected) - - def test_shift(self): - ts = Series(np.random.randn(5), - index=date_range('1/1/2000', periods=5, freq='H')) - - result = ts.shift(1, freq='5T') - exp_index = ts.index.shift(1, freq='5T') - tm.assert_index_equal(result.index, exp_index) - - # GH #1063, multiple of same base - result = ts.shift(1, freq='4H') - exp_index = ts.index + offsets.Hour(4) - tm.assert_index_equal(result.index, exp_index) - - idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - self.assertRaises(ValueError, idx.shift, 1) - def test_setops_preserve_freq(self): for tz in [None, 'Asia/Tokyo', 'US/Eastern']: rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) @@ -2453,602 +928,8 @@ def test_get_level_values_box(self): self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp)) - def test_frame_apply_dont_convert_datetime64(self): - from pandas.tseries.offsets import BDay - df = DataFrame({'x1': [datetime(1996, 1, 1)]}) - - df = df.applymap(lambda x: x + BDay()) - df = df.applymap(lambda x: x + BDay()) - - self.assertTrue(df.x1.dtype == 'M8[ns]') - - def test_partial_slice_doesnt_require_monotonicity(self): - # For historical reasons. - s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) - - nonmonotonic = s[[3, 5, 4]] - expected = nonmonotonic.iloc[:0] - timestamp = pd.Timestamp('2014-01-10') - - assert_series_equal(nonmonotonic['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) - - assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - self.assertRaisesRegexp(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic.loc[timestamp:]) - - -class TestToDatetime(tm.TestCase): - _multiprocess_can_split_ = True - - def test_to_datetime_dt64s(self): - in_bound_dts = [ - np.datetime64('2000-01-01'), - np.datetime64('2000-01-02'), - ] - - for dt in in_bound_dts: - self.assertEqual(pd.to_datetime(dt), Timestamp(dt)) - - oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] - - for dt in oob_dts: - self.assertRaises(ValueError, pd.to_datetime, dt, errors='raise') - self.assertRaises(ValueError, tslib.Timestamp, dt) - self.assertIs(pd.to_datetime(dt, errors='coerce'), NaT) - - def test_to_datetime_array_of_dt64s(self): - dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] - - # Assuming all datetimes are in bounds, to_datetime() returns - # an array that is equal to Timestamp() parsing - self.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), - np.array([Timestamp(x).asm8 for x in dts]) - ) - - # A list of datetimes where the last one is out of bounds - dts_with_oob = dts + [np.datetime64('9999-01-01')] - - self.assertRaises(ValueError, pd.to_datetime, dts_with_oob, - errors='raise') - - self.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), - np.array( - [ - Timestamp(dts_with_oob[0]).asm8, - Timestamp(dts_with_oob[1]).asm8, - iNaT, - ], - dtype='M8' - ) - ) - - # With errors='ignore', out of bounds datetime64s - # are converted to their .item(), which depending on the version of - # numpy is either a python datetime.datetime or datetime.date - self.assert_numpy_array_equal( - 
pd.to_datetime(dts_with_oob, box=False, errors='ignore'), - np.array( - [dt.item() for dt in dts_with_oob], - dtype='O' - ) - ) - - def test_to_datetime_tz(self): - - # xref 8260 - # uniform returns a DatetimeIndex - arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) - expected = DatetimeIndex( - ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') - tm.assert_index_equal(result, expected) - - # mixed tzs will raise - arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - self.assertRaises(ValueError, lambda: pd.to_datetime(arr)) - - def test_to_datetime_tz_pytz(self): - - # xref 8260 - tm._skip_if_no_pytz() - import pytz - - us_eastern = pytz.timezone('US/Eastern') - arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, - hour=3, minute=0)), - us_eastern.localize(datetime(year=2000, month=6, day=1, - hour=3, minute=0))], - dtype=object) - result = pd.to_datetime(arr, utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - '2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) - - def test_to_datetime_tz_psycopg2(self): - - # xref 8260 - try: - import psycopg2 - except ImportError: - raise nose.SkipTest("no psycopg2 installed") - - # misc cases - tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) - tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) - arr = np.array([datetime(2000, 1, 1, 3, 0, tzinfo=tz1), - datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], - dtype=object) - - result = pd.to_datetime(arr, errors='coerce', utc=True) - expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', - '2000-06-01 07:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) - tm.assert_index_equal(result, expected) - - # dtype coercion - i = pd.DatetimeIndex([ - '2000-01-01 08:00:00+00:00' - ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) - self.assertTrue(is_datetime64_ns_dtype(i)) - - # tz coerceion - result = pd.to_datetime(i, errors='coerce') - tm.assert_index_equal(result, i) - - result = pd.to_datetime(i, errors='coerce', utc=True) - expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], - dtype='datetime64[ns, UTC]') - tm.assert_index_equal(result, expected) - - def test_datetime_bool(self): - # GH13176 - with self.assertRaises(TypeError): - to_datetime(False) - self.assertTrue(to_datetime(False, errors="coerce") is tslib.NaT) - self.assertEqual(to_datetime(False, errors="ignore"), False) - with self.assertRaises(TypeError): - to_datetime(True) - self.assertTrue(to_datetime(True, errors="coerce") is tslib.NaT) - self.assertEqual(to_datetime(True, errors="ignore"), True) - with self.assertRaises(TypeError): - to_datetime([False, datetime.today()]) - with self.assertRaises(TypeError): - to_datetime(['20130101', True]) - tm.assert_index_equal(to_datetime([0, False, tslib.NaT, 0.0], - errors="coerce"), - DatetimeIndex([to_datetime(0), tslib.NaT, - tslib.NaT, to_datetime(0)])) - - def test_datetime_invalid_datatype(self): - # GH13176 - - with self.assertRaises(TypeError): - 
pd.to_datetime(bool) - with self.assertRaises(TypeError): - pd.to_datetime(pd.to_datetime) - - def test_unit(self): - # GH 11758 - # test proper behavior with erros - - with self.assertRaises(ValueError): - to_datetime([1], unit='D', format='%Y%m%d') - - values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan, - 'NaT', ''] - result = to_datetime(values, unit='D', errors='ignore') - expected = Index([11111111, Timestamp('1970-01-02'), - Timestamp('1970-01-02'), pd.NaT, - pd.NaT, pd.NaT, pd.NaT, pd.NaT], - dtype=object) - tm.assert_index_equal(result, expected) - - result = to_datetime(values, unit='D', errors='coerce') - expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', - 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) - tm.assert_index_equal(result, expected) - - with self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, unit='D', errors='raise') - - values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT'] - - result = to_datetime(values, errors='ignore', unit='s') - expected = Index([1420043460000, pd.NaT, pd.NaT, - pd.NaT, pd.NaT], dtype=object) - tm.assert_index_equal(result, expected) - - result = to_datetime(values, errors='coerce', unit='s') - expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) - tm.assert_index_equal(result, expected) - - with self.assertRaises(tslib.OutOfBoundsDatetime): - to_datetime(values, errors='raise', unit='s') - - # if we have a string, then we raise a ValueError - # and NOT an OutOfBoundsDatetime - for val in ['foo', Timestamp('20130101')]: - try: - to_datetime(val, errors='raise', unit='s') - except tslib.OutOfBoundsDatetime: - raise AssertionError("incorrect exception raised") - except ValueError: - pass - - def test_unit_consistency(self): - - # consistency of conversions - expected = Timestamp('1970-05-09 14:25:11') - result = pd.to_datetime(11111111, unit='s', errors='raise') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - result = pd.to_datetime(11111111, unit='s', errors='coerce') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - result = pd.to_datetime(11111111, unit='s', errors='ignore') - self.assertEqual(result, expected) - self.assertIsInstance(result, Timestamp) - - def test_unit_with_numeric(self): - - # GH 13180 - # coercions from floats/ints are ok - expected = DatetimeIndex(['2015-06-19 05:33:20', - '2015-05-27 22:33:20']) - arr1 = [1.434692e+18, 1.432766e+18] - arr2 = np.array(arr1).astype('int64') - for errors in ['ignore', 'raise', 'coerce']: - result = pd.to_datetime(arr1, errors=errors) - tm.assert_index_equal(result, expected) - - result = pd.to_datetime(arr2, errors=errors) - tm.assert_index_equal(result, expected) - - # but we want to make sure that we are coercing - # if we have ints/strings - expected = DatetimeIndex(['NaT', - '2015-06-19 05:33:20', - '2015-05-27 22:33:20']) - arr = ['foo', 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - expected = DatetimeIndex(['2015-06-19 05:33:20', - '2015-05-27 22:33:20', - 'NaT', - 'NaT']) - arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - def test_unit_mixed(self): - - # mixed integers/datetimes - expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) - arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - with 
self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') - - expected = DatetimeIndex(['NaT', - 'NaT', - '2013-01-01']) - arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] - result = pd.to_datetime(arr, errors='coerce') - tm.assert_index_equal(result, expected) - - with self.assertRaises(ValueError): - pd.to_datetime(arr, errors='raise') - - def test_dataframe(self): - - df = DataFrame({'year': [2015, 2016], - 'month': [2, 3], - 'day': [4, 5], - 'hour': [6, 7], - 'minute': [58, 59], - 'second': [10, 11], - 'ms': [1, 1], - 'us': [2, 2], - 'ns': [3, 3]}) - - result = to_datetime({'year': df['year'], - 'month': df['month'], - 'day': df['day']}) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:0:00')]) - assert_series_equal(result, expected) - - # dict-like - result = to_datetime(df[['year', 'month', 'day']].to_dict()) - assert_series_equal(result, expected) - - # dict but with constructable - df2 = df[['year', 'month', 'day']].to_dict() - df2['month'] = 2 - result = to_datetime(df2) - expected2 = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160205 00:0:00')]) - assert_series_equal(result, expected2) - - # unit mappings - units = [{'year': 'years', - 'month': 'months', - 'day': 'days', - 'hour': 'hours', - 'minute': 'minutes', - 'second': 'seconds'}, - {'year': 'year', - 'month': 'month', - 'day': 'day', - 'hour': 'hour', - 'minute': 'minute', - 'second': 'second'}, - ] - - for d in units: - result = to_datetime(df[list(d.keys())].rename(columns=d)) - expected = Series([Timestamp('20150204 06:58:10'), - Timestamp('20160305 07:59:11')]) - assert_series_equal(result, expected) - - d = {'year': 'year', - 'month': 'month', - 'day': 'day', - 'hour': 'hour', - 'minute': 'minute', - 'second': 'second', - 'ms': 'ms', - 'us': 'us', - 'ns': 'ns'} - - result = to_datetime(df.rename(columns=d)) - expected = Series([Timestamp('20150204 06:58:10.001002003'), - Timestamp('20160305 07:59:11.001002003')]) - assert_series_equal(result, expected) - - # coerce back to int - result = to_datetime(df.astype(str)) - assert_series_equal(result, expected) - - # passing coerce - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5]}) - with self.assertRaises(ValueError): - to_datetime(df2) - result = to_datetime(df2, errors='coerce') - expected = Series([Timestamp('20150204 00:00:00'), - pd.NaT]) - assert_series_equal(result, expected) - - # extra columns - with self.assertRaises(ValueError): - df2 = df.copy() - df2['foo'] = 1 - to_datetime(df2) - - # not enough - for c in [['year'], - ['year', 'month'], - ['year', 'month', 'second'], - ['month', 'day'], - ['year', 'day', 'second']]: - with self.assertRaises(ValueError): - to_datetime(df[c]) - - # duplicates - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5]}) - df2.columns = ['year', 'year', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) - - df2 = DataFrame({'year': [2015, 2016], - 'month': [2, 20], - 'day': [4, 5], - 'hour': [4, 5]}) - df2.columns = ['year', 'month', 'day', 'day'] - with self.assertRaises(ValueError): - to_datetime(df2) - - def test_dataframe_dtypes(self): - # #13451 - df = DataFrame({'year': [2015, 2016], - 'month': [2, 3], - 'day': [4, 5]}) - - # int16 - result = to_datetime(df.astype('int16')) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:00:00')]) - assert_series_equal(result, expected) - - # mixed dtypes - df['month'] = df['month'].astype('int8') - df['day'] = df['day'].astype('int8') - 
result = to_datetime(df) - expected = Series([Timestamp('20150204 00:00:00'), - Timestamp('20160305 00:00:00')]) - assert_series_equal(result, expected) - - # float - df = DataFrame({'year': [2000, 2001], - 'month': [1.5, 1], - 'day': [1, 1]}) - with self.assertRaises(ValueError): - to_datetime(df) - - def test_index_to_datetime(self): - idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = idx.to_datetime() - expected = DatetimeIndex(pd.to_datetime(idx.values)) - tm.assert_index_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - today = datetime.today() - idx = Index([today], dtype=object) - result = idx.to_datetime() - expected = DatetimeIndex([today]) - tm.assert_index_equal(result, expected) - - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) - exp = Timestamp("2012-01-01 00:00:00") - self.assertEqual(result[0], exp) - - result = to_datetime(['20121001']) # bad iso 8601 - exp = Timestamp('2012-10-01') - self.assertEqual(result[0], exp) - - def test_to_datetime_default(self): - rs = to_datetime('2001') - xp = datetime(2001, 1, 1) - self.assertTrue(rs, xp) - - # dayfirst is essentially broken - - # to_datetime('01-13-2012', dayfirst=True) - # self.assertRaises(ValueError, to_datetime('01-13-2012', - # dayfirst=True)) - - def test_to_datetime_on_datetime64_series(self): - # #2699 - s = Series(date_range('1/1/2000', periods=10)) - - result = to_datetime(s) - self.assertEqual(result[0], s[0]) - - def test_to_datetime_with_space_in_series(self): - # GH 6428 - s = Series(['10/18/2006', '10/18/2008', ' ']) - tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') - expected_coerce = Series([datetime(2006, 10, 18), - datetime(2008, 10, 18), - pd.NaT]) - tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') - tm.assert_series_equal(result_ignore, s) - - def test_to_datetime_with_apply(self): - # this is only locale tested with US/None locales - _skip_if_has_locale() - - # GH 5195 - # with a format and coerce a single item to_datetime fails - td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected = pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') - assert_series_equal(result, expected) - - td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) - self.assertRaises(ValueError, - lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) - self.assertRaises(ValueError, - lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') - - result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) - assert_series_equal(result, expected) - - def test_to_datetime_types(self): - - # empty string - result = to_datetime('') - self.assertIs(result, NaT) - - result = to_datetime(['', '']) - self.assertTrue(isnull(result).all()) - - # ints - result = Timestamp(0) - expected = to_datetime(0) - self.assertEqual(result, expected) - - # GH 3888 (strings) - expected = to_datetime(['2012'])[0] - result = to_datetime('2012') - self.assertEqual(result, expected) - - # array = ['2012','20120101','20120101 12:01:01'] - array = ['20120101', '20120101 12:01:01'] - expected = list(to_datetime(array)) - result = lmap(Timestamp, array) - tm.assert_almost_equal(result, expected) - - # 
currently fails ### - # result = Timestamp('2012') - # expected = to_datetime('2012') - # self.assertEqual(result, expected) - - def test_to_datetime_unprocessable_input(self): - # GH 4928 - self.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore'), - np.array([1, '1'], dtype='O') - ) - self.assertRaises(TypeError, to_datetime, [1, '1'], errors='raise') - - def test_to_datetime_other_datetime64_units(self): - # 5/25/2012 - scalar = np.int64(1337904000000000).view('M8[us]') - as_obj = scalar.astype('O') - - index = DatetimeIndex([scalar]) - self.assertEqual(index[0], scalar.astype('O')) - - value = Timestamp(scalar) - self.assertEqual(value, as_obj) - - def test_to_datetime_list_of_integers(self): - rng = date_range('1/1/2000', periods=20) - rng = DatetimeIndex(rng.values) - - ints = list(rng.asi8) - - result = DatetimeIndex(ints) - - tm.assert_index_equal(rng, result) - - def test_to_datetime_freq(self): - xp = bdate_range('2000-1-1', periods=10, tz='UTC') - rs = xp.to_datetime() - self.assertEqual(xp.freq, rs.freq) - self.assertEqual(xp.tzinfo, rs.tzinfo) - if __name__ == '__main__': + import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 7d6afc4b22fde9ce32161917c2440947505bf4ad Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Feb 2017 13:28:49 -0500 Subject: [PATCH 014/353] ENH: .isnull and .notnull have been added as methods to Index to make this more consistent with the Series API Author: Jeff Reback Closes #15300 from jreback/null and squashes the following commits: 8c35656 [Jeff Reback] DOC: move Index.where to shared_docs e4502bf [Jeff Reback] ENH: .isnull and .notnull have been added as methods to Index to make this more consistent with the Series API --- doc/source/api.rst | 8 +++++++ doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/indexes/base.py | 38 +++++++++++++++++++++++++++++++-- pandas/indexes/category.py | 13 +---------- pandas/tests/indexes/common.py | 27 ++++++++++++++++++++++- pandas/tseries/base.py | 13 +---------- 6 files changed, 73 insertions(+), 28 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 92f290b5ee0a9..6c4a3cff5b4cf 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1356,8 +1356,16 @@ Modifying and Computations Index.unique Index.nunique Index.value_counts + +Missing Values +~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + Index.fillna Index.dropna + Index.isnull + Index.notnull Conversion ~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d76a78c68fb73..c6d757c6884d0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -125,7 +125,7 @@ Other enhancements - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. 
'0.5min' is parsed as '30s') (:issue:`8419`) - +- ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) - ``pd.read_gbq`` method now allows query configuration preferences (:issue:`14742`) - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index bc2dce4e97e5b..dcd565ee5f0e9 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -564,8 +564,7 @@ def repeat(self, repeats, *args, **kwargs): nv.validate_repeat(args, kwargs) return self._shallow_copy(self._values.repeat(repeats)) - def where(self, cond, other=None): - """ + _index_shared_docs['where'] = """ .. versionadded:: 0.19.0 Return an Index of same shape as self and whose corresponding @@ -577,6 +576,9 @@ def where(self, cond, other=None): cond : boolean same length as self other : scalar, or array-like """ + + @Appender(_index_shared_docs['where']) + def where(self, cond, other=None): if other is None: other = self._na_value values = np.where(cond, self.values, other) @@ -1662,6 +1664,38 @@ def hasnans(self): else: return False + def isnull(self): + """ + Detect missing values + + .. versionadded:: 0.20.0 + + Returns + ------- + a boolean array of whether my values are null + + See also + -------- + pandas.isnull : pandas version + """ + return self._isnan + + def notnull(self): + """ + Reverse of isnull + + .. versionadded:: 0.20.0 + + Returns + ------- + a boolean array of whether my values are not null + + See also + -------- + pandas.notnull : pandas version + """ + return ~self.isnull() + def putmask(self, mask, value): """ return a new Index of the values set with the mask diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index e3ffa40f5f94a..e2e0fd056b111 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -332,19 +332,8 @@ def _can_reindex(self, indexer): """ always allow reindexing """ pass + @Appender(_index_shared_docs['where']) def where(self, cond, other=None): - """ - .. versionadded:: 0.19.0 - - Return an Index of same shape as self and whose corresponding - entries are from self where cond is True and otherwise are from - other. 
- - Parameters - ---------- - cond : boolean same length as self - other : scalar, or array-like - """ if other is None: other = self._na_value values = np.where(cond, self.values, other) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 5a482acf403cd..81ad0524807f3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -7,7 +7,7 @@ from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, - TimedeltaIndex, PeriodIndex, notnull) + TimedeltaIndex, PeriodIndex, notnull, isnull) from pandas.types.common import needs_i8_conversion from pandas.util.testing import assertRaisesRegexp @@ -879,3 +879,28 @@ def test_fillna(self): expected[1] = True self.assert_numpy_array_equal(idx._isnan, expected) self.assertTrue(idx.hasnans) + + def test_nulls(self): + # this is really a smoke test for the methods + # as these are adequantely tested for function elsewhere + + for name, index in self.indices.items(): + if len(index) == 0: + self.assert_numpy_array_equal( + index.isnull(), np.array([], dtype=bool)) + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isnull is not defined for MultiIndex" + with self.assertRaisesRegexp(NotImplementedError, msg): + idx.isnull() + else: + + if not index.hasnans: + self.assert_numpy_array_equal( + index.isnull(), np.zeros(len(index), dtype=bool)) + self.assert_numpy_array_equal( + index.notnull(), np.ones(len(index), dtype=bool)) + else: + result = isnull(index) + self.assert_numpy_array_equal(index.isnull(), result) + self.assert_numpy_array_equal(index.notnull(), ~result) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index a8dd2238c2063..ee9234d6c8237 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -786,19 +786,8 @@ def repeat(self, repeats, *args, **kwargs): return self._shallow_copy(self.asi8.repeat(repeats), freq=freq) + @Appender(_index_shared_docs['where']) def where(self, cond, other=None): - """ - .. versionadded:: 0.19.0 - - Return an Index of same shape as self and whose corresponding - entries are from self where cond is True and otherwise are from - other. 
- - Parameters - ---------- - cond : boolean same length as self - other : scalar, or array-like - """ other = _ensure_datetimelike_to_i8(other) values = _ensure_datetimelike_to_i8(self) result = np.where(cond, values, other).astype('i8') From f742a66a9b1c5c7756ecfefb5d38c5fca14700b2 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Mon, 6 Feb 2017 10:23:26 -0500 Subject: [PATCH 015/353] BUG: Fix downcast argument for DataFrame.fillna() closes #15277 Author: Albert Villanova del Moral Closes #15278 from albertvillanova/fix-15277 and squashes the following commits: 1b594a9 [Albert Villanova del Moral] Fix tab indentation 631a2dc [Albert Villanova del Moral] Add whatsnew note d691954 [Albert Villanova del Moral] BUG: Fix downcast argument for DataFrame.fillna() --- doc/source/whatsnew/v0.20.0.txt | 3 +-- pandas/core/generic.py | 2 +- pandas/tests/frame/test_missing.py | 14 ++++++++++++++ pandas/tests/series/test_missing.py | 14 ++++++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c6d757c6884d0..16caef57673f7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -496,6 +496,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) - Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`) - Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) +- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) @@ -509,7 +510,5 @@ Bug Fixes - - - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8074b167ff176..bb2664a5b8d28 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3347,7 +3347,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if k not in result: continue obj = result[k] - obj.fillna(v, limit=limit, inplace=True) + obj.fillna(v, limit=limit, inplace=True, downcast=downcast) return result elif not is_list_like(value): new_data = self._data.fillna(value=value, limit=limit, diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index a8c9c72956463..eabdb79295c27 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -252,6 +252,20 @@ def test_fillna(self): result = df.fillna(value={'Date': df['Date2']}) assert_frame_equal(result, expected) + def test_fillna_downcast(self): + # GH 15277 + # infer int64 from float64 + df = pd.DataFrame({'a': [1., np.nan]}) + result = df.fillna(0, downcast='infer') + expected = pd.DataFrame({'a': [1, 0]}) + assert_frame_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + df = pd.DataFrame({'a': [1., np.nan]}) + result = df.fillna({'a': 0}, downcast='infer') + expected = pd.DataFrame({'a': [1, 0]}) + assert_frame_equal(result, expected) + def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df 
= DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8cf0d190a95cc..8c877ade6fe98 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -273,6 +273,20 @@ def test_datetime64tz_fillna_round_issue(self): assert_series_equal(filled, expected) + def test_fillna_downcast(self): + # GH 15277 + # infer int64 from float64 + s = pd.Series([1., np.nan]) + result = s.fillna(0, downcast='infer') + expected = pd.Series([1, 0]) + assert_series_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + s = pd.Series([1., np.nan]) + result = s.fillna({1: 0}, downcast='infer') + expected = pd.Series([1, 0]) + assert_series_equal(result, expected) + def test_fillna_int(self): s = Series(np.random.randint(-100, 100, 50)) s.fillna(method='ffill', inplace=True) From f93714b793f170bd12f5c818752d2b862cd0045b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Feb 2017 16:55:38 +0100 Subject: [PATCH 016/353] DOC/CI: ensure correct pandas version (GH15311) (#15317) --- ci/build_docs.sh | 2 +- ci/requirements-3.5_DOC_BUILD.sh | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 4dc9a203f1978..5dc649a91c4f7 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -22,8 +22,8 @@ if [ x"$DOC_BUILD" != x"" ]; then echo "Will build docs" source activate pandas - conda install -n pandas -c r r rpy2 --yes + # install sudo deps time sudo apt-get $APT_ARGS install dvipng texlive-latex-base texlive-latex-extra mv "$TRAVIS_BUILD_DIR"/doc /tmp diff --git a/ci/requirements-3.5_DOC_BUILD.sh b/ci/requirements-3.5_DOC_BUILD.sh index ca18ad976d46d..25bc63acc96d1 100644 --- a/ci/requirements-3.5_DOC_BUILD.sh +++ b/ci/requirements-3.5_DOC_BUILD.sh @@ -2,6 +2,8 @@ source activate pandas -echo "install DOC_BUILD" +echo "[install DOC_BUILD deps]" conda install -n pandas -c conda-forge feather-format + +conda install -n pandas -c r r rpy2 --yes From 34cdfa48881118a6327fe0e599fb41467ef6ffcc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 6 Feb 2017 18:03:26 -0500 Subject: [PATCH 017/353] CLN: reorg pandas/io/json to sub-dirs xref #14904 Author: Jeff Reback Closes #15322 from jreback/json and squashes the following commits: 0c2da60 [Jeff Reback] DOC: whatsnew update fa3deef [Jeff Reback] CLN: reorg pandas/io/json to sub-dirs --- doc/source/whatsnew/v0.20.0.txt | 3 + pandas/io/json/__init__.py | 4 + pandas/io/{ => json}/json.py | 246 +---------------- pandas/io/json/normalize.py | 248 ++++++++++++++++++ .../{test_json_norm.py => test_normalize.py} | 3 +- setup.py | 1 + 6 files changed, 259 insertions(+), 246 deletions(-) create mode 100644 pandas/io/json/__init__.py rename pandas/io/{ => json}/json.py (73%) create mode 100644 pandas/io/json/normalize.py rename pandas/io/tests/json/{test_json_norm.py => test_normalize.py} (99%) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 16caef57673f7..1a32498d53c23 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -96,6 +96,9 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). .. _whatsnew_0200.enhancements.uint64_support: +UInt64 Support Improved +^^^^^^^^^^^^^^^^^^^^^^^ + Pandas has significantly improved support for operations involving unsigned, or purely non-negative, integers. 
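For example, unsigned 64-bit values that overflow ``int64`` can now be held in an index without being coerced (a small sketch, assuming the ``UInt64Index`` introduced in this release):

    import pandas as pd

    idx = pd.UInt64Index([2**63, 2**63 + 10])
    idx.dtype                                  # dtype('uint64'), no cast to float64 or object
    pd.Series([1, 2], index=idx).index.dtype   # uint64 preserved through a Series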
Previously, handling these integers would result in improper rounding or data-type casting, leading to incorrect results. diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py new file mode 100644 index 0000000000000..a9390a04cc2cd --- /dev/null +++ b/pandas/io/json/__init__.py @@ -0,0 +1,4 @@ +from .json import to_json, read_json, loads, dumps # noqa +from .normalize import json_normalize # noqa + +del json, normalize # noqa diff --git a/pandas/io/json.py b/pandas/io/json/json.py similarity index 73% rename from pandas/io/json.py rename to pandas/io/json/json.py index 767a2212d92da..d29f4a371dd4d 100644 --- a/pandas/io/json.py +++ b/pandas/io/json/json.py @@ -1,8 +1,6 @@ # pylint: disable-msg=E1101,W0613,W0603 import os -import copy -from collections import defaultdict import numpy as np import pandas.json as _json @@ -13,6 +11,7 @@ from pandas.io.common import get_filepath_or_buffer, _get_handle from pandas.core.common import AbstractMethodError from pandas.formats.printing import pprint_thing +from .normalize import _convert_to_line_delimits loads = _json.loads dumps = _json.dumps @@ -641,246 +640,3 @@ def is_ok(col): lambda col, c: self._try_convert_to_date(c), lambda col, c: ((self.keep_default_dates and is_ok(col)) or col in convert_dates)) - -# --------------------------------------------------------------------- -# JSON normalization routines - - -def _convert_to_line_delimits(s): - """Helper function that converts json lists to line delimited json.""" - - # Determine we have a JSON list to turn to lines otherwise just return the - # json object, only lists can - if not s[0] == '[' and s[-1] == ']': - return s - s = s[1:-1] - - from pandas.lib import convert_json_to_lines - return convert_json_to_lines(s) - - -def nested_to_record(ds, prefix="", level=0): - """a simplified json_normalize - - converts a nested dict into a flat dict ("record"), unlike json_normalize, - it does not attempt to extract a subset of the data. - - Parameters - ---------- - ds : dict or list of dicts - prefix: the prefix, optional, default: "" - level: the number of levels in the jason string, optional, default: 0 - - Returns - ------- - d - dict or list of dicts, matching `ds` - - Examples - -------- - - IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2), - nested=dict(e=dict(c=1,d=2),d=2))) - Out[52]: - {'dict1.c': 1, - 'dict1.d': 2, - 'flat1': 1, - 'nested.d': 2, - 'nested.e.c': 1, - 'nested.e.d': 2} - """ - singleton = False - if isinstance(ds, dict): - ds = [ds] - singleton = True - - new_ds = [] - for d in ds: - - new_d = copy.deepcopy(d) - for k, v in d.items(): - # each key gets renamed with prefix - if not isinstance(k, compat.string_types): - k = str(k) - if level == 0: - newkey = k - else: - newkey = prefix + '.' + k - - # only dicts gets recurse-flattend - # only at level>1 do we rename the rest of the keys - if not isinstance(v, dict): - if level != 0: # so we skip copying for top level, common case - v = new_d.pop(k) - new_d[newkey] = v - continue - else: - v = new_d.pop(k) - new_d.update(nested_to_record(v, newkey, level + 1)) - new_ds.append(new_d) - - if singleton: - return new_ds[0] - return new_ds - - -def json_normalize(data, record_path=None, meta=None, - meta_prefix=None, - record_prefix=None, - errors='raise'): - - """ - "Normalize" semi-structured JSON data into a flat table - - Parameters - ---------- - data : dict or list of dicts - Unserialized JSON objects - record_path : string or list of strings, default None - Path in each object to list of records. 
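After this reorganisation the public import path is unchanged, while the normalization helpers live in their own module; a quick sketch of the import locations and of ``nested_to_record`` flattening, mirroring its docstring example:

    from pandas.io.json import json_normalize               # public path, unchanged
    from pandas.io.json.normalize import nested_to_record   # new module location

    nested_to_record({'flat1': 1, 'nested': {'c': 1, 'd': 2}})
    # {'flat1': 1, 'nested.c': 1, 'nested.d': 2}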
If not passed, data will be - assumed to be an array of records - meta : list of paths (string or list of strings), default None - Fields to use as metadata for each record in resulting table - record_prefix : string, default None - If True, prefix records with dotted (?) path, e.g. foo.bar.field if - path to records is ['foo', 'bar'] - meta_prefix : string, default None - errors : {'raise', 'ignore'}, default 'raise' - - * ignore : will ignore KeyError if keys listed in meta are not - always present - * raise : will raise KeyError if keys listed in meta are not - always present - - .. versionadded:: 0.20.0 - - Returns - ------- - frame : DataFrame - - Examples - -------- - - >>> data = [{'state': 'Florida', - ... 'shortname': 'FL', - ... 'info': { - ... 'governor': 'Rick Scott' - ... }, - ... 'counties': [{'name': 'Dade', 'population': 12345}, - ... {'name': 'Broward', 'population': 40000}, - ... {'name': 'Palm Beach', 'population': 60000}]}, - ... {'state': 'Ohio', - ... 'shortname': 'OH', - ... 'info': { - ... 'governor': 'John Kasich' - ... }, - ... 'counties': [{'name': 'Summit', 'population': 1234}, - ... {'name': 'Cuyahoga', 'population': 1337}]}] - >>> from pandas.io.json import json_normalize - >>> result = json_normalize(data, 'counties', ['state', 'shortname', - ... ['info', 'governor']]) - >>> result - name population info.governor state shortname - 0 Dade 12345 Rick Scott Florida FL - 1 Broward 40000 Rick Scott Florida FL - 2 Palm Beach 60000 Rick Scott Florida FL - 3 Summit 1234 John Kasich Ohio OH - 4 Cuyahoga 1337 John Kasich Ohio OH - - """ - def _pull_field(js, spec): - result = js - if isinstance(spec, list): - for field in spec: - result = result[field] - else: - result = result[spec] - - return result - - # A bit of a hackjob - if isinstance(data, dict): - data = [data] - - if record_path is None: - if any([isinstance(x, dict) for x in compat.itervalues(data[0])]): - # naive normalization, this is idempotent for flat records - # and potentially will inflate the data considerably for - # deeply nested structures: - # {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:@} - # - # TODO: handle record value which are lists, at least error - # reasonably - data = nested_to_record(data) - return DataFrame(data) - elif not isinstance(record_path, list): - record_path = [record_path] - - if meta is None: - meta = [] - elif not isinstance(meta, list): - meta = [meta] - - for i, x in enumerate(meta): - if not isinstance(x, list): - meta[i] = [x] - - # Disastrously inefficient for now - records = [] - lengths = [] - - meta_vals = defaultdict(list) - meta_keys = ['.'.join(val) for val in meta] - - def _recursive_extract(data, path, seen_meta, level=0): - if len(path) > 1: - for obj in data: - for val, key in zip(meta, meta_keys): - if level + 1 == len(val): - seen_meta[key] = _pull_field(obj, val[-1]) - - _recursive_extract(obj[path[0]], path[1:], - seen_meta, level=level + 1) - else: - for obj in data: - recs = _pull_field(obj, path[0]) - - # For repeating the metadata later - lengths.append(len(recs)) - - for val, key in zip(meta, meta_keys): - if level + 1 > len(val): - meta_val = seen_meta[key] - else: - try: - meta_val = _pull_field(obj, val[level:]) - except KeyError as e: - if errors == 'ignore': - meta_val = np.nan - else: - raise \ - KeyError("Try running with " - "errors='ignore' as key " - "%s is not always present", e) - meta_vals[key].append(meta_val) - - records.extend(recs) - - _recursive_extract(data, record_path, {}, level=0) - - result = DataFrame(records) - - 
if record_prefix is not None: - result.rename(columns=lambda x: record_prefix + x, inplace=True) - - # Data types, a problem - for k, v in compat.iteritems(meta_vals): - if meta_prefix is not None: - k = meta_prefix + k - - if k in result: - raise ValueError('Conflicting metadata name %s, ' - 'need distinguishing prefix ' % k) - - result[k] = np.array(v).repeat(lengths) - - return result diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py new file mode 100644 index 0000000000000..aa80954233682 --- /dev/null +++ b/pandas/io/json/normalize.py @@ -0,0 +1,248 @@ +# --------------------------------------------------------------------- +# JSON normalization routines + +import copy +from collections import defaultdict +import numpy as np + +from pandas.lib import convert_json_to_lines +from pandas import compat, DataFrame + + +def _convert_to_line_delimits(s): + """Helper function that converts json lists to line delimited json.""" + + # Determine we have a JSON list to turn to lines otherwise just return the + # json object, only lists can + if not s[0] == '[' and s[-1] == ']': + return s + s = s[1:-1] + + return convert_json_to_lines(s) + + +def nested_to_record(ds, prefix="", level=0): + """a simplified json_normalize + + converts a nested dict into a flat dict ("record"), unlike json_normalize, + it does not attempt to extract a subset of the data. + + Parameters + ---------- + ds : dict or list of dicts + prefix: the prefix, optional, default: "" + level: the number of levels in the jason string, optional, default: 0 + + Returns + ------- + d - dict or list of dicts, matching `ds` + + Examples + -------- + + IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2), + nested=dict(e=dict(c=1,d=2),d=2))) + Out[52]: + {'dict1.c': 1, + 'dict1.d': 2, + 'flat1': 1, + 'nested.d': 2, + 'nested.e.c': 1, + 'nested.e.d': 2} + """ + singleton = False + if isinstance(ds, dict): + ds = [ds] + singleton = True + + new_ds = [] + for d in ds: + + new_d = copy.deepcopy(d) + for k, v in d.items(): + # each key gets renamed with prefix + if not isinstance(k, compat.string_types): + k = str(k) + if level == 0: + newkey = k + else: + newkey = prefix + '.' + k + + # only dicts gets recurse-flattend + # only at level>1 do we rename the rest of the keys + if not isinstance(v, dict): + if level != 0: # so we skip copying for top level, common case + v = new_d.pop(k) + new_d[newkey] = v + continue + else: + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, level + 1)) + new_ds.append(new_d) + + if singleton: + return new_ds[0] + return new_ds + + +def json_normalize(data, record_path=None, meta=None, + meta_prefix=None, + record_prefix=None, + errors='raise'): + + """ + "Normalize" semi-structured JSON data into a flat table + + Parameters + ---------- + data : dict or list of dicts + Unserialized JSON objects + record_path : string or list of strings, default None + Path in each object to list of records. If not passed, data will be + assumed to be an array of records + meta : list of paths (string or list of strings), default None + Fields to use as metadata for each record in resulting table + record_prefix : string, default None + If True, prefix records with dotted (?) path, e.g. foo.bar.field if + path to records is ['foo', 'bar'] + meta_prefix : string, default None + errors : {'raise', 'ignore'}, default 'raise' + + * ignore : will ignore KeyError if keys listed in meta are not + always present + * raise : will raise KeyError if keys listed in meta are not + always present + + .. 
versionadded:: 0.20.0 + + Returns + ------- + frame : DataFrame + + Examples + -------- + + >>> data = [{'state': 'Florida', + ... 'shortname': 'FL', + ... 'info': { + ... 'governor': 'Rick Scott' + ... }, + ... 'counties': [{'name': 'Dade', 'population': 12345}, + ... {'name': 'Broward', 'population': 40000}, + ... {'name': 'Palm Beach', 'population': 60000}]}, + ... {'state': 'Ohio', + ... 'shortname': 'OH', + ... 'info': { + ... 'governor': 'John Kasich' + ... }, + ... 'counties': [{'name': 'Summit', 'population': 1234}, + ... {'name': 'Cuyahoga', 'population': 1337}]}] + >>> from pandas.io.json import json_normalize + >>> result = json_normalize(data, 'counties', ['state', 'shortname', + ... ['info', 'governor']]) + >>> result + name population info.governor state shortname + 0 Dade 12345 Rick Scott Florida FL + 1 Broward 40000 Rick Scott Florida FL + 2 Palm Beach 60000 Rick Scott Florida FL + 3 Summit 1234 John Kasich Ohio OH + 4 Cuyahoga 1337 John Kasich Ohio OH + + """ + def _pull_field(js, spec): + result = js + if isinstance(spec, list): + for field in spec: + result = result[field] + else: + result = result[spec] + + return result + + # A bit of a hackjob + if isinstance(data, dict): + data = [data] + + if record_path is None: + if any([isinstance(x, dict) for x in compat.itervalues(data[0])]): + # naive normalization, this is idempotent for flat records + # and potentially will inflate the data considerably for + # deeply nested structures: + # {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:@} + # + # TODO: handle record value which are lists, at least error + # reasonably + data = nested_to_record(data) + return DataFrame(data) + elif not isinstance(record_path, list): + record_path = [record_path] + + if meta is None: + meta = [] + elif not isinstance(meta, list): + meta = [meta] + + for i, x in enumerate(meta): + if not isinstance(x, list): + meta[i] = [x] + + # Disastrously inefficient for now + records = [] + lengths = [] + + meta_vals = defaultdict(list) + meta_keys = ['.'.join(val) for val in meta] + + def _recursive_extract(data, path, seen_meta, level=0): + if len(path) > 1: + for obj in data: + for val, key in zip(meta, meta_keys): + if level + 1 == len(val): + seen_meta[key] = _pull_field(obj, val[-1]) + + _recursive_extract(obj[path[0]], path[1:], + seen_meta, level=level + 1) + else: + for obj in data: + recs = _pull_field(obj, path[0]) + + # For repeating the metadata later + lengths.append(len(recs)) + + for val, key in zip(meta, meta_keys): + if level + 1 > len(val): + meta_val = seen_meta[key] + else: + try: + meta_val = _pull_field(obj, val[level:]) + except KeyError as e: + if errors == 'ignore': + meta_val = np.nan + else: + raise \ + KeyError("Try running with " + "errors='ignore' as key " + "%s is not always present", e) + meta_vals[key].append(meta_val) + + records.extend(recs) + + _recursive_extract(data, record_path, {}, level=0) + + result = DataFrame(records) + + if record_prefix is not None: + result.rename(columns=lambda x: record_prefix + x, inplace=True) + + # Data types, a problem + for k, v in compat.iteritems(meta_vals): + if meta_prefix is not None: + k = meta_prefix + k + + if k in result: + raise ValueError('Conflicting metadata name %s, ' + 'need distinguishing prefix ' % k) + + result[k] = np.array(v).repeat(lengths) + + return result diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_normalize.py similarity index 99% rename from pandas/io/tests/json/test_json_norm.py rename to 
pandas/io/tests/json/test_normalize.py index 36110898448ea..e5aba43648d0c 100644 --- a/pandas/io/tests/json/test_json_norm.py +++ b/pandas/io/tests/json/test_normalize.py @@ -7,7 +7,8 @@ import pandas.util.testing as tm from pandas import compat -from pandas.io.json import json_normalize, nested_to_record +from pandas.io.json import json_normalize +from pandas.io.json.normalize import nested_to_record def _assert_equal_data(left, right): diff --git a/setup.py b/setup.py index 93a044bc3cc7d..4d6bb76fd6b7c 100755 --- a/setup.py +++ b/setup.py @@ -631,6 +631,7 @@ def pxd(name): 'pandas.core', 'pandas.indexes', 'pandas.io', + 'pandas.io.json', 'pandas.io.sas', 'pandas.formats', 'pandas.sparse', From 6d2293f7399390800ec00b2cf78afa3b9043bef9 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Tue, 7 Feb 2017 08:47:18 -0500 Subject: [PATCH 018/353] BUG: bug in passing 'on=' keyword for groupby(..).resample() closes #15021 Author: Nicholas Ver Halen Closes #15326 from verhalenn/issue15021 and squashes the following commits: 9fc3b4f [Nicholas Ver Halen] Updated the whatsnew for issue 15021 ec1f316 [Nicholas Ver Halen] Created a test for GH 15021 b8b10b0 [Nicholas Ver Halen] Added the on arg to resample on a grouped dataframe. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tseries/resample.py | 4 ++++ pandas/tseries/tests/test_resample.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1a32498d53c23..3f6c06e20b546 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -434,6 +434,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) +- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index e93e5637099c1..5692d6c5cabde 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -975,6 +975,10 @@ def resample(obj, kind=None, **kwds): def get_resampler_for_grouping(groupby, rule, how=None, fill_method=None, limit=None, kind=None, **kwargs): """ return our appropriate resampler when grouping as well """ + + # .resample uses 'on' similar to how .groupby uses 'key' + kwargs['key'] = kwargs.pop('on', None) + tg = TimeGrouper(freq=rule, **kwargs) resampler = tg._get_resampler(groupby.obj, kind=kind) r = resampler._get_resampler_for_grouping(groupby=groupby) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 56953541265a6..c40f930fbd094 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -217,6 +217,20 @@ def test_groupby_resample_api(self): lambda x: x.resample('1D').ffill())[['val']] assert_frame_equal(result, expected) + def test_groupby_resample_on_api(self): + + # GH 15021 + # .groupby(...).resample(on=...) results in an unexpected + # keyword warning. 
+ df = pd.DataFrame({'key': ['A', 'B'] * 5, + 'dates': pd.date_range('2016-01-01', periods=10), + 'values': np.random.randn(10)}) + + expected = df.set_index('dates').groupby('key').resample('D').mean() + + result = df.groupby('key').resample('D', on='dates').mean() + assert_frame_equal(result, expected) + def test_plot_api(self): tm._skip_if_no_mpl() From 8d574508d458072cc85488d1b432a8fa8813545a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Feb 2017 11:13:29 -0500 Subject: [PATCH 019/353] TST: remove __main__ from all test files (#15330) --- pandas/api/tests/test_api.py | 5 ---- pandas/computation/tests/test_compat.py | 5 ---- pandas/computation/tests/test_eval.py | 5 ---- pandas/core/frame.py | 5 ---- pandas/formats/format.py | 14 ----------- pandas/io/tests/json/test_normalize.py | 7 ------ pandas/io/tests/json/test_pandas.py | 5 ---- pandas/io/tests/json/test_ujson.py | 5 ---- pandas/io/tests/parser/test_network.py | 4 --- pandas/io/tests/parser/test_parsers.py | 5 ---- pandas/io/tests/parser/test_textreader.py | 5 ---- pandas/io/tests/parser/test_unsupported.py | 6 ----- pandas/io/tests/test_date_converters.py | 6 ----- pandas/io/tests/test_excel.py | 5 ---- pandas/io/tests/test_feather.py | 5 ---- pandas/io/tests/test_gbq.py | 4 --- pandas/io/tests/test_html.py | 4 --- pandas/io/tests/test_pickle.py | 6 ----- pandas/io/tests/test_pytables.py | 6 ----- pandas/io/tests/test_s3.py | 6 +---- pandas/io/tests/test_sql.py | 5 ---- pandas/io/tests/test_stata.py | 5 ---- pandas/sparse/tests/test_array.py | 6 ----- pandas/sparse/tests/test_combine_concat.py | 7 ------ pandas/sparse/tests/test_frame.py | 5 ---- pandas/sparse/tests/test_libsparse.py | 10 ++------ pandas/sparse/tests/test_list.py | 6 ----- pandas/sparse/tests/test_series.py | 6 ----- pandas/stats/tests/test_fama_macbeth.py | 5 ---- pandas/stats/tests/test_math.py | 4 --- pandas/stats/tests/test_ols.py | 6 ----- pandas/stats/tests/test_var.py | 5 ---- pandas/tests/formats/test_format.py | 5 ---- pandas/tests/formats/test_printing.py | 6 ----- pandas/tests/frame/test_analytics.py | 4 --- pandas/tests/frame/test_asof.py | 6 ----- pandas/tests/frame/test_constructors.py | 8 ------ pandas/tests/frame/test_indexing.py | 6 ----- pandas/tests/frame/test_misc_api.py | 6 ----- pandas/tests/frame/test_missing.py | 7 ------ pandas/tests/frame/test_operators.py | 5 ---- pandas/tests/frame/test_query_eval.py | 5 ---- pandas/tests/frame/test_timeseries.py | 6 ----- pandas/tests/frame/test_to_csv.py | 6 ----- pandas/tests/groupby/test_aggregate.py | 6 ----- pandas/tests/groupby/test_categorical.py | 7 ------ pandas/tests/groupby/test_filters.py | 7 ------ pandas/tests/groupby/test_groupby.py | 5 ---- pandas/tests/indexes/test_base.py | 6 ----- pandas/tests/indexes/test_numeric.py | 6 ----- pandas/tests/indexing/test_callable.py | 6 ----- pandas/tests/indexing/test_indexing.py | 5 ---- pandas/tests/plotting/test_boxplot_method.py | 5 ---- pandas/tests/plotting/test_datetimelike.py | 10 ++------ pandas/tests/plotting/test_frame.py | 10 ++------ pandas/tests/plotting/test_groupby.py | 11 ++------ pandas/tests/plotting/test_hist_method.py | 10 +------- pandas/tests/plotting/test_misc.py | 9 +------ pandas/tests/plotting/test_series.py | 12 +++------ pandas/tests/series/test_asof.py | 7 ------ pandas/tests/series/test_indexing.py | 6 ----- pandas/tests/series/test_missing.py | 7 ------ pandas/tests/series/test_timeseries.py | 6 ----- pandas/tests/test_algos.py | 6 ----- pandas/tests/test_base.py | 10 +------- 
pandas/tests/test_categorical.py | 7 ------ pandas/tests/test_common.py | 6 ----- pandas/tests/test_expressions.py | 10 ++------ pandas/tests/test_generic.py | 4 --- pandas/tests/test_internals.py | 5 ---- pandas/tests/test_join.py | 6 ----- pandas/tests/test_lib.py | 7 ------ pandas/tests/test_multilevel.py | 5 ---- pandas/tests/test_nanops.py | 6 ----- pandas/tests/test_panel.py | 5 ---- pandas/tests/test_panel4d.py | 5 ---- pandas/tests/test_panelnd.py | 6 ----- pandas/tests/test_reshape.py | 6 ----- pandas/tests/test_stats.py | 6 ----- pandas/tests/test_strings.py | 7 ------ pandas/tests/test_take.py | 6 ----- pandas/tests/test_testing.py | 5 ---- pandas/tests/test_util.py | 7 ------ pandas/tests/types/test_cast.py | 7 ------ pandas/tests/types/test_common.py | 6 ----- pandas/tests/types/test_concat.py | 6 ----- pandas/tests/types/test_dtypes.py | 6 ----- pandas/tests/types/test_generic.py | 6 ----- pandas/tests/types/test_inference.py | 6 ----- pandas/tests/types/test_io.py | 7 ------ pandas/tests/types/test_missing.py | 6 ----- pandas/tools/plotting.py | 25 ------------------- pandas/tools/tests/test_concat.py | 7 ------ pandas/tools/tests/test_join.py | 7 ------ pandas/tools/tests/test_merge.py | 7 ------ pandas/tools/tests/test_merge_asof.py | 6 ----- pandas/tools/tests/test_merge_ordered.py | 6 ----- pandas/tools/tests/test_pivot.py | 6 ----- pandas/tools/tests/test_tile.py | 6 ----- pandas/tools/tests/test_util.py | 5 ---- pandas/tseries/tests/test_base.py | 7 ------ pandas/tseries/tests/test_converter.py | 9 +------ pandas/tseries/tests/test_daterange.py | 6 ----- pandas/tseries/tests/test_holiday.py | 6 ----- pandas/tseries/tests/test_offsets.py | 10 +++----- pandas/tseries/tests/test_period.py | 6 ----- pandas/tseries/tests/test_resample.py | 6 ----- pandas/tseries/tests/test_timedeltas.py | 6 ----- .../tseries/tests/test_timeseries_legacy.py | 5 ---- pandas/tseries/tests/test_timezones.py | 7 ------ pandas/tseries/tests/test_tslib.py | 6 ----- pandas/tseries/tests/test_util.py | 6 ----- 112 files changed, 21 insertions(+), 703 deletions(-) diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index 410d70c65404f..f925fd792f9ca 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -227,8 +227,3 @@ def test_deprecation_access_obj(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): pd.datetools.monthEnd - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/computation/tests/test_compat.py b/pandas/computation/tests/test_compat.py index 8e8924379f153..900dd2c28b4c5 100644 --- a/pandas/computation/tests/test_compat.py +++ b/pandas/computation/tests/test_compat.py @@ -61,8 +61,3 @@ def testit(): testit() else: testit() - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 3a446bfc36c21..dbac72c619a52 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1977,8 +1977,3 @@ def test_validate_bool_args(self): for value in invalid_values: with self.assertRaises(ValueError): pd.eval("2+2", inplace=value) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf306034001db..cc81c66100a6f 
100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5762,8 +5762,3 @@ def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) ops.add_special_arithmetic_methods(DataFrame, **ops.frame_special_funcs) - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index adfb54c02d926..3bac7d2821760 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -2679,17 +2679,3 @@ def _binify(cols, line_width): bins.append(len(cols)) return bins - - -if __name__ == '__main__': - arr = np.array([746.03, 0.00, 5620.00, 1592.36]) - # arr = np.array([11111111.1, 1.55]) - # arr = [314200.0034, 1.4125678] - arr = np.array( - [327763.3119, 345040.9076, 364460.9915, 398226.8688, 383800.5172, - 433442.9262, 539415.0568, 568590.4108, 599502.4276, 620921.8593, - 620898.5294, 552427.1093, 555221.2193, 519639.7059, 388175.7, - 379199.5854, 614898.25, 504833.3333, 560600., 941214.2857, 1134250., - 1219550., 855736.85, 1042615.4286, 722621.3043, 698167.1818, 803750.]) - fmt = FloatArrayFormatter(arr, digits=7) - print(fmt.get_result()) diff --git a/pandas/io/tests/json/test_normalize.py b/pandas/io/tests/json/test_normalize.py index e5aba43648d0c..c60b81ffe504d 100644 --- a/pandas/io/tests/json/test_normalize.py +++ b/pandas/io/tests/json/test_normalize.py @@ -1,5 +1,3 @@ -import nose - from pandas import DataFrame import numpy as np import json @@ -283,8 +281,3 @@ def test_json_normalize_errors(self): ['general', 'trade_version']], errors='raise' ) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', - '--pdb-failure', '-s'], exit=False) diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index 345d181a0e53a..ee5039c38b182 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -1044,8 +1044,3 @@ def roundtrip(s, encoding='latin-1'): for s in examples: roundtrip(s) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', - '--pdb-failure', '-s'], exit=False) diff --git a/pandas/io/tests/json/test_ujson.py b/pandas/io/tests/json/test_ujson.py index 704023bd847b7..3da61b7696fdc 100644 --- a/pandas/io/tests/json/test_ujson.py +++ b/pandas/io/tests/json/test_ujson.py @@ -1611,8 +1611,3 @@ def test_encodeSet(self): def _clean_dict(d): return dict((str(k), v) for k, v in compat.iteritems(d)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py index d84c2ae3beb0c..e06f94c780c8b 100644 --- a/pandas/io/tests/parser/test_network.py +++ b/pandas/io/tests/parser/test_network.py @@ -182,7 +182,3 @@ def test_s3_fails(self): # It's irrelevant here that this isn't actually a table. 
with tm.assertRaises(IOError): read_csv('s3://cant_get_it/') - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py index a90f546d37fc8..93b5fdcffed4c 100644 --- a/pandas/io/tests/parser/test_parsers.py +++ b/pandas/io/tests/parser/test_parsers.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import os -import nose import pandas.util.testing as tm @@ -99,7 +98,3 @@ def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine return read_table(*args, **kwds) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/io/tests/parser/test_textreader.py index 98cb09cd85480..0e91ca806e8fe 100644 --- a/pandas/io/tests/parser/test_textreader.py +++ b/pandas/io/tests/parser/test_textreader.py @@ -10,7 +10,6 @@ import os import sys -import nose from numpy import nan import numpy as np @@ -402,7 +401,3 @@ def test_empty_csv_input(self): def assert_array_dicts_equal(left, right): for k, v in compat.iteritems(left): assert(np.array_equal(v, right[k])) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py index 4d93df16a0279..e941c9186cd6a 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/io/tests/parser/test_unsupported.py @@ -9,8 +9,6 @@ test suite as new feature support is added to the parsers. """ -import nose - import pandas.io.parsers as parsers import pandas.util.testing as tm @@ -142,7 +140,3 @@ def test_deprecated_args(self): kwargs = {arg: non_default_val} read_csv(StringIO(data), engine=engine, **kwargs) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 99abbacb604fa..5b54925c65fbd 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -1,8 +1,6 @@ from pandas.compat import StringIO from datetime import date, datetime -import nose - import numpy as np from pandas import DataFrame, MultiIndex @@ -150,7 +148,3 @@ def test_parse_date_column_with_empty_string(self): [621, ' ']] expected = DataFrame(expected_data, columns=['case', 'opdate']) assert_frame_equal(result, expected) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 12aecfd50c3a6..2791e397d5b86 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -2325,8 +2325,3 @@ def check_called(func): check_called(lambda: panel.to_excel('something.test')) check_called(lambda: df.to_excel('something.xlsx')) check_called(lambda: df.to_excel('something.xls', engine='dummy')) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_feather.py b/pandas/io/tests/test_feather.py index b8b85d7dbbece..dcb057ec30004 100644 --- a/pandas/io/tests/test_feather.py +++ b/pandas/io/tests/test_feather.py @@ -116,8 +116,3 @@ def test_write_with_index(self): df.index = [0, 1, 2] df.columns = 
pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), self.check_error_on_write(df, ValueError) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 8a414dcd3ba4f..ac481a44de5e8 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -1224,7 +1224,3 @@ def test_upload_data_as_service_account_with_key_contents(self): project_id=_get_project_id(), private_key=_get_private_key_contents()) self.assertEqual(result['NUM_ROWS'][0], test_size) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 9ac8def3a074d..356adb92829c6 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -918,7 +918,3 @@ def test_same_ordering(): dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index a49f50b1bcb9f..b5c316b326b8d 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -283,9 +283,3 @@ def test_pickle_v0_15_2(self): # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) # tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'], - exit=False) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 40db10c42d5a7..f4f03856f94e2 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -5516,9 +5516,3 @@ def _test_sort(obj): return obj.reindex(major=sorted(obj.major_axis)) else: raise ValueError('type not supported here') - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_s3.py b/pandas/io/tests/test_s3.py index 8058698a906ea..2983fa647445c 100644 --- a/pandas/io/tests/test_s3.py +++ b/pandas/io/tests/test_s3.py @@ -1,14 +1,10 @@ -import nose from pandas.util import testing as tm from pandas.io.common import _is_s3_url class TestS3URL(tm.TestCase): + def test_is_s3_url(self): self.assertTrue(_is_s3_url("s3://pandas/somethingelse.com")) self.assertFalse(_is_s3_url("s4://pandas/somethingelse.com")) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 9e639f7ef6057..4bcde764001c1 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -2658,8 +2658,3 @@ def clean_up(test_table_to_drop): self.assertEqual(tquery(sql_select, con=self.conn), [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 8cfd5d98fe05f..fcb935925e61f 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -1276,8 +1276,3 @@ def 
test_out_of_range_float(self): original.to_stata(path) tm.assertTrue('ColumnTooBig' in cm.exception) tm.assertTrue('infinity' in cm.exception) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 592926f8e821d..55f292a8a231a 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -810,9 +810,3 @@ def test_ufunc_args(self): sparse = SparseArray([1, -1, 0, -2], fill_value=0) result = SparseArray([2, 0, 1, -1], fill_value=1) tm.assert_sp_array_equal(np.add(sparse, 1), result) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_combine_concat.py b/pandas/sparse/tests/test_combine_concat.py index fcdc6d9580dd5..5240d592810ad 100644 --- a/pandas/sparse/tests/test_combine_concat.py +++ b/pandas/sparse/tests/test_combine_concat.py @@ -1,6 +1,5 @@ # pylint: disable-msg=E1101,W0612 -import nose # noqa import numpy as np import pandas as pd import pandas.util.testing as tm @@ -356,9 +355,3 @@ def test_concat_sparse_dense(self): exp = pd.concat([self.dense1, self.dense3], axis=1) self.assertIsInstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res, exp) - - -if __name__ == '__main__': - import nose # noqa - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index 23bb827974c61..e26c0ed1afe58 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -1193,8 +1193,3 @@ def test_numpy_func_call(self): 'std', 'min', 'max'] for func in funcs: getattr(np, func)(self.frame) - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index c289b4a1b204f..b3aa3368e9455 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -1,6 +1,6 @@ from pandas import Series -import nose # noqa +import nose import numpy as np import operator import pandas.util.testing as tm @@ -196,7 +196,7 @@ def _check_correct(a, b, expected): assert (result.equals(expected)) def _check_length_exc(a, longer): - nose.tools.assert_raises(Exception, a.intersect, longer) + self.assertRaises(Exception, a.intersect, longer) def _check_case(xloc, xlen, yloc, ylen, eloc, elen): xindex = BlockIndex(TEST_LENGTH, xloc, xlen) @@ -585,9 +585,3 @@ def f(self): g = make_optestf(op) setattr(TestSparseOperators, g.__name__, g) del g - - -if __name__ == '__main__': - import nose # noqa - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index b117685b6e968..458681cdc1de0 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -112,9 +112,3 @@ def test_getitem(self): for i in range(len(arr)): tm.assert_almost_equal(splist[i], arr[i]) tm.assert_almost_equal(splist[-i], arr[-i]) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index 06d76bdd4dd3d..b34f5dd2cee9f 100644 --- 
a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -1366,9 +1366,3 @@ def test_numpy_func_call(self): for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py index 706becfa730c4..0c9fcf775ad2d 100644 --- a/pandas/stats/tests/test_fama_macbeth.py +++ b/pandas/stats/tests/test_fama_macbeth.py @@ -66,8 +66,3 @@ def _check_stuff_works(self, result): # does it work? result.summary - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_math.py b/pandas/stats/tests/test_math.py index bc09f33d2f467..3f89dbcd20065 100644 --- a/pandas/stats/tests/test_math.py +++ b/pandas/stats/tests/test_math.py @@ -57,7 +57,3 @@ def test_inv_illformed(self): rs = pmath.inv(singular) expected = np.array([[0.1, 0.2], [0.1, 0.2]]) self.assertTrue(np.allclose(rs, expected)) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 2935f986cca9f..09fa21d58ea9d 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -974,9 +974,3 @@ def testFilterWithDictRHS(self): def tsAssertEqual(self, ts1, ts2, **kwargs): self.assert_series_equal(ts1, ts2, **kwargs) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py index 9f2c95a2d3d5c..04e2019f00a82 100644 --- a/pandas/stats/tests/test_var.py +++ b/pandas/stats/tests/test_var.py @@ -6,7 +6,6 @@ from pandas.compat import range import nose -import unittest raise nose.SkipTest('skipping this for now') @@ -93,7 +92,3 @@ def __init__(self): self.res1 = VAR2(endog=data).fit(maxlag=2) from results import results_var self.res2 = results_var.MacrodataResults() - - -if __name__ == '__main__': - unittest.main() diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 9eff64b40625d..7a2c5f3b7f7c1 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -4999,8 +4999,3 @@ def test_format_percentiles(): tm.assertRaises(ValueError, fmt.format_percentiles, [-0.001, 0.1, 0.5]) tm.assertRaises(ValueError, fmt.format_percentiles, [2, 0.1, 0.5]) tm.assertRaises(ValueError, fmt.format_percentiles, [0.1, 0.5, 'a']) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index 3bcceca1f50a7..d1eb1faecc401 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import nose from pandas import compat import pandas.formats.printing as printing import pandas.formats.format as fmt @@ -135,8 +134,3 @@ def test_ambiguous_width(self): # result = printing.console_encode(u"\u05d0") # expected = u"\u05d0".encode('utf-8') # assert (result == expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git 
a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 5d51306363053..0dbb78ec89b2e 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2202,7 +2202,3 @@ def test_dot(self): with tm.assertRaisesRegexp(ValueError, 'aligned'): df.dot(df2) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index f68219120b48e..323960d54a42c 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -1,7 +1,5 @@ # coding=utf-8 -import nose - import numpy as np from pandas import (DataFrame, date_range, Timestamp, Series, to_datetime) @@ -84,7 +82,3 @@ def test_missing(self): expected = DataFrame(index=to_datetime(['1989-12-31']), columns=['A', 'B'], dtype='float64') assert_frame_equal(result, expected) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index fe6a12fcca28a..1676c57a274cd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -7,7 +7,6 @@ import itertools import nose - from numpy.random import randn import numpy as np @@ -1945,10 +1944,3 @@ def test_frame_timeseries_to_records(self): result['index'].dtype == 'M8[ns]' result = df.to_records(index=False) - - -if __name__ == '__main__': - import nose # noqa - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 7d68eac47766e..f0e6ab4c17915 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2954,9 +2954,3 @@ def test_transpose(self): expected = DataFrame(self.df.values.T) expected.index = ['A', 'B'] assert_frame_equal(result, expected) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index f5719fa1d8b85..2fc14d9e4d123 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -4,7 +4,6 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy import sys -import nose from distutils.version import LooseVersion from pandas.compat import range, lrange @@ -486,8 +485,3 @@ def _check_f(base, f): # rename f = lambda x: x.rename({1: 'foo'}, inplace=True) _check_f(d.copy(), f) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index eabdb79295c27..8c25f71c00684 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -711,10 +711,3 @@ def test_interp_ignore_all_good(self): # all good result = df[['B', 'D']].interpolate(downcast=None) assert_frame_equal(result, df[['B', 'D']]) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'] - exit=False) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index f843a5c08ce05..15f98abe1445d 100644 --- a/pandas/tests/frame/test_operators.py +++ 
b/pandas/tests/frame/test_operators.py @@ -1275,8 +1275,3 @@ def test_alignment_non_pandas(self): align(df, val, 'index') with tm.assertRaises(ValueError): align(df, val, 'columns') - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 36ae5dac733a5..a9a90a6f5cd40 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1155,8 +1155,3 @@ class TestDataFrameEvalPythonPython(TestDataFrameEvalNumExprPython): def setUpClass(cls): super(TestDataFrameEvalPythonPython, cls).tearDownClass() cls.engine = cls.parser = 'python' - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 9a9f0ee67fb89..55848847f2266 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -575,9 +575,3 @@ def test_frame_to_period(self): tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) self.assertRaises(ValueError, df.to_period, axis=2) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index b585462365606..5c47b0357b4f6 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1145,9 +1145,3 @@ def test_to_csv_quoting(self): df = df.set_index(['a', 'b']) expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 6b162b71f79de..5f680a6876873 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- from __future__ import print_function -import nose from datetime import datetime @@ -487,8 +486,3 @@ def testit(label_list, shape): shape = (10000, 10000) label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' - ], exit=False) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 81aa183426be9..82ec1832be961 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import print_function -import nose from numpy import nan - from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.api import DataFrame, Categorical @@ -490,8 +488,3 @@ def testit(label_list, shape): shape = (10000, 10000) label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' - ], exit=False) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 40d8039f71576..663fbd04e7e5a 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -1,7 +1,5 @@ # -*- 
coding: utf-8 -*- from __future__ import print_function -import nose - from numpy import nan @@ -641,8 +639,3 @@ def testit(label_list, shape): shape = (10000, 10000) label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' - ], exit=False) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index bf61f5ef83859..01c81bd7904bd 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -6057,8 +6057,3 @@ def testit(label_list, shape): shape = (10000, 10000) label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' - ], exit=False) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index a0f2a090c9a06..c574a4a1f01a7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -11,7 +11,6 @@ import operator import os -import nose import numpy as np from pandas import (period_range, date_range, Series, @@ -2078,8 +2077,3 @@ def test_intersect_str_dates(self): res = i2.intersection(i1) self.assertEqual(len(res), 0) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index c7acbf51a17e5..4dab7ae76a011 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -3,7 +3,6 @@ from datetime import datetime from pandas.compat import range, PY3 -import nose import numpy as np from pandas import (date_range, Series, Index, Float64Index, @@ -1144,8 +1143,3 @@ def test_join_outer(self): self.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index ab225f72934ce..bcadc41b13370 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import nose import numpy as np import pandas as pd @@ -268,8 +267,3 @@ def test_frame_iloc_callable_setitem(self): exp = df.copy() exp.iloc[[1, 3], [0]] = [-5, -5] tm.assert_frame_equal(res, exp) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f7fa07916ca74..a9dfcf2672357 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -5532,8 +5532,3 @@ def test_boolean_indexing(self): index=pd.to_timedelta(range(10), unit='s'), columns=['x']) tm.assert_frame_equal(expected, result) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 289d48ba6d4cc..f7fd6a8519533 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -378,8 +378,3 @@ def 
test_fontsize(self): df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]}) self._check_ticks_props(df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 6486c8aa21c1b..bcc9c7ceea8b5 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1,3 +1,5 @@ +""" Test cases for time series specific (freq conversion, etc) """ + from datetime import datetime, timedelta, date, time import nose @@ -18,9 +20,6 @@ _skip_if_no_scipy_gaussian_kde) -""" Test cases for time series specific (freq conversion, etc) """ - - @tm.mplskip class TestTSPlot(TestPlotBase): @@ -1309,8 +1308,3 @@ def _check_plot_works(f, freq=None, series=None, *args, **kwargs): plt.savefig(path) finally: plt.close(fig) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index fba554b03f191..81a54bd38b3f8 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1,5 +1,7 @@ # coding: utf-8 +""" Test cases for DataFrame.plot """ + import nose import string import warnings @@ -26,9 +28,6 @@ _ok_for_gaussian_kde) -""" Test cases for DataFrame.plot """ - - @tm.mplskip class TestDataFramePlots(TestPlotBase): @@ -2726,8 +2725,3 @@ def _generate_4_axes_via_gridspec(): ax_lr = plt.subplot(gs[1, 1]) return gs, [ax_tl, ax_ll, ax_tr, ax_lr] - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 3c682fbfbb89e..93efb3f994c38 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -1,6 +1,7 @@ # coding: utf-8 -import nose +""" Test cases for GroupBy.plot """ + from pandas import Series, DataFrame import pandas.util.testing as tm @@ -10,9 +11,6 @@ from pandas.tests.plotting.common import TestPlotBase -""" Test cases for GroupBy.plot """ - - @tm.mplskip class TestDataFrameGroupByPlots(TestPlotBase): @@ -74,8 +72,3 @@ def test_plot_kwargs(self): res = df.groupby('z').plot.scatter(x='x', y='y') self.assertEqual(len(res['a'].collections), 1) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index bde5544390b85..4f64f66bd3c4d 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -1,6 +1,6 @@ # coding: utf-8 -import nose +""" Test cases for .hist method """ from pandas import Series, DataFrame import pandas.util.testing as tm @@ -13,9 +13,6 @@ from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) -""" Test cases for .hist method """ - - @tm.mplskip class TestSeriesPlots(TestPlotBase): @@ -418,8 +415,3 @@ def test_axis_share_xy(self): self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2)) self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/plotting/test_misc.py 
b/pandas/tests/plotting/test_misc.py index 2650ce2879db7..c92287b2bdc42 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -1,6 +1,6 @@ # coding: utf-8 -import nose +""" Test cases for misc plot functions """ from pandas import Series, DataFrame from pandas.compat import lmap @@ -15,8 +15,6 @@ from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _ok_for_gaussian_kde) -""" Test cases for misc plot functions """ - @tm.mplskip class TestSeriesPlots(TestPlotBase): @@ -298,8 +296,3 @@ def test_subplot_titles(self): title=title[:-1]) title_list = [ax.get_title() for sublist in plot for ax in sublist] self.assertEqual(title_list, title[:3] + ['']) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index f668c46a15173..8c00d606059a4 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -1,6 +1,8 @@ # coding: utf-8 -import nose +""" Test cases for Series.plot """ + + import itertools from datetime import datetime @@ -20,9 +22,6 @@ _ok_for_gaussian_kde) -""" Test cases for Series.plot """ - - @tm.mplskip class TestSeriesPlots(TestPlotBase): @@ -811,8 +810,3 @@ def test_custom_business_day_freq(self): freq=CustomBusinessDay(holidays=['2014-05-26']))) _check_plot_works(s.plot) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index e2092feab9004..db306d2a742c1 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -1,9 +1,6 @@ # coding=utf-8 -import nose - import numpy as np - from pandas import (offsets, Series, notnull, isnull, date_range, Timestamp) @@ -152,7 +149,3 @@ def test_errors(self): s = Series(np.random.randn(N), index=rng) with self.assertRaises(ValueError): s.asof(s.index[0], subset='foo') - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index bdae11770de65..e0d83d6eeadac 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -2638,9 +2638,3 @@ def test_round_nat(self): round_method = getattr(s.dt, method) for freq in ["s", "5s", "min", "5min", "h", "5h"]: assert_series_equal(round_method(freq), expected) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8c877ade6fe98..6821a8b9f4221 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -1092,10 +1092,3 @@ def test_series_interpolate_intraday(self): result = ts.reindex(new_index).interpolate(method='time') self.assert_numpy_array_equal(result.values, exp.values) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'] - exit=False) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 9754a9d3737e3..bd346fb9bb0c8 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -927,9 +927,3 @@ def 
test_get_level_values_box(self): index = MultiIndex(levels=levels, labels=labels) self.assertTrue(isinstance(index.get_level_values(0)[0], Timestamp)) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 99453b9793007..40b277f3f1f8a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1372,9 +1372,3 @@ def test_index(self): idx = Index(['1 day', '1 day', '-1 day', '-1 day 2 min', '2 min', '2 min'], dtype='timedelta64[ns]') tm.assert_series_equal(algos.mode(idx), exp) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index f750936961831..1d1ef1a08859c 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -4,7 +4,7 @@ import re import sys from datetime import datetime, timedelta - +import nose import numpy as np import pandas as pd @@ -1105,11 +1105,3 @@ def f(): self.assertRaises(AttributeError, f) self.assertFalse(hasattr(t, "b")) - - -if __name__ == '__main__': - import nose - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'], - exit=False) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 745914d3e7ef5..be55d6e1976ec 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4576,10 +4576,3 @@ def test_map(self): self.assertIsInstance(res, tm.SubclassedCategorical) exp = Categorical(['A', 'B', 'C']) tm.assert_categorical_equal(res, exp) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'] - exit=False) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 09dd3f7ab517c..0239250129494 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import nose import numpy as np from pandas import Series, Timestamp @@ -196,8 +195,3 @@ def test_dict_compat(): assert (com._dict_compat(data_datetime64) == expected) assert (com._dict_compat(expected) == expected) assert (com._dict_compat(data_unchanged) == data_unchanged) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index c037f02f20609..18b078d0a677e 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -2,12 +2,12 @@ from __future__ import print_function # pylint: disable-msg=W0612,E1101 -import nose import re +import operator +import nose from numpy.random import randn -import operator import numpy as np from pandas.core.api import DataFrame, Panel @@ -439,9 +439,3 @@ def test_bool_ops_warn_on_arithmetic(self): r = f(df, True) e = fe(df, True) tm.assert_frame_equal(r, e) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 0ca8ba47b8a8f..5bf2eda47ea27 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -2022,7 +2022,3 @@ def test_pipe_panel(self): with tm.assertRaises(ValueError): result = 
wp.pipe((f, 'y'), x=1, y=1) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 5000d6d4510fb..2bfe31ad4260e 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -1188,8 +1188,3 @@ def assert_add_equals(val, inc, result): lambda: BlockPlacement([1, 2, 4]).add(-10)) self.assertRaises(ValueError, lambda: BlockPlacement(slice(2, None, -1)).add(-1)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index bfdb77f3fb350..0e7dda05a0c27 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -193,9 +193,3 @@ def test_inner_join_indexer2(): exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64) assert_almost_equal(ridx, exp_ridx) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 945f8004687cd..2381c52ef14b6 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -232,10 +232,3 @@ def test_empty_like(self): expected = np.array([True]) self._check_behavior(arr, expected) - - -if __name__ == '__main__': - import nose - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 37bfe667b0205..d87ad8d906854 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2478,8 +2478,3 @@ def test_iloc_mi(self): for r in range(5)]) assert_frame_equal(result, expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index dd3a49de55d73..937c20d009b6b 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1000,9 +1000,3 @@ def test_nans_skipna(self): @property def prng(self): return np.random.RandomState(1234) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' - ], exit=False) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index d79081a06dbc0..89e8fb78ad821 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2538,8 +2538,3 @@ def test_panel_index(): np.repeat([1, 2, 3], 4)], names=['time', 'panel']) tm.assert_index_equal(index, expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 0769b8916a11b..aeca24964222a 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -949,8 +949,3 @@ def test_rename(self): def test_get_attr(self): assert_panel_equal(self.panel4d['l1'], self.panel4d.l1) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 92805f3b30ec6..6a578d85d3ee3 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -import nose - from pandas.core import panelnd from pandas.core.panel import Panel @@ -101,7 +99,3 @@ 
def test_5d_construction(self): # test a transpose # results = p5d.transpose(1,2,3,4,0) # expected = - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 603674ac01bc0..b5fa945a5bb8f 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import nose from pandas import DataFrame, Series from pandas.core.sparse import SparseDataFrame @@ -914,8 +913,3 @@ def test_multiple_id_columns(self): exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']] long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(long_frame, exp_frame) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index 41d25b9662b5b..eb8ab02c29548 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- from pandas import compat -import nose from distutils.version import LooseVersion from numpy import nan @@ -185,8 +184,3 @@ def test_rank_object_bug(self): # smoke tests Series([np.nan] * 32).astype(object).rank(ascending=True) Series([np.nan] * 32).astype(object).rank(ascending=False) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f59127c853ed1..f358946983dce 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -4,8 +4,6 @@ from datetime import datetime, timedelta import re -import nose - from numpy import nan as NA import numpy as np from numpy.random import randint @@ -2715,8 +2713,3 @@ def test_method_on_bytes(self): expected = Series(np.array( ['ad', 'be', 'cf'], 'S2').astype(object)) tm.assert_series_equal(result, expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 98b3b474f785d..bf8a3ab370625 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -2,7 +2,6 @@ import re from datetime import datetime -import nose import numpy as np from pandas.compat import long import pandas.core.algorithms as algos @@ -448,8 +447,3 @@ def test_2d_datetime64(self): expected = arr.take(indexer, axis=1) expected[:, [2, 4]] = datetime(2007, 1, 1) tm.assert_almost_equal(result, expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index e2f295a5343bc..5e60efd153ab1 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -802,8 +802,3 @@ def f(): with assertRaises(ValueError): f() raise ValueError - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index ed82604035358..e2f6a7f6cc1ed 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -import nose - from collections import OrderedDict import sys import unittest @@ -402,8 +400,3 @@ def test_numpy_errstate_is_default(): from 
pandas.compat import numpy # noqa # The errstate should be unchanged after that import. tm.assert_equal(np.geterr(), expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 56a14a51105ca..a8579e89aeb1f 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -5,8 +5,6 @@ """ - -import nose from datetime import datetime import numpy as np @@ -278,8 +276,3 @@ def test_period_dtype(self): np.dtype('datetime64[ns]'), np.object, np.int64]: self.assertEqual(_find_common_type([dtype, dtype2]), np.object) self.assertEqual(_find_common_type([dtype2, dtype]), np.object) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py index 4d6f50862c562..7c17c61aec440 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/types/test_common.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import nose import numpy as np from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype @@ -55,8 +54,3 @@ def test_dtype_equal(): assert not DatetimeTZDtype.is_dtype(np.int64) assert not PeriodDtype.is_dtype(np.int64) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_concat.py b/pandas/tests/types/test_concat.py index 6403dcb5a5350..8acafe0af1792 100644 --- a/pandas/tests/types/test_concat.py +++ b/pandas/tests/types/test_concat.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import nose import pandas as pd import pandas.types.concat as _concat import pandas.util.testing as tm @@ -79,8 +78,3 @@ def test_get_dtype_kinds_period(self): pd.Series([pd.Period('2011-02', freq='D')])] res = _concat.get_dtype_kinds(to_concat) self.assertEqual(res, set(['object'])) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index f190c85404ff9..68105cfd7c886 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from itertools import product -import nose import numpy as np import pandas as pd from pandas import Series, Categorical, date_range @@ -353,8 +352,3 @@ def test_empty(self): def test_not_string(self): # though PeriodDtype has object kind, it cannot be string self.assertFalse(is_string_dtype(PeriodDtype('D'))) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py index 28600687e8062..2861252bef26a 100644 --- a/pandas/tests/types/test_generic.py +++ b/pandas/tests/types/test_generic.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import nose import numpy as np import pandas as pd import pandas.util.testing as tm @@ -41,8 +40,3 @@ def test_abc_types(self): self.assertIsInstance(self.sparse_array, gt.ABCSparseArray) self.assertIsInstance(self.categorical, gt.ABCCategorical) self.assertIsInstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_inference.py 
b/pandas/tests/types/test_inference.py index 5c35112d0fe19..15f9545f3476c 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -6,7 +6,6 @@ """ -import nose import collections import re from datetime import datetime, date, timedelta, time @@ -968,8 +967,3 @@ def test_ensure_categorical(): values = Categorical(values) result = _ensure_categorical(values) tm.assert_categorical_equal(result, values) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_io.py b/pandas/tests/types/test_io.py index 545edf8f1386c..ce8e23342bf5a 100644 --- a/pandas/tests/types/test_io.py +++ b/pandas/tests/types/test_io.py @@ -107,10 +107,3 @@ def test_convert_downcast_int64(self): expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8) result = lib.downcast_int64(arr, na_values) self.assert_numpy_array_equal(result, expected) - - -if __name__ == '__main__': - import nose - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py index fa2bd535bb8d5..2b09cf5ab633d 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/types/test_missing.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import nose import numpy as np from datetime import datetime from pandas.util import testing as tm @@ -304,8 +303,3 @@ def test_na_value_for_dtype(): for dtype in ['O']: assert np.isnan(na_value_for_dtype(np.dtype(dtype))) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 012d67d29cc3f..ee70515850b25 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -4003,28 +4003,3 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, if gridsize is not None: kwds['gridsize'] = gridsize return self(kind='hexbin', x=x, y=y, C=C, **kwds) - - -if __name__ == '__main__': - # import pandas.rpy.common as com - # sales = com.load_data('sanfrancisco.home.sales', package='nutshell') - # top10 = sales['zip'].value_counts()[:10].index - # sales2 = sales[sales.zip.isin(top10)] - # _ = scatter_plot(sales2, 'squarefeet', 'price', by='zip') - - # plt.show() - - import matplotlib.pyplot as plt - - import pandas.tools.plotting as plots - import pandas.core.frame as fr - reload(plots) # noqa - reload(fr) # noqa - from pandas.core.frame import DataFrame - - data = DataFrame([[3, 6, -5], [4, 8, 2], [4, 9, -6], - [4, 9, -3], [2, 5, -1]], - columns=['A', 'B', 'C']) - data.plot(kind='barh', stacked=True) - - plt.show() diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py index 2be7e75573d6e..dae24c48b8238 100644 --- a/pandas/tools/tests/test_concat.py +++ b/pandas/tools/tests/test_concat.py @@ -1,5 +1,3 @@ -import nose - import numpy as np from numpy.random import randn @@ -2171,8 +2169,3 @@ def test_concat_multiindex_dfs_with_deepcopy(self): tm.assert_frame_equal(result_copy, expected) result_no_copy = pd.concat(example_dict, names=['testname']) tm.assert_frame_equal(result_no_copy, expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py index 4a2b64d080b4b..605a85026d605 100644 --- a/pandas/tools/tests/test_join.py +++ 
b/pandas/tools/tests/test_join.py @@ -1,7 +1,5 @@ # pylint: disable=E1103 -import nose - from numpy.random import randn import numpy as np @@ -799,8 +797,3 @@ def _join_by_hand(a, b, how='left'): for col, s in compat.iteritems(b_re): a_re[col] = s return a_re.reindex(columns=result_columns) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index e08074649f7e8..88856a012da6f 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1,7 +1,5 @@ # pylint: disable=E1103 -import nose - from datetime import datetime from numpy.random import randn from numpy import nan @@ -1370,8 +1368,3 @@ def f(): def f(): household.join(log_return, how='outer') self.assertRaises(NotImplementedError, f) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tools/tests/test_merge_asof.py index ef7b25008e80a..8e7323f72a8f5 100644 --- a/pandas/tools/tests/test_merge_asof.py +++ b/pandas/tools/tests/test_merge_asof.py @@ -1,4 +1,3 @@ -import nose import os import pytz @@ -938,8 +937,3 @@ def test_on_float_by_int(self): columns=['symbol', 'exch', 'price', 'mpv']) assert_frame_equal(result, expected) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tools/tests/test_merge_ordered.py index d163468abc88e..e08cc98e50794 100644 --- a/pandas/tools/tests/test_merge_ordered.py +++ b/pandas/tools/tests/test_merge_ordered.py @@ -1,5 +1,3 @@ -import nose - import pandas as pd from pandas import DataFrame, merge_ordered from pandas.util import testing as tm @@ -92,7 +90,3 @@ def test_empty_sequence_concat(self): pd.concat([pd.DataFrame()]) pd.concat([None, pd.DataFrame()]) pd.concat([pd.DataFrame(), None]) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 9cc520a7adb05..398e57d4ad0a4 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1321,9 +1321,3 @@ def test_crosstab_with_numpy_size(self): index=expected_index, columns=expected_column) tm.assert_frame_equal(result, expected) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index 5c7cee862ccd3..c5261597cf35d 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -1,5 +1,4 @@ import os -import nose import numpy as np from pandas.compat import zip @@ -351,8 +350,3 @@ def test_datetime_bin(self): def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 8a8960a057926..e1d057eb3c3c0 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -478,8 +478,3 @@ def test_downcast_limits(self): for dtype, downcast, min_max in dtype_downcast_min_max: series = pd.to_numeric(pd.Series(min_max), downcast=downcast) 
tm.assert_equal(series.dtype, dtype) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 4f2ac3ff0d87e..2ff06517f175a 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -1860,10 +1860,3 @@ def test_equals(self): self.assertFalse(idx.asobject.equals(idx3)) self.assertFalse(idx.equals(list(idx3))) self.assertFalse(idx.equals(pd.Series(idx3))) - - -if __name__ == '__main__': - import nose - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index f6cf11c871bba..8caed80f5a45b 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -1,7 +1,5 @@ from datetime import datetime, date -import nose - import numpy as np from pandas import Timestamp, Period, Index from pandas.compat import u @@ -12,6 +10,7 @@ try: import pandas.tseries.converter as converter except ImportError: + import nose raise nose.SkipTest("no pandas.tseries.converter, skipping") @@ -199,9 +198,3 @@ def test_integer_passthrough(self): rs = self.pc.convert([0, 1], None, self.axis) xp = [0, 1] self.assertEqual(rs, xp) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 87f9f55e0189c..209e6e40d5cf0 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -1,6 +1,5 @@ from datetime import datetime from pandas.compat import range -import nose import numpy as np from pandas.core.index import Index @@ -817,8 +816,3 @@ def test_cdaterange_weekmask_and_holidays(self): holidays=['2013-05-01']) xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) self.assert_index_equal(xp, rng) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tseries/tests/test_holiday.py index 62446e8e637c6..d4d273347e6e3 100644 --- a/pandas/tseries/tests/test_holiday.py +++ b/pandas/tseries/tests/test_holiday.py @@ -15,7 +15,6 @@ USLaborDay, USColumbusDay, USMartinLutherKingJr, USPresidentsDay) from pytz import utc -import nose class TestCalendar(tm.TestCase): @@ -385,8 +384,3 @@ def test_both_offset_observance_raises(self): Holiday("Cyber Monday", month=11, day=1, offset=[DateOffset(weekday=SA(4))], observance=next_monday) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 768e9212e6c42..ac488a3dfdcb2 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -2,10 +2,11 @@ from distutils.version import LooseVersion from datetime import date, datetime, timedelta from dateutil.relativedelta import relativedelta -from pandas.compat import range, iteritems -from pandas import compat + import nose from nose.tools import assert_raises +from pandas.compat import range, iteritems +from pandas import compat import numpy as np @@ -4956,8 +4957,3 @@ def test_all_offset_classes(self): first = Timestamp(test_values[0], tz='US/Eastern') + offset() second = 
Timestamp(test_values[1], tz='US/Eastern') self.assertEqual(first, second, msg=str(offset)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index a707cc3eb74ce..fdc067a827a5b 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -4967,9 +4967,3 @@ def test_get_period_field_raises_on_out_of_range(self): def test_get_period_field_array_raises_on_out_of_range(self): self.assertRaises(ValueError, _period.get_period_field_arr, -1, np.empty(1), 0) - - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index c40f930fbd094..222ffb735921a 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -3,7 +3,6 @@ from datetime import datetime, timedelta from functools import partial -import nose import numpy as np import pandas as pd @@ -3188,8 +3187,3 @@ def test_aggregate_with_nat(self): # if NaT is included, 'var', 'std', 'mean', 'first','last' # and 'nth' doesn't work yet - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 6efa024d81b98..13263259e0b8a 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -2,7 +2,6 @@ from __future__ import division from datetime import timedelta, time -import nose from distutils.version import LooseVersion import numpy as np @@ -2051,8 +2050,3 @@ def test_add_overflow(self): result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + to_timedelta(['7 seconds', pd.NaT, '4 hours'])) tm.assert_index_equal(result, exp) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_timeseries_legacy.py b/pandas/tseries/tests/test_timeseries_legacy.py index d8c01c53fb2e5..5395056c93412 100644 --- a/pandas/tseries/tests/test_timeseries_legacy.py +++ b/pandas/tseries/tests/test_timeseries_legacy.py @@ -219,8 +219,3 @@ def test_ms_vs_MS(self): def test_rule_aliases(self): rule = to_offset('10us') self.assertEqual(rule, Micro(10)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 64787b6e4e79a..00b60ba620c4b 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -1,7 +1,5 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime, timedelta, tzinfo, date -import nose - import numpy as np import pytz from distutils.version import LooseVersion @@ -1683,8 +1681,3 @@ def test_nat(self): idx = idx.tz_convert('US/Eastern') expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index cf5dbd671d38c..20e91a6f5bc44 100644 --- a/pandas/tseries/tests/test_tslib.py +++ 
b/pandas/tseries/tests/test_tslib.py @@ -1,4 +1,3 @@ -import nose import datetime import numpy as np from distutils.version import LooseVersion @@ -690,8 +689,3 @@ def _check_round(freq, expected): msg = pd.tseries.frequencies._INVALID_FREQ_ERROR with self.assertRaisesRegexp(ValueError, msg): stamp.round('foo') - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py index 96da32a4a845c..3feffe924c291 100644 --- a/pandas/tseries/tests/test_util.py +++ b/pandas/tseries/tests/test_util.py @@ -1,5 +1,4 @@ from pandas.compat import range -import nose import numpy as np @@ -125,8 +124,3 @@ def test_normalize_date(): result = normalize_date(value) assert (result == datetime(2012, 9, 7)) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) From 542c9166a6ceff4a4889caae3843c3a82a2301cd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Feb 2017 13:50:37 -0500 Subject: [PATCH 020/353] TST/STYLE: remove multiprocess nose flags and slight PEP fixes xref https://github.com/pandas-dev/pandas/pull/13856#issuecomment-278058522 Author: Jeff Reback Closes #15333 from jreback/mcs and squashes the following commits: 2edc842 [Jeff Reback] TST/STYLE: remove multiprocess nose flags and slight PEP fixes --- pandas/__init__.py | 8 +-- pandas/_version.py | 2 +- pandas/api/tests/test_api.py | 2 - pandas/compat/__init__.py | 24 +++++---- pandas/compat/chainmap.py | 1 + pandas/compat/numpy/function.py | 1 + pandas/compat/pickle_compat.py | 9 +++- pandas/computation/tests/test_eval.py | 10 ++-- pandas/core/base.py | 2 + pandas/core/config.py | 1 + pandas/core/frame.py | 5 +- pandas/core/indexing.py | 1 + pandas/core/internals.py | 1 + pandas/core/nanops.py | 2 + pandas/core/panel.py | 2 + pandas/core/series.py | 1 + pandas/core/window.py | 3 +- pandas/formats/format.py | 12 +++++ pandas/indexes/base.py | 6 +-- pandas/indexes/multi.py | 2 +- pandas/io/common.py | 1 + pandas/io/json/json.py | 4 +- pandas/io/json/normalize.py | 1 - pandas/io/sas/sasreader.py | 1 - pandas/io/tests/parser/c_parser_only.py | 9 ++-- pandas/io/tests/parser/compression.py | 1 + pandas/io/tests/parser/converters.py | 1 + pandas/io/tests/parser/dtypes.py | 1 + pandas/io/tests/parser/parse_dates.py | 1 + pandas/io/tests/parser/python_parser_only.py | 1 + pandas/io/tests/parser/test_parsers.py | 1 + pandas/io/tests/parser/test_unsupported.py | 2 + pandas/io/tests/test_clipboard.py | 2 +- pandas/io/tests/test_feather.py | 1 - pandas/io/tests/test_gbq.py | 3 ++ pandas/io/tests/test_packers.py | 4 +- pandas/io/tests/test_pickle.py | 1 - pandas/io/tests/test_pytables.py | 1 - pandas/msgpack/exceptions.py | 1 + pandas/sparse/array.py | 3 +- pandas/sparse/series.py | 1 + pandas/sparse/tests/test_arithmetics.py | 2 - pandas/sparse/tests/test_array.py | 2 +- pandas/sparse/tests/test_combine_concat.py | 4 -- pandas/sparse/tests/test_format.py | 2 - pandas/sparse/tests/test_frame.py | 2 +- pandas/sparse/tests/test_groupby.py | 2 - pandas/sparse/tests/test_indexing.py | 7 +-- pandas/sparse/tests/test_libsparse.py | 6 --- pandas/sparse/tests/test_list.py | 2 - pandas/sparse/tests/test_pivot.py | 2 - pandas/sparse/tests/test_series.py | 2 +- pandas/stats/fama_macbeth.py | 1 + pandas/stats/ols.py | 1 + pandas/stats/tests/test_ols.py | 8 --- pandas/tests/formats/test_format.py | 35 +++++++------ pandas/tests/formats/test_printing.py | 2 - 
pandas/tests/formats/test_style.py | 2 +-
pandas/tests/frame/test_alter_axes.py | 2 -
pandas/tests/frame/test_analytics.py | 2 -
pandas/tests/frame/test_apply.py | 2 -
pandas/tests/frame/test_asof.py | 1 -
.../tests/frame/test_axis_select_reindex.py | 2 -
pandas/tests/frame/test_block_internals.py | 2 -
pandas/tests/frame/test_combine_concat.py | 4 --
pandas/tests/frame/test_constructors.py | 4 --
pandas/tests/frame/test_convert_to.py | 2 -
pandas/tests/frame/test_dtypes.py | 4 --
pandas/tests/frame/test_indexing.py | 6 ---
pandas/tests/frame/test_misc_api.py | 4 --
pandas/tests/frame/test_missing.py | 2 -
pandas/tests/frame/test_mutate_columns.py | 2 -
pandas/tests/frame/test_nonunique_indexes.py | 2 -
pandas/tests/frame/test_operators.py | 2 -
pandas/tests/frame/test_quantile.py | 2 -
pandas/tests/frame/test_query_eval.py | 4 --
pandas/tests/frame/test_replace.py | 2 -
pandas/tests/frame/test_repr_info.py | 2 -
pandas/tests/frame/test_reshape.py | 2 -
pandas/tests/frame/test_sorting.py | 2 -
pandas/tests/frame/test_subclass.py | 2 -
pandas/tests/frame/test_timeseries.py | 6 +--
pandas/tests/frame/test_to_csv.py | 2 -
pandas/tests/groupby/test_aggregate.py | 2 -
pandas/tests/groupby/test_categorical.py | 2 -
pandas/tests/groupby/test_filters.py | 2 -
pandas/tests/groupby/test_groupby.py | 6 +--
pandas/tests/indexes/datetimes/test_astype.py | 2 -
.../indexes/datetimes/test_construction.py | 2 -
.../indexes/datetimes/test_date_range.py | 1 -
.../tests/indexes/datetimes/test_datetime.py | 1 -
.../indexes/datetimes/test_datetimelike.py | 1 -
.../tests/indexes/datetimes/test_indexing.py | 1 -
pandas/tests/indexes/datetimes/test_misc.py | 1 -
.../tests/indexes/datetimes/test_missing.py | 1 -
pandas/tests/indexes/datetimes/test_ops.py | 1 -
pandas/tests/indexes/datetimes/test_setops.py | 1 -
pandas/tests/indexes/datetimes/test_tools.py | 2 +-
pandas/tests/indexes/test_base.py | 4 +-
pandas/tests/indexes/test_category.py | 6 ++-
pandas/tests/indexes/test_datetimelike.py | 2 -
pandas/tests/indexes/test_multi.py | 21 ++++----
pandas/tests/indexes/test_numeric.py | 3 --
pandas/tests/indexes/test_timedelta.py | 1 -
pandas/tests/indexing/test_callable.py | 2 -
pandas/tests/indexing/test_coercion.py | 4 +-
pandas/tests/indexing/test_indexing.py | 2 -
pandas/tests/indexing/test_indexing_slow.py | 2 -
pandas/tests/plotting/test_misc.py | 2 +
pandas/tests/scalar/test_timestamp.py | 4 +-
pandas/tests/series/test_alter_axes.py | 2 -
pandas/tests/series/test_analytics.py | 2 -
pandas/tests/series/test_apply.py | 4 --
pandas/tests/series/test_asof.py | 1 -
pandas/tests/series/test_combine_concat.py | 4 --
pandas/tests/series/test_constructors.py | 2 -
pandas/tests/series/test_datetime_values.py | 6 +--
pandas/tests/series/test_dtypes.py | 2 -
pandas/tests/series/test_indexing.py | 4 +-
pandas/tests/series/test_internals.py | 2 -
pandas/tests/series/test_io.py | 6 ---
pandas/tests/series/test_misc_api.py | 2 -
pandas/tests/series/test_missing.py | 2 -
pandas/tests/series/test_operators.py | 2 -
pandas/tests/series/test_replace.py | 2 -
pandas/tests/series/test_repr.py | 2 -
pandas/tests/series/test_sorting.py | 2 -
pandas/tests/series/test_subclass.py | 4 --
pandas/tests/series/test_timeseries.py | 1 -
pandas/tests/test_algos.py | 11 ----
pandas/tests/test_categorical.py | 50 +++++++++----------
pandas/tests/test_common.py | 2 -
pandas/tests/test_config.py | 1 -
pandas/tests/test_expressions.py | 2 -
pandas/tests/test_generic.py | 2 -
pandas/tests/test_internals.py | 5 --
pandas/tests/test_join.py | 1 -
pandas/tests/test_multilevel.py | 2 - pandas/tests/test_panel.py | 6 --- pandas/tests/test_panel4d.py | 8 --- pandas/tests/test_reshape.py | 2 - pandas/tests/test_stats.py | 1 - pandas/tests/test_strings.py | 2 - pandas/tests/test_take.py | 4 -- pandas/tests/test_testing.py | 6 --- pandas/tests/test_util.py | 1 + pandas/tests/test_window.py | 2 - pandas/tests/types/test_cast.py | 2 - pandas/tests/types/test_common.py | 2 - pandas/tests/types/test_concat.py | 2 - pandas/tests/types/test_dtypes.py | 2 - pandas/tests/types/test_generic.py | 2 - pandas/tests/types/test_inference.py | 3 -- pandas/tests/types/test_missing.py | 2 - pandas/tools/tests/test_concat.py | 2 - pandas/tools/tests/test_hashing.py | 2 - pandas/tools/tests/test_join.py | 2 - pandas/tools/tests/test_merge.py | 2 - pandas/tools/tests/test_merge_asof.py | 3 +- pandas/tools/tests/test_pivot.py | 10 ++-- pandas/tools/tests/test_tile.py | 8 +-- pandas/tseries/period.py | 2 +- pandas/tseries/tests/test_base.py | 4 +- pandas/tseries/tests/test_bin_groupby.py | 2 +- pandas/tseries/tests/test_converter.py | 2 + pandas/tseries/tests/test_daterange.py | 2 + pandas/tseries/tests/test_frequencies.py | 1 + pandas/tseries/tests/test_holiday.py | 4 ++ pandas/tseries/tests/test_offsets.py | 19 ++++--- pandas/tseries/tests/test_period.py | 3 ++ pandas/tseries/tests/test_resample.py | 11 ++-- pandas/tseries/tests/test_timedeltas.py | 3 +- .../tseries/tests/test_timeseries_legacy.py | 2 - pandas/tseries/tests/test_timezones.py | 4 +- pandas/tseries/tests/test_tslib.py | 3 ++ pandas/types/generic.py | 1 + pandas/util/clipboard/__init__.py | 2 +- pandas/util/clipboard/clipboards.py | 1 + pandas/util/clipboard/exceptions.py | 1 + pandas/util/clipboard/windows.py | 4 +- pandas/util/decorators.py | 2 + pandas/util/depr_module.py | 1 + pandas/util/testing.py | 5 +- 183 files changed, 229 insertions(+), 418 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 2d91c97144e3c..9133e11beaa2b 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -15,7 +15,8 @@ missing_dependencies.append(dependency) if missing_dependencies: - raise ImportError("Missing required dependencies {0}".format(missing_dependencies)) + raise ImportError( + "Missing required dependencies {0}".format(missing_dependencies)) del hard_dependencies, dependency, missing_dependencies # numpy compat @@ -24,7 +25,8 @@ try: from pandas import hashtable, tslib, lib except ImportError as e: # pragma: no cover - module = str(e).lstrip('cannot import name ') # hack but overkill to use re + # hack but overkill to use re + module = str(e).lstrip('cannot import name ') raise ImportError("C extension: {0} not built. If you want to import " "pandas from the source directory, you may need to run " "'python setup.py build_ext --inplace --force' to build " @@ -61,5 +63,5 @@ # use the closest tagged version if possible from ._version import get_versions v = get_versions() -__version__ = v.get('closest-tag',v['version']) +__version__ = v.get('closest-tag', v['version']) del get_versions, v diff --git a/pandas/_version.py b/pandas/_version.py index 77b2fdca59576..d764923fd7247 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -157,7 +157,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # "stabilization", as well as "HEAD" and "master". 
tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: - print("discarding '%s', no digits" % ",".join(refs-tags)) + print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index f925fd792f9ca..02165d82d4232 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -8,8 +8,6 @@ from pandas.api import types from pandas.util import testing as tm -_multiprocess_can_split_ = True - class Base(object): diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 532f960468204..7ebdd9735b967 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -79,25 +79,25 @@ def signature(f): args = [ p.name for p in sig.parameters.values() if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD - ] + ] varargs = [ p.name for p in sig.parameters.values() if p.kind == inspect.Parameter.VAR_POSITIONAL - ] + ] varargs = varargs[0] if varargs else None keywords = [ p.name for p in sig.parameters.values() if p.kind == inspect.Parameter.VAR_KEYWORD - ] + ] keywords = keywords[0] if keywords else None defaults = [ p.default for p in sig.parameters.values() if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and p.default is not p.empty - ] or None - argspec = namedtuple('Signature',['args','defaults', - 'varargs','keywords']) - return argspec(args,defaults,varargs,keywords) + ] or None + argspec = namedtuple('Signature', ['args', 'defaults', + 'varargs', 'keywords']) + return argspec(args, defaults, varargs, keywords) # have to explicitly put builtins into the namespace range = range @@ -170,7 +170,7 @@ def iterkeys(obj, **kw): def itervalues(obj, **kw): return obj.itervalues(**kw) - next = lambda it : it.next() + next = lambda it: it.next() else: def iteritems(obj, **kw): return iter(obj.items(**kw)) @@ -183,6 +183,7 @@ def itervalues(obj, **kw): next = next + def bind_method(cls, name, func): """Bind a method to class, python 2 and python 3 compatible. 
@@ -307,7 +308,8 @@ def set_function_name(f, name, cls): f.__name__ = name return f - class ResourceWarning(Warning): pass + class ResourceWarning(Warning): + pass string_and_binary_types = string_types + (binary_type,) @@ -398,14 +400,18 @@ def is_platform_little_endian(): """ am I little endian """ return sys.byteorder == 'little' + def is_platform_windows(): return sys.platform == 'win32' or sys.platform == 'cygwin' + def is_platform_linux(): return sys.platform == 'linux2' + def is_platform_mac(): return sys.platform == 'darwin' + def is_platform_32bit(): return struct.calcsize("P") * 8 < 64 diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py index 9edd2ef056a52..cf1cad5694570 100644 --- a/pandas/compat/chainmap.py +++ b/pandas/compat/chainmap.py @@ -5,6 +5,7 @@ class DeepChainMap(ChainMap): + def __setitem__(self, key, value): for mapping in self.maps: if key in mapping: diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 895a376457f09..72e89586d0280 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -27,6 +27,7 @@ class CompatValidator(object): + def __init__(self, defaults, fname=None, method=None, max_fname_arg_count=None): self.fname = fname diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 7ed9e7ff90bd8..1cdf8afd563c6 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -9,6 +9,7 @@ from pandas import compat, Index from pandas.compat import u, string_types + def load_reduce(self): stack = self.stack args = stack.pop() @@ -34,7 +35,7 @@ def load_reduce(self): pass # try to reencode the arguments - if getattr(self,'encoding',None) is not None: + if getattr(self, 'encoding', None) is not None: args = tuple([arg.encode(self.encoding) if isinstance(arg, string_types) else arg for arg in args]) @@ -44,7 +45,7 @@ def load_reduce(self): except: pass - if getattr(self,'is_verbose',None): + if getattr(self, 'is_verbose', None): print(sys.exc_info()) print(func, args) raise @@ -61,6 +62,7 @@ class Unpickler(pkl.Unpickler): Unpickler.dispatch = copy.copy(Unpickler.dispatch) Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce + def load_newobj(self): args = self.stack.pop() cls = self.stack[-1] @@ -75,6 +77,8 @@ def load_newobj(self): Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj # py3 compat + + def load_newobj_ex(self): kwargs = self.stack.pop() args = self.stack.pop() @@ -91,6 +95,7 @@ def load_newobj_ex(self): except: pass + def load(fh, encoding=None, compat=False, is_verbose=False): """load a pickle, with a provided encoding diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index dbac72c619a52..aa05626af9175 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -201,7 +201,7 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): binop=binop, cmp2=cmp2) scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in skip_these or - cmp2 in skip_these)) + cmp2 in skip_these)) if scalar_with_in_notin: with tm.assertRaises(TypeError): pd.eval(ex, engine=self.engine, parser=self.parser) @@ -702,7 +702,6 @@ def test_float_truncation(self): tm.assert_frame_equal(expected, result) - class TestEvalNumexprPython(TestEvalNumexprPandas): @classmethod @@ -782,6 +781,7 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): # typecasting rules consistency with python # issue #12388 + class TestTypeCasting(object): def check_binop_typecasting(self, engine, parser, op, dt): 
@@ -803,7 +803,8 @@ def test_binop_typecasting(self): for engine, parser in ENGINES_PARSERS: for op in ['+', '-', '*', '**', '/']: # maybe someday... numexpr has too many upcasting rules now - #for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])): + # for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', + # 'float'])): for dt in [np.float32, np.float64]: yield self.check_binop_typecasting, engine, parser, op, dt @@ -1969,10 +1970,11 @@ def test_negate_lt_eq_le(): for engine, parser in product(_engines, expr._parsers): yield check_negate_lt_eq_le, engine, parser + class TestValidate(tm.TestCase): def test_validate_bool_args(self): - invalid_values = [1, "True", [1,2,3], 5.0] + invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: with self.assertRaises(ValueError): diff --git a/pandas/core/base.py b/pandas/core/base.py index e7a79c3291a92..657da859ddde2 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -230,6 +230,7 @@ def f(self, *args, **kwargs): class AccessorProperty(object): """Descriptor for implementing accessor properties like Series.str """ + def __init__(self, accessor_cls, construct_accessor): self.accessor_cls = accessor_cls self.construct_accessor = construct_accessor @@ -651,6 +652,7 @@ class GroupByMixin(object): @staticmethod def _dispatch(name, *args, **kwargs): """ dispatch to apply """ + def outer(self, *args, **kwargs): def f(x): x = self._shallow_copy(x, groupby=self._groupby) diff --git a/pandas/core/config.py b/pandas/core/config.py index 618de4e02b56f..ed63c865ebfb4 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -215,6 +215,7 @@ def __dir__(self): class CallableDynamicDoc(object): + def __init__(self, func, doc_tmpl): self.__doc_tmpl__ = doc_tmpl self.__func__ = func diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cc81c66100a6f..79bdad82af5a3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5326,9 +5326,10 @@ def isin(self, values): "allowed to be passed to DataFrame.isin(), " "you passed a " "{0!r}".format(type(values).__name__)) - return DataFrame(lib.ismember(self.values.ravel(), + return DataFrame( + lib.ismember(self.values.ravel(), set(values)).reshape(self.shape), self.index, - self.columns) + self.columns) # ---------------------------------------------------------------------- # Deprecated stuff diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 40050d6d769a6..6bb2d1c479844 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -42,6 +42,7 @@ def get_indexers_list(): # the public IndexSlicerMaker class _IndexSlice(object): + def __getitem__(self, arg): return arg diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 289ce150eb46b..f0b1516d786c6 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5122,6 +5122,7 @@ def trim_join_unit(join_unit, length): class JoinUnit(object): + def __init__(self, block, shape, indexers=None): # Passing shape explicitly is required for cases when block is None. 
if indexers is None: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 1f76bc850cee9..0cc3a2d039b5e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -26,6 +26,7 @@ class disallow(object): + def __init__(self, *dtypes): super(disallow, self).__init__() self.dtypes = tuple(np.dtype(dtype).type for dtype in dtypes) @@ -58,6 +59,7 @@ def _f(*args, **kwargs): class bottleneck_switch(object): + def __init__(self, zero_value=None, **kwargs): self.zero_value = zero_value self.kwargs = kwargs diff --git a/pandas/core/panel.py b/pandas/core/panel.py index a11ef53de1af9..6da10305eb4fc 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1560,6 +1560,7 @@ def f(self, other, axis=0): # legacy class WidePanel(Panel): + def __init__(self, *args, **kwargs): # deprecation, #10892 warnings.warn("WidePanel is deprecated. Please use Panel", @@ -1569,6 +1570,7 @@ def __init__(self, *args, **kwargs): class LongPanel(DataFrame): + def __init__(self, *args, **kwargs): # deprecation, #10892 warnings.warn("LongPanel is deprecated. Please use DataFrame", diff --git a/pandas/core/series.py b/pandas/core/series.py index 9845e1cd4ad47..43f16f690692a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2987,6 +2987,7 @@ def create_from_value(value, index, dtype): # backwards compatiblity class TimeSeries(Series): + def __init__(self, *args, **kwargs): # deprecation TimeSeries, #10890 warnings.warn("TimeSeries is deprecated. Please use Series", diff --git a/pandas/core/window.py b/pandas/core/window.py index bda134dd8a2a4..50de6b84d7cba 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -659,6 +659,7 @@ def f(x, name=name, *args): class _Rolling(_Window): + @property def _constructor(self): return Rolling @@ -1718,7 +1719,7 @@ def dataframe_from_int_dict(data, frame_template): def _get_center_of_mass(com, span, halflife, alpha): valid_count = len([x for x in [com, span, halflife, alpha] - if x is not None]) + if x is not None]) if valid_count > 1: raise ValueError("com, span, halflife, and alpha " "are mutually exclusive") diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 3bac7d2821760..439b96d650204 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -89,6 +89,7 @@ class CategoricalFormatter(object): + def __init__(self, categorical, buf=None, length=True, na_rep='NaN', footer=True): self.categorical = categorical @@ -142,6 +143,7 @@ def to_string(self): class SeriesFormatter(object): + def __init__(self, series, buf=None, length=True, header=True, index=True, na_rep='NaN', name=False, float_format=None, dtype=True, max_rows=None): @@ -272,6 +274,7 @@ def to_string(self): class TextAdjustment(object): + def __init__(self): self.encoding = get_option("display.encoding") @@ -287,6 +290,7 @@ def adjoin(self, space, *lists, **kwargs): class EastAsianTextAdjustment(TextAdjustment): + def __init__(self): super(EastAsianTextAdjustment, self).__init__() if get_option("display.unicode.ambiguous_as_wide"): @@ -1366,6 +1370,7 @@ def _get_level_lengths(levels, sentinel=''): class CSVFormatter(object): + def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, @@ -1950,6 +1955,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', class GenericArrayFormatter(object): + def __init__(self, values, digits=7, formatter=None, na_rep='NaN', space=12, float_format=None, justify='right', 
decimal='.', quoting=None, fixed_width=True): @@ -2151,6 +2157,7 @@ def _format_strings(self): class IntArrayFormatter(GenericArrayFormatter): + def _format_strings(self): formatter = self.formatter or (lambda x: '% d' % x) fmt_values = [formatter(x) for x in self.values] @@ -2158,6 +2165,7 @@ def _format_strings(self): class Datetime64Formatter(GenericArrayFormatter): + def __init__(self, values, nat_rep='NaT', date_format=None, **kwargs): super(Datetime64Formatter, self).__init__(values, **kwargs) self.nat_rep = nat_rep @@ -2183,6 +2191,7 @@ def _format_strings(self): class PeriodArrayFormatter(IntArrayFormatter): + def _format_strings(self): from pandas.tseries.period import IncompatibleFrequency try: @@ -2197,6 +2206,7 @@ def _format_strings(self): class CategoricalArrayFormatter(GenericArrayFormatter): + def __init__(self, values, *args, **kwargs): GenericArrayFormatter.__init__(self, values, *args, **kwargs) @@ -2328,6 +2338,7 @@ def _get_format_datetime64_from_values(values, date_format): class Datetime64TZFormatter(Datetime64Formatter): + def _format_strings(self): """ we by definition have a TZ """ @@ -2342,6 +2353,7 @@ def _format_strings(self): class Timedelta64Formatter(GenericArrayFormatter): + def __init__(self, values, nat_rep='NaT', box=False, **kwargs): super(Timedelta64Formatter, self).__init__(values, **kwargs) self.nat_rep = nat_rep diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index dcd565ee5f0e9..bb2941a121452 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -168,8 +168,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif isinstance(data, (np.ndarray, Index, ABCSeries)): if (is_datetime64_any_dtype(data) or - (dtype is not None and is_datetime64_any_dtype(dtype)) or - 'tz' in kwargs): + (dtype is not None and is_datetime64_any_dtype(dtype)) or + 'tz' in kwargs): from pandas.tseries.index import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) @@ -3606,7 +3606,7 @@ def _validate_for_numeric_binop(self, other, op, opstr): typ=type(other)) ) elif isinstance(other, np.ndarray) and not other.ndim: - other = other.item() + other = other.item() if isinstance(other, (Index, ABCSeries, np.ndarray)): if len(self) != len(other): diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 00ead012a916a..d2469cf1a3eed 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1813,7 +1813,7 @@ def partial_selection(key, indexer=None): for k, l in zip(key, self.levels)] can_index_exactly = any(all_dates) if (any([l.is_all_dates - for k, l in zip(key, self.levels)]) and + for k, l in zip(key, self.levels)]) and not can_index_exactly): indexer = self.get_loc(key) diff --git a/pandas/io/common.py b/pandas/io/common.py index 6817c824ad786..b24acb256c4a9 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -109,6 +109,7 @@ class BaseIterator(object): """Subclass this and provide a "__next__()" method to obtain an iterator. Useful only when the object being iterated is non-reusable (e.g. 
OK for a parser, not for an in-memory table, yes for its iterator).""" + def __iter__(self): return self diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index d29f4a371dd4d..6fc766081eefe 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -23,8 +23,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', default_handler=None, lines=False): if lines and orient != 'records': - raise ValueError( - "'lines' keyword only valid when 'orient' is records") + raise ValueError( + "'lines' keyword only valid when 'orient' is records") if isinstance(obj, Series): s = SeriesWriter( diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index aa80954233682..d684441c5974d 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -89,7 +89,6 @@ def json_normalize(data, record_path=None, meta=None, meta_prefix=None, record_prefix=None, errors='raise'): - """ "Normalize" semi-structured JSON data into a flat table diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 29e7f131fd9bc..3e4d9c9024dbd 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -6,7 +6,6 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, chunksize=None, iterator=False): - """ Read SAS files stored as either XPORT or SAS7BDAT format files. diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py index 73edda90720af..11073f3f108ba 100644 --- a/pandas/io/tests/parser/c_parser_only.py +++ b/pandas/io/tests/parser/c_parser_only.py @@ -18,6 +18,7 @@ class CParserTests(object): + def test_buffer_overflow(self): # see gh-9205: test certain malformed input files that cause # buffer overflows in tokenizer.c @@ -375,13 +376,13 @@ def test_internal_null_byte(self): def test_read_nrows_large(self): # gh-7626 - Read only nrows of data in for large inputs (>262144b) header_narrow = '\t'.join(['COL_HEADER_' + str(i) - for i in range(10)]) + '\n' + for i in range(10)]) + '\n' data_narrow = '\t'.join(['somedatasomedatasomedata1' - for i in range(10)]) + '\n' + for i in range(10)]) + '\n' header_wide = '\t'.join(['COL_HEADER_' + str(i) - for i in range(15)]) + '\n' + for i in range(15)]) + '\n' data_wide = '\t'.join(['somedatasomedatasomedata2' - for i in range(15)]) + '\n' + for i in range(15)]) + '\n' test_input = (header_narrow + data_narrow * 1050 + header_wide + data_wide * 2) diff --git a/pandas/io/tests/parser/compression.py b/pandas/io/tests/parser/compression.py index e95617faf2071..308ca6e8a5a2c 100644 --- a/pandas/io/tests/parser/compression.py +++ b/pandas/io/tests/parser/compression.py @@ -11,6 +11,7 @@ class CompressionTests(object): + def test_zip(self): try: import zipfile diff --git a/pandas/io/tests/parser/converters.py b/pandas/io/tests/parser/converters.py index 68231d67534ee..2ceaff9291e7e 100644 --- a/pandas/io/tests/parser/converters.py +++ b/pandas/io/tests/parser/converters.py @@ -19,6 +19,7 @@ class ConverterTests(object): + def test_converters_type_must_be_dict(self): data = """index,A,B,C,D foo,2,3,4,5 diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/io/tests/parser/dtypes.py index abcd14e9499cb..fa95c18c4d7a9 100644 --- a/pandas/io/tests/parser/dtypes.py +++ b/pandas/io/tests/parser/dtypes.py @@ -16,6 +16,7 @@ class DtypeTests(object): + def test_passing_dtype(self): # see gh-6607 df = DataFrame(np.random.rand(5, 2).round(4), columns=list( diff --git a/pandas/io/tests/parser/parse_dates.py b/pandas/io/tests/parser/parse_dates.py index 
e4af1ff70a498..ad3d5f2382a49 100644 --- a/pandas/io/tests/parser/parse_dates.py +++ b/pandas/io/tests/parser/parse_dates.py @@ -25,6 +25,7 @@ class ParseDatesTests(object): + def test_separator_date_conflict(self): # Regression test for gh-4678: make sure thousands separator and # date parsing do not conflict. diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index ad62aaa275127..283ff366b5efd 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -18,6 +18,7 @@ class PythonParserTests(object): + def test_negative_skipfooter_raises(self): text = """#foo,a,b,c #foo,a,b,c diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py index 93b5fdcffed4c..2ae557a7d57db 100644 --- a/pandas/io/tests/parser/test_parsers.py +++ b/pandas/io/tests/parser/test_parsers.py @@ -32,6 +32,7 @@ class BaseParser(CommentTests, CompressionTests, ParseDatesTests, ParserTests, SkipRowsTests, UsecolsTests, QuotingTests, DtypeTests): + def read_csv(self, *args, **kwargs): raise NotImplementedError diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py index e941c9186cd6a..999db47cf2eaf 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/io/tests/parser/test_unsupported.py @@ -18,6 +18,7 @@ class TestUnsupportedFeatures(tm.TestCase): + def test_mangle_dupe_cols_false(self): # see gh-12935 data = 'a b c\n1 2 3' @@ -111,6 +112,7 @@ def test_python_engine(self): class TestDeprecatedFeatures(tm.TestCase): + def test_deprecated_args(self): data = '1,2,3' diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py index 93d14077aeacf..98a4152754b55 100644 --- a/pandas/io/tests/test_clipboard.py +++ b/pandas/io/tests/test_clipboard.py @@ -54,7 +54,7 @@ def setUpClass(cls): 'es': 'en español'.split()}) # unicode round trip test for GH 13747, GH 12529 cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], - 'b': ['øπ∆˚¬', 'œ∑´®']}) + 'b': ['øπ∆˚¬', 'œ∑´®']}) cls.data_types = list(cls.data.keys()) @classmethod diff --git a/pandas/io/tests/test_feather.py b/pandas/io/tests/test_feather.py index dcb057ec30004..218175e5ef527 100644 --- a/pandas/io/tests/test_feather.py +++ b/pandas/io/tests/test_feather.py @@ -18,7 +18,6 @@ class TestFeather(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): pass diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index ac481a44de5e8..0507f0d89661c 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -294,6 +294,7 @@ def test_get_application_default_credentials_returns_credentials(self): class TestGBQConnectorServiceAccountKeyPathIntegration(tm.TestCase): + def setUp(self): _setup_common() @@ -325,6 +326,7 @@ def test_should_be_able_to_get_results_from_query(self): class TestGBQConnectorServiceAccountKeyContentsIntegration(tm.TestCase): + def setUp(self): _setup_common() @@ -356,6 +358,7 @@ def test_should_be_able_to_get_results_from_query(self): class GBQUnitTests(tm.TestCase): + def setUp(self): _setup_common() diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 6b368bb2bb5ce..8a0cfb92bd3c0 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -40,8 +40,6 @@ else: _ZLIB_INSTALLED = True -_multiprocess_can_split_ = False - def check_arbitrary(a, b): @@ -870,7 +868,7 @@ def read_msgpacks(self, version): for f in os.listdir(pth): # GH12142 0.17 
files packed in P2 can't be read in P3 if (compat.PY3 and version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): + f.split('.')[-4][-1] == '2'): continue vf = os.path.join(pth, f) try: diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index b5c316b326b8d..73a9173e85906 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -30,7 +30,6 @@ class TestPickle(): http://stackoverflow.com/questions/6689537/ nose-test-generators-inside-class """ - _multiprocess_can_split_ = True def setUp(self): from pandas.io.tests.generate_legacy_storage_files import ( diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index f4f03856f94e2..501e744ad308c 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -50,7 +50,6 @@ _default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2' else 'zlib') -_multiprocess_can_split_ = False # testing on windows/py3 seems to fault # for using compression diff --git a/pandas/msgpack/exceptions.py b/pandas/msgpack/exceptions.py index 40f5a8af8f583..ae0f74a6700bd 100644 --- a/pandas/msgpack/exceptions.py +++ b/pandas/msgpack/exceptions.py @@ -15,6 +15,7 @@ class UnpackValueError(UnpackException, ValueError): class ExtraData(ValueError): + def __init__(self, unpacked, extra): self.unpacked = unpacked self.extra = extra diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index da13726e88a14..c65e0dd5c9f7b 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -239,7 +239,7 @@ def _simple_new(cls, data, sp_index, fill_value): fill_value = na_value_for_dtype(data.dtype) if (is_integer_dtype(data) and is_float(fill_value) and - sp_index.ngaps > 0): + sp_index.ngaps > 0): # if float fill_value is being included in dense repr, # convert values to float data = data.astype(float) @@ -405,7 +405,6 @@ def __iter__(self): yield self._get_val_at(i) def __getitem__(self, key): - """ """ diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index d6bc892921c42..2d3a9effe6939 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -847,6 +847,7 @@ def from_coo(cls, A, dense_index=False): # backwards compatiblity class SparseTimeSeries(SparseSeries): + def __init__(self, *args, **kwargs): # deprecation TimeSeries, #10890 warnings.warn("SparseTimeSeries is deprecated. 
Please use " diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/sparse/tests/test_arithmetics.py index f24244b38c42b..eb926082a7b7c 100644 --- a/pandas/sparse/tests/test_arithmetics.py +++ b/pandas/sparse/tests/test_arithmetics.py @@ -5,8 +5,6 @@ class TestSparseArrayArithmetics(tm.TestCase): - _multiprocess_can_split_ = True - _base = np.array _klass = pd.SparseArray diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 55f292a8a231a..70aaea5b5b1f0 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -14,7 +14,6 @@ class TestSparseArray(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) @@ -655,6 +654,7 @@ def test_fillna_overlap(self): class TestSparseArrayAnalytics(tm.TestCase): + def test_sum(self): data = np.arange(10).astype(float) out = SparseArray(data).sum() diff --git a/pandas/sparse/tests/test_combine_concat.py b/pandas/sparse/tests/test_combine_concat.py index 5240d592810ad..81655daec6164 100644 --- a/pandas/sparse/tests/test_combine_concat.py +++ b/pandas/sparse/tests/test_combine_concat.py @@ -7,8 +7,6 @@ class TestSparseSeriesConcat(tm.TestCase): - _multiprocess_can_split_ = True - def test_concat(self): val1 = np.array([1, 2, np.nan, np.nan, 0, np.nan]) val2 = np.array([3, np.nan, 4, 0, 0]) @@ -126,8 +124,6 @@ def test_concat_sparse_dense(self): class TestSparseDataFrameConcat(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.dense1 = pd.DataFrame({'A': [0., 1., 2., np.nan], diff --git a/pandas/sparse/tests/test_format.py b/pandas/sparse/tests/test_format.py index 377eaa20565a2..0c0e773d19bb9 100644 --- a/pandas/sparse/tests/test_format.py +++ b/pandas/sparse/tests/test_format.py @@ -15,8 +15,6 @@ class TestSparseSeriesFormatting(tm.TestCase): - _multiprocess_can_split_ = True - @property def dtype_format_for_platform(self): return '' if use_32bit_repr else ', dtype=int32' diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index e26c0ed1afe58..e3b865492c043 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -22,7 +22,6 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): klass = SparseDataFrame - _multiprocess_can_split_ = True def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], @@ -1150,6 +1149,7 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(tm.TestCase): + def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], diff --git a/pandas/sparse/tests/test_groupby.py b/pandas/sparse/tests/test_groupby.py index 0cb33f4ea0a56..23bea94a2aef8 100644 --- a/pandas/sparse/tests/test_groupby.py +++ b/pandas/sparse/tests/test_groupby.py @@ -6,8 +6,6 @@ class TestSparseGroupBy(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py index a634c34139186..c400b68c8a7d8 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/sparse/tests/test_indexing.py @@ -8,8 +8,6 @@ class TestSparseSeriesIndexing(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.orig = pd.Series([1, np.nan, np.nan, 3, np.nan]) self.sparse = self.orig.to_sparse() @@ -431,8 +429,6 @@ def tests_indexing_with_sparse(self): class 
TestSparseSeriesMultiIndexing(TestSparseSeriesIndexing): - _multiprocess_can_split_ = True - def setUp(self): # Mi with duplicated values idx = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 0), @@ -544,8 +540,6 @@ def test_loc_slice(self): class TestSparseDataFrameIndexing(tm.TestCase): - _multiprocess_can_split_ = True - def test_getitem(self): orig = pd.DataFrame([[1, np.nan, np.nan], [2, 3, np.nan], @@ -908,6 +902,7 @@ def test_reindex_fill_value(self): class TestMultitype(tm.TestCase): + def setUp(self): self.cols = ['string', 'int', 'float', 'object'] diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index b3aa3368e9455..491005db2ae79 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -241,8 +241,6 @@ def test_intersect_identical(self): class TestSparseIndexCommon(tm.TestCase): - _multiprocess_can_split_ = True - def test_int_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') self.assertIsInstance(idx, IntIndex) @@ -391,8 +389,6 @@ def _check(index): class TestBlockIndex(tm.TestCase): - _multiprocess_can_split_ = True - def test_block_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') self.assertIsInstance(idx, BlockIndex) @@ -478,8 +474,6 @@ def test_to_block_index(self): class TestIntIndex(tm.TestCase): - _multiprocess_can_split_ = True - def test_int_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') self.assertIsInstance(idx, IntIndex) diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index 458681cdc1de0..8511cd5997368 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -10,8 +10,6 @@ class TestSparseList(unittest.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.na_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) self.zero_data = np.array([0, 0, 1, 2, 3, 0, 4, 5, 0, 6]) diff --git a/pandas/sparse/tests/test_pivot.py b/pandas/sparse/tests/test_pivot.py index 482a99a96194f..4ff9f20093c67 100644 --- a/pandas/sparse/tests/test_pivot.py +++ b/pandas/sparse/tests/test_pivot.py @@ -5,8 +5,6 @@ class TestPivotTable(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index b34f5dd2cee9f..db6ae14b096d3 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -56,7 +56,6 @@ def _test_data2_zero(): class TestSparseSeries(tm.TestCase, SharedWithSparse): - _multiprocess_can_split_ = True def setUp(self): arr, index = _test_data1() @@ -941,6 +940,7 @@ def test_combine_first(self): class TestSparseHandlingMultiIndexes(tm.TestCase): + def setUp(self): miindex = pd.MultiIndex.from_product( [["x", "y"], ["10", "20"]], names=['row-foo', 'row-bar']) diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index f7d50e8e72a5c..d564f9cb6c425 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -9,6 +9,7 @@ # flake8: noqa + def fama_macbeth(**kwargs): """Runs Fama-MacBeth regression. diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index b533d255bd196..96ec70d59488a 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -24,6 +24,7 @@ _FP_ERR = 1e-8 + class OLS(StringMixin): """ Runs a full sample ordinary least squares regression. 
diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 09fa21d58ea9d..b90c51366c86f 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -60,8 +60,6 @@ def _compare_moving_ols(model1, model2): class TestOLS(BaseTest): - _multiprocess_can_split_ = True - # TODO: Add tests for OLS y predict # TODO: Right now we just check for consistency between full-sample and # rolling/expanding results of the panel OLS. We should also cross-check @@ -262,8 +260,6 @@ def test_ols_object_dtype(self): class TestOLSMisc(tm.TestCase): - _multiprocess_can_split_ = True - """ For test coverage with faux data """ @@ -511,8 +507,6 @@ def test_columns_tuples_summary(self): class TestPanelOLS(BaseTest): - _multiprocess_can_split_ = True - FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value', 'r2', 'r2_adj', 'rmse', 'std_err', 't_stat', 'var_beta'] @@ -894,8 +888,6 @@ def _period_slice(panelModel, i): class TestOLSFilter(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): date_index = date_range(datetime(2009, 12, 11), periods=3, freq=offsets.BDay()) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 7a2c5f3b7f7c1..a9553d9ea10cb 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -113,7 +113,6 @@ def has_expanded_repr(df): class TestDataFrameFormatting(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.warn_filters = warnings.filters @@ -762,14 +761,15 @@ def test_truncate_with_different_dtypes(self): # 11594 import datetime - s = Series([datetime.datetime(2012, 1, 1)]*10 + [datetime.datetime(1012,1,2)] + [datetime.datetime(2012, 1, 3)]*10) + s = Series([datetime.datetime(2012, 1, 1)] * 10 + + [datetime.datetime(1012, 1, 2)] + [datetime.datetime(2012, 1, 3)] * 10) with pd.option_context('display.max_rows', 8): result = str(s) self.assertTrue('object' in result) # 12045 - df = DataFrame({'text': ['some words'] + [None]*9}) + df = DataFrame({'text': ['some words'] + [None] * 9}) with pd.option_context('display.max_rows', 8, 'display.max_columns', 3): result = str(df) @@ -779,7 +779,8 @@ def test_truncate_with_different_dtypes(self): def test_datetimelike_frame(self): # GH 12211 - df = DataFrame({'date' : [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT]*5}) + df = DataFrame( + {'date': [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT] * 5}) with option_context("display.max_rows", 5): result = str(df) @@ -1219,8 +1220,8 @@ def test_to_html_multiindex_odd_even_truncate(self): mi = MultiIndex.from_product([[100, 200, 300], [10, 20, 30], [1, 2, 3, 4, 5, 6, 7]], - names=['a','b','c']) - df = DataFrame({'n' : range(len(mi))}, index = mi) + names=['a', 'b', 'c']) + df = DataFrame({'n': range(len(mi))}, index=mi) result = df.to_html(max_rows=60) expected = """\ @@ -3451,8 +3452,8 @@ def test_to_latex_with_formatters(self): 'float': [1.0, 2.0, 3.0], 'object': [(1, 2), True, False], 'datetime64': [datetime(2016, 1, 1), - datetime(2016, 2, 5), - datetime(2016, 3, 3)]}) + datetime(2016, 2, 5), + datetime(2016, 3, 3)]}) formatters = {'int': lambda x: '0x%x' % x, 'float': lambda x: '[% 4.1f]' % x, @@ -3896,7 +3897,7 @@ def test_to_csv_date_format(self): def test_to_csv_multi_index(self): # see gh-6618 - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]])) + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) exp = ",1\n,2\n0,1\n" self.assertEqual(df.to_csv(), exp) @@ -3904,8 +3905,8 @@ def 
test_to_csv_multi_index(self): exp = "1\n2\n1\n" self.assertEqual(df.to_csv(index=False), exp) - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]]), - index=pd.MultiIndex.from_arrays([[1],[2]])) + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), + index=pd.MultiIndex.from_arrays([[1], [2]])) exp = ",,1\n,,2\n1,2,1\n" self.assertEqual(df.to_csv(), exp) @@ -3913,7 +3914,8 @@ def test_to_csv_multi_index(self): exp = "1\n2\n1\n" self.assertEqual(df.to_csv(index=False), exp) - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([['foo'],['bar']])) + df = DataFrame( + [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) exp = ",foo\n,bar\n0,1\n" self.assertEqual(df.to_csv(), exp) @@ -3938,8 +3940,6 @@ def test_period(self): class TestSeriesFormatting(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() @@ -4452,7 +4452,6 @@ def test_to_string_header(self): class TestEngFormatter(tm.TestCase): - _multiprocess_can_split_ = True def test_eng_float_formatter(self): df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) @@ -4605,9 +4604,9 @@ def test_nan(self): result = formatter(np.nan) self.assertEqual(result, u('NaN')) - df = pd.DataFrame({'a':[1.5, 10.3, 20.5], - 'b':[50.3, 60.67, 70.12], - 'c':[100.2, 101.33, 120.33]}) + df = pd.DataFrame({'a': [1.5, 10.3, 20.5], + 'b': [50.3, 60.67, 70.12], + 'c': [100.2, 101.33, 120.33]}) pt = df.pivot_table(values='a', index='b', columns='c') fmt.set_eng_float_format(accuracy=1) result = pt.to_string() diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index d1eb1faecc401..1e6794c1c9c69 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -5,8 +5,6 @@ import pandas.util.testing as tm import pandas.core.config as cf -_multiprocess_can_split_ = True - def test_adjoin(): data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index 2fec04b9c1aa3..eaa209178b2e9 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -660,7 +660,7 @@ def test_mi_sparse_disabled(self): with pd.option_context('display.multi_sparse', False): df = pd.DataFrame({'A': [1, 2]}, index=pd.MultiIndex.from_arrays([['a', 'a'], - [0, 1]])) + [0, 1]])) result = df.style._translate() body = result['body'] for row in body: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index cab627dec63cb..e84bb6407fafc 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -22,8 +22,6 @@ class TestDataFrameAlterAxes(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_set_index(self): idx = Index(np.arange(len(self.mixed_frame))) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 0dbb78ec89b2e..a55d2cfb2fb2b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -25,8 +25,6 @@ class TestDataFrameAnalytics(tm.TestCase, TestData): - _multiprocess_can_split_ = True - # ---------------------------------------------------------------------= # Correlation and covariance diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 19fa98afd2163..30fde4b5b78d8 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -19,8 +19,6 @@ class TestDataFrameApply(tm.TestCase, TestData): - 
_multiprocess_can_split_ = True - def test_apply(self): with np.errstate(all='ignore'): # ufunc diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 323960d54a42c..8bb26d3d7474c 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -11,7 +11,6 @@ class TestFrameAsof(TestData, tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.N = N = 50 diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index ff6215531fc64..839ceb5368240 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -26,8 +26,6 @@ class TestDataFrameSelectReindex(tm.TestCase, TestData): # These are specific reindex-based tests; other indexing tests should go in # test_indexing - _multiprocess_can_split_ = True - def test_drop_names(self): df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], index=['a', 'b', 'c'], diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 33550670720c3..7b64dea8c102d 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -29,8 +29,6 @@ class TestDataFrameBlockInternals(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_cast_internals(self): casted = DataFrame(self.frame._data, dtype=int) expected = DataFrame(self.frame._series, dtype=int) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 1167662b69375..eed4d6261d6e8 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -22,8 +22,6 @@ class TestDataFrameConcatCommon(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_concat_multiple_frames_dtypes(self): # GH 2759 @@ -427,8 +425,6 @@ def test_concat_axis_parameter(self): class TestDataFrameCombineFirst(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_combine_first_mixed(self): a = Series(['a', 'b'], index=lrange(2)) b = Series(lrange(2), index=lrange(2)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1676c57a274cd..66a235e1260bd 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -36,8 +36,6 @@ class TestDataFrameConstructors(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_constructor(self): df = DataFrame() self.assertEqual(len(df.index), 0) @@ -1886,8 +1884,6 @@ def test_from_records_len0_with_columns(self): class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_from_dict(self): # 8260 diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 53083a602e183..1bc8313726d0c 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -16,8 +16,6 @@ class TestDataFrameConvertTo(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_to_dict(self): test_data = { 'A': {'1': 1, '2': 2}, diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 798982bcbdedf..f7d2c1a654cd5 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -18,8 +18,6 @@ class TestDataFrameDataTypes(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_concat_empty_dataframe_dtypes(self): df = DataFrame(columns=list("abc")) df['a'] = 
df['a'].astype(np.bool_) @@ -539,8 +537,6 @@ def test_arg_for_errors_in_astype(self): class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_interleave(self): # interleave with object diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index f0e6ab4c17915..c06faa75ed346 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -37,8 +37,6 @@ class TestDataFrameIndexing(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_getitem(self): # slicing sl = self.frame[:20] @@ -2841,8 +2839,6 @@ def test_type_error_multiindex(self): class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def setUp(self): self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), name='foo') @@ -2902,8 +2898,6 @@ def test_transpose(self): class TestDataFrameIndexingUInt64(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def setUp(self): self.ir = Index(np.arange(3), dtype=np.uint64) self.idx = Index([2**63, 2**63 + 5, 2**63 + 10], name='foo') diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 2fc14d9e4d123..674202980807a 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -27,8 +27,6 @@ class SharedWithSparse(object): - _multiprocess_can_split_ = True - def test_copy_index_name_checking(self): # don't want to be able to modify the index stored elsewhere after # making a copy @@ -159,8 +157,6 @@ class TestDataFrameMisc(tm.TestCase, SharedWithSparse, TestData): klass = DataFrame - _multiprocess_can_split_ = True - def test_get_axis(self): f = self.frame self.assertEqual(f._get_axis_number(0), 0) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 8c25f71c00684..ef800f0dface3 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -29,8 +29,6 @@ def _skip_if_no_pchip(): class TestDataFrameMissingData(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_dropEmptyRows(self): N = len(self.frame.index) mat = random.randn(N) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 5beab1565e538..6b4c56747c981 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -21,8 +21,6 @@ class TestDataFrameMutateColumns(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_assign(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) original = df.copy() diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 835c18ffc6081..4ad88a12a2625 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -19,8 +19,6 @@ class TestDataFrameNonuniqueIndexes(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_column_dups_operations(self): def check(result, expected=None): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 15f98abe1445d..ec73689088035 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -31,8 +31,6 @@ class TestDataFrameOperators(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_operators(self): garbage = random.random(4) colSeries = Series(garbage, index=np.array(self.frame.columns)) diff --git 
a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 22414a6ba8a53..400ead788aa7c 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -21,8 +21,6 @@ class TestDataFrameQuantile(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_quantile(self): from numpy import percentile diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index a9a90a6f5cd40..aed02b7323f85 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -90,8 +90,6 @@ def test_query_numexpr(self): class TestDataFrameEval(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_ops(self): # tst ops and reversed ops in evaluation @@ -168,8 +166,6 @@ def test_eval_resolvers_as_list(self): class TestDataFrameQueryWithMultiIndex(tm.TestCase): - _multiprocess_can_split_ = True - def check_query_with_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = np.random.choice(['red', 'green'], size=10) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index f46215105b375..8b50036cd50f8 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -23,8 +23,6 @@ class TestDataFrameReplace(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_replace_inplace(self): self.tsframe['A'][:5] = nan self.tsframe['A'][-5:] = nan diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 12cd62f8b4cc0..2df297d03bcdf 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -25,8 +25,6 @@ class TestDataFrameReprInfoEtc(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_repr_empty(self): # empty foo = repr(self.empty) # noqa diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 705270b695b77..1890b33e3dbaa 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -25,8 +25,6 @@ class TestDataFrameReshape(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_pivot(self): data = { 'index': ['A', 'B', 'C', 'C', 'B', 'A'], diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index bbd8dd9b48b5c..7779afdc47b48 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -19,8 +19,6 @@ class TestDataFrameSorting(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_sort_index(self): # GH13496 diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 8bd6d3ba54371..9052a16bf973c 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -13,8 +13,6 @@ class TestDataFrameSubclassing(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_frame_subclassing_and_slicing(self): # Subclass frame and ensure it returns the right class on slicing it # In reference to PR 9632 diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 55848847f2266..862f76b4ecc05 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -27,8 +27,6 @@ class TestDataFrameTimeSeriesMethods(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_diff(self): the_diff = self.tsframe.diff(1) @@ -539,13 +537,13 @@ def test_datetime_assignment_with_NaT_and_diff_time_units(self): result = 
pd.Series(data_ns).to_frame() result['new'] = data_ns expected = pd.DataFrame({0: [1, None], - 'new': [1, None]}, dtype='datetime64[ns]') + 'new': [1, None]}, dtype='datetime64[ns]') tm.assert_frame_equal(result, expected) # OutOfBoundsDatetime error shouldn't occur data_s = np.array([1, 'nat'], dtype='datetime64[s]') result['new'] = data_s expected = pd.DataFrame({0: [1, None], - 'new': [1e9, None]}, dtype='datetime64[ns]') + 'new': [1e9, None]}, dtype='datetime64[ns]') tm.assert_frame_equal(result, expected) def test_frame_to_period(self): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 5c47b0357b4f6..471fc536a90f6 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -31,8 +31,6 @@ class TestDataFrameToCSV(tm.TestCase, TestData): - _multiprocess_can_split_ = True - def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 5f680a6876873..00ddd293f6014 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -27,8 +27,6 @@ class TestGroupByAggregate(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 82ec1832be961..605b327208a03 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -23,8 +23,6 @@ class TestGroupByCategorical(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 663fbd04e7e5a..1640858802047 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -24,8 +24,6 @@ class TestGroupByFilter(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 01c81bd7904bd..df4707fcef3f0 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -36,8 +36,6 @@ class TestGroupBy(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.ts = tm.makeTimeSeries() @@ -5908,8 +5906,8 @@ def test_group_shift_with_null_key(self): g = df.groupby(["A", "B"]) expected = DataFrame([(i + 12 if i % 3 and i < n_rows - 12 - else np.nan) - for i in range(n_rows)], dtype=float, + else np.nan) + for i in range(n_rows)], dtype=float, columns=["Z"], index=None) result = g.shift(-1) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index edb044a3cb2d7..c9a695ee8db3b 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -8,7 +8,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_astype(self): # GH 13149, GH 13209 @@ -185,7 +184,6 @@ def _check_rng(rng): class TestToPeriod(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): data = [Timestamp('2007-01-01 10:11:12.123456Z'), diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index e54ebe3d93bc6..772d76305cff2 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -10,7 +10,6 
@@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_construction_with_alt(self): @@ -428,7 +427,6 @@ def test_000constructor_resolution(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_dti_constructor_preserve_dti_freq(self): rng = date_range('1/1/2000', '1/2/2000', freq='5min') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b2161aa5c75c6..9d5f397329c76 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -9,7 +9,6 @@ class TestTimeSeries(TestData, tm.TestCase): - _multiprocess_can_split_ = True def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 628cb9df94e39..2c87c48bcda11 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -14,7 +14,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_get_loc(self): idx = pd.date_range('2000-01-01', periods=3) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index eea08febc86e6..2b254bc8be931 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -10,7 +10,6 @@ class TestDatetimeIndex(DatetimeLike, tm.TestCase): _holder = DatetimeIndex - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=tm.makeDateIndex(10)) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 5b6bcffe71856..23271a8d45499 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -7,7 +7,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_where_other(self): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index dda2785d2b0ae..6b0191edbda5a 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -53,7 +53,6 @@ def test_second(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_pass_datetimeindex_to_index(self): # Bugs in #1396 diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py index 5c408d5300cdc..8f3752227b6d0 100644 --- a/pandas/tests/indexes/datetimes/test_missing.py +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -3,7 +3,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_fillna_datetime64(self): # GH 11343 diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c25cd6a3fa90e..a46980a0f742a 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -955,7 +955,6 @@ def test_second(self): class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True # GH 10699 def test_datetime64_with_DateOffset(self): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 229ae803aa2ff..7777de869bb20 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ 
-9,7 +9,6 @@ class TestDatetimeIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_union(self): i1 = Int64Index(np.arange(0, 20, 2)) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 42d135f634298..841d0be605058 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -168,7 +168,6 @@ def test_to_datetime_format_weeks(self): class TestToDatetime(tm.TestCase): - _multiprocess_can_split_ = True def test_to_datetime_dt64s(self): in_bound_dts = [ @@ -989,6 +988,7 @@ def test_to_datetime_iso8601_noleading_0s(self): class TestDaysInMonth(tm.TestCase): # tests for issue #10154 + def test_day_not_in_month_coerce(self): self.assertTrue(isnull(to_datetime('2015-02-29', errors='coerce'))) self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c574a4a1f01a7..2f5b98d145e57 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -31,7 +31,6 @@ class TestIndex(Base, tm.TestCase): _holder = Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100), @@ -1795,7 +1794,6 @@ class TestMixedIntIndex(Base, tm.TestCase): # (GH 13514) _holder = Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(mixedIndex=Index([0, 'a', 1, 'b', 2, 'c'])) @@ -1993,7 +1991,7 @@ def test_dropna(self): idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days']) tm.assert_index_equal(idx.dropna(), idx) nanidx = pd.TimedeltaIndex([pd.NaT, '1 days', '2 days', - '3 days', pd.NaT]) + '3 days', pd.NaT]) tm.assert_index_equal(nanidx.dropna(), idx) idx = pd.PeriodIndex(['2012-02', '2012-04', '2012-05'], freq='M') diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 708f424d9bad1..6b6885c082533 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -225,6 +225,7 @@ def test_map(self): # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), ordered=False) + def f(x): return {'A': 10, 'B': 20, 'C': 30}.get(x) @@ -360,7 +361,8 @@ def test_reindexing(self): expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal(expected.values, actual, check_dtype=False) + tm.assert_numpy_array_equal( + expected.values, actual, check_dtype=False) def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) @@ -519,7 +521,7 @@ def test_ensure_copied_data(self): # GH12309 # Must be tested separately from other indexes because # self.value is not an ndarray - _base = lambda ar : ar if ar.base is None else ar.base + _base = lambda ar: ar if ar.base is None else ar.base for index in self.indices.values(): result = CategoricalIndex(index.values, copy=True) tm.assert_index_equal(index, result) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index e5a4ced4ced4d..b212a7b75904c 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -16,7 +16,6 @@ class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=tm.makePeriodIndex(10)) @@ -240,7 +239,6 @@ def test_difference_freq(self): class TestTimedeltaIndex(DatetimeLike, tm.TestCase): _holder = TimedeltaIndex - 
_multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=tm.makeTimedeltaIndex(10)) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 7d9ceb526b912..365236f72e80e 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -30,7 +30,6 @@ class TestMultiIndex(Base, tm.TestCase): _holder = MultiIndex - _multiprocess_can_split_ = True _compat_props = ['shape', 'ndim', 'size', 'itemsize'] def setUp(self): @@ -900,11 +899,11 @@ def test_append_mixed_dtypes(self): res = mi.append(mi) exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], - [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], - ['a', 'b', 'c', 'a', 'b', 'c'], - dti.append(dti), - dti_tz.append(dti_tz), - pi.append(pi)]) + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ['a', 'b', 'c', 'a', 'b', 'c'], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi)]) tm.assert_index_equal(res, exp) other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], @@ -913,11 +912,11 @@ def test_append_mixed_dtypes(self): res = mi.append(other) exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], - [1.1, np.nan, 3.3, 'x', 'y', 'z'], - ['a', 'b', 'c', 'x', 'y', 'z'], - dti.append(pd.Index(['x', 'y', 'z'])), - dti_tz.append(pd.Index(['x', 'y', 'z'])), - pi.append(pd.Index(['x', 'y', 'z']))]) + [1.1, np.nan, 3.3, 'x', 'y', 'z'], + ['a', 'b', 'c', 'x', 'y', 'z'], + dti.append(pd.Index(['x', 'y', 'z'])), + dti_tz.append(pd.Index(['x', 'y', 'z'])), + pi.append(pd.Index(['x', 'y', 'z']))]) tm.assert_index_equal(res, exp) def test_get_level_values(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 4dab7ae76a011..1bf9a10628542 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -176,7 +176,6 @@ def test_modulo(self): class TestFloat64Index(Numeric, tm.TestCase): _holder = Float64Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), @@ -624,7 +623,6 @@ def test_ufunc_coercions(self): class TestInt64Index(NumericInt, tm.TestCase): _dtype = 'int64' _holder = Int64Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) @@ -895,7 +893,6 @@ class TestUInt64Index(NumericInt, tm.TestCase): _dtype = 'uint64' _holder = UInt64Index - _multiprocess_can_split_ = True def setUp(self): self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, diff --git a/pandas/tests/indexes/test_timedelta.py b/pandas/tests/indexes/test_timedelta.py index be01ad03a0660..e6071b8c4fa06 100644 --- a/pandas/tests/indexes/test_timedelta.py +++ b/pandas/tests/indexes/test_timedelta.py @@ -34,7 +34,6 @@ def test_timedelta(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_series_box_timedelta(self): rng = timedelta_range('1 day 1 s', periods=5, freq='h') diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index bcadc41b13370..1d70205076b86 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -8,8 +8,6 @@ class TestIndexingCallable(tm.TestCase): - _multiprocess_can_split_ = True - def test_frame_loc_ix_callable(self): # GH 11485 df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': list('aabb'), diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 0cfa7258461f1..b9a746cd25c7a 100644 --- a/pandas/tests/indexing/test_coercion.py +++ 
b/pandas/tests/indexing/test_coercion.py @@ -15,8 +15,6 @@ class CoercionBase(object): - _multiprocess_can_split_ = True - klasses = ['index', 'series'] dtypes = ['object', 'int64', 'float64', 'complex128', 'bool', 'datetime64', 'datetime64tz', 'timedelta64', 'period'] @@ -1187,7 +1185,7 @@ def _assert_replace_conversion(self, from_key, to_key, how): to_key in ('bool')) or # TODO_GH12747 The result must be int? - (from_key == 'bool' and to_key == 'int64')): + (from_key == 'bool' and to_key == 'int64')): # buggy on 32-bit if tm.is_platform_32bit(): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index a9dfcf2672357..b06b1067b7c6b 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -99,8 +99,6 @@ def _mklbl(prefix, n): class TestIndexing(tm.TestCase): - _multiprocess_can_split_ = True - _objs = set(['series', 'frame', 'panel']) _typs = set(['ints', 'uints', 'labels', 'mixed', 'ts', 'floats', 'empty', 'ts_rev']) diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index 5d563e20087b9..42b50e37f0492 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -10,8 +10,6 @@ class TestIndexingSlow(tm.TestCase): - _multiprocess_can_split_ = True - @tm.slow def test_multiindex_get_loc(self): # GH7724, GH2646 diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c92287b2bdc42..11f00386ec592 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -18,6 +18,7 @@ @tm.mplskip class TestSeriesPlots(TestPlotBase): + def setUp(self): TestPlotBase.setUp(self) import matplotlib as mpl @@ -49,6 +50,7 @@ def test_bootstrap_plot(self): @tm.mplskip class TestDataFramePlots(TestPlotBase): + @slow def test_scatter_plot_legacy(self): tm._skip_if_no_scipy() diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index f686f1aa6dc47..0cef27d2e41fc 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1149,6 +1149,7 @@ def test_round_nat(self): class TestTimestampNsOperations(tm.TestCase): + def setUp(self): self.timestamp = Timestamp(datetime.utcnow()) @@ -1324,6 +1325,7 @@ def test_nat_arithmetic_index(self): class TestTimestampOps(tm.TestCase): + def test_timestamp_and_datetime(self): self.assertEqual((Timestamp(datetime( 2013, 10, 13)) - datetime(2013, 10, 12)).days, 1) @@ -1404,6 +1406,7 @@ def test_resolution(self): class TestTimestampToJulianDate(tm.TestCase): + def test_compare_1700(self): r = Timestamp('1700-06-23').to_julian_date() self.assertEqual(r, 2342145.5) @@ -1426,7 +1429,6 @@ def test_compare_hour13(self): class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True def test_timestamp_to_datetime(self): tm._skip_if_no_pytz() diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 2ddfa27eea377..6473dbeeaa1bc 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -18,8 +18,6 @@ class TestSeriesAlterAxes(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_setindex(self): # wrong type series = self.series.copy() diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 07e1be609670f..52b85c89a7009 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -30,8 +30,6 @@ class 
TestSeriesAnalytics(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_sum_zero(self): arr = np.array([]) self.assertEqual(nanops.nansum(arr), 0) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index ec7ffde344d31..16d1466bb90fe 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -15,8 +15,6 @@ class TestSeriesApply(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_apply(self): with np.errstate(all='ignore'): assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) @@ -141,8 +139,6 @@ def f(x): class TestSeriesMap(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_map(self): index, data = tm.getMixedTypeDict() diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index db306d2a742c1..d2fd8858e7647 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -10,7 +10,6 @@ class TestSeriesAsof(TestData, tm.TestCase): - _multiprocess_can_split_ = True def test_basic(self): diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 7bcd1763537dc..d4e5d36c15c68 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -18,8 +18,6 @@ class TestSeriesCombine(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_append(self): appendedSeries = self.series.append(self.objSeries) for idx, value in compat.iteritems(appendedSeries): @@ -222,8 +220,6 @@ def test_combine_first_dt64(self): class TestTimeseries(tm.TestCase): - _multiprocess_can_split_ = True - def test_append_concat(self): rng = date_range('5/8/2012 1:45', periods=10, freq='5T') ts = Series(np.random.randn(len(rng)), rng) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 777b188b8fdd9..aef4c9269bc62 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -26,8 +26,6 @@ class TestSeriesConstructors(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_scalar_conversion(self): # Pass in scalar is disabled diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index b9f999a6c6ffe..4c697c7e52bb8 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -22,8 +22,6 @@ class TestSeriesDatetimeValues(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_dt_namespace_accessor(self): # GH 7207, 11128 @@ -168,9 +166,9 @@ def compare(s, name): cases = [Series(timedelta_range('1 day', periods=5), index=list('abcde'), name='xxx'), Series(timedelta_range('1 day 01:23:45', periods=5, - freq='s'), name='xxx'), + freq='s'), name='xxx'), Series(timedelta_range('2 days 01:23:45.012345', periods=5, - freq='ms'), name='xxx')] + freq='ms'), name='xxx')] for s in cases: for prop in ok_for_td: # we test freq below diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 127a410f66fdb..13375ab886d8d 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -20,8 +20,6 @@ class TestSeriesDtypes(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_astype(self): s = Series(np.random.randn(5), name='foo') diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index e0d83d6eeadac..a20cb8324d2a3 100644 --- 
a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -31,8 +31,6 @@ class TestSeriesIndexing(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_get(self): # GH 6383 @@ -2216,7 +2214,6 @@ def test_setitem_slice_into_readonly_backing_data(self): class TestTimeSeriesDuplicates(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): dates = [datetime(2000, 1, 2), datetime(2000, 1, 2), @@ -2603,6 +2600,7 @@ def test_frame_datetime64_duplicated(self): class TestNatIndexing(tm.TestCase): + def setUp(self): self.series = Series(date_range('1/1/2000', periods=10)) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index e3a0e056f4da1..a3b13ba9b993a 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -16,8 +16,6 @@ class TestSeriesInternals(tm.TestCase): - _multiprocess_can_split_ = True - def test_convert_objects(self): s = Series([1., 2, 3], index=['a', 'b', 'c']) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 48528dc54adbd..d514fbfc142f0 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -18,8 +18,6 @@ class TestSeriesToCSV(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_from_csv(self): with ensure_clean() as path: @@ -112,8 +110,6 @@ def test_to_csv_path_is_none(self): class TestSeriesIO(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_to_frame(self): self.ts.name = None rs = self.ts.to_frame() @@ -174,8 +170,6 @@ class SubclassedFrame(DataFrame): class TestSeriesToList(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_tolist(self): rs = self.ts.tolist() xp = self.ts.values.tolist() diff --git a/pandas/tests/series/test_misc_api.py b/pandas/tests/series/test_misc_api.py index b1b06cc7be8a4..2facbaf1fe31e 100644 --- a/pandas/tests/series/test_misc_api.py +++ b/pandas/tests/series/test_misc_api.py @@ -118,8 +118,6 @@ def test_to_sparse_pass_name(self): class TestSeriesMisc(TestData, SharedWithSparse, tm.TestCase): - _multiprocess_can_split_ = True - def test_tab_completion(self): # GH 9910 s = Series(list('abcd')) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 6821a8b9f4221..702fa2acb5106 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -41,8 +41,6 @@ def _simple_ts(start, end, freq='D'): class TestSeriesMissingData(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_timedelta_fillna(self): # GH 3371 s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 7b1201b971c71..3d609dec7958a 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -28,8 +28,6 @@ class TestSeriesOperators(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_series_comparison_scalars(self): series = Series(date_range('1/1/2000', periods=10)) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index aa16f2cca9475..7fe31bab87537 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -11,8 +11,6 @@ class TestSeriesReplace(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_replace(self): N = 100 ser = pd.Series(np.random.randn(N)) diff --git a/pandas/tests/series/test_repr.py 
b/pandas/tests/series/test_repr.py index af52f6e712e61..99a406a71b12b 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -18,8 +18,6 @@ class TestSeriesRepr(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_multilevel_name_print(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index fb3817eb84acd..db506f12a2293 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -13,8 +13,6 @@ class TestSeriesSorting(TestData, tm.TestCase): - _multiprocess_can_split_ = True - def test_sort(self): ts = self.ts.copy() diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 5bcf258020349..3b1b8aca426e1 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -8,8 +8,6 @@ class TestSeriesSubclassing(tm.TestCase): - _multiprocess_can_split_ = True - def test_indexing_sliced(self): s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd')) res = s.loc[['a', 'b']] @@ -37,8 +35,6 @@ def test_to_frame(self): class TestSparseSeriesSubclassing(tm.TestCase): - _multiprocess_can_split_ = True - def test_subclass_sparse_slice(self): # int64 s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5]) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index bd346fb9bb0c8..e0db813e60c14 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -32,7 +32,6 @@ def assert_range_equal(left, right): class TestTimeSeries(TestData, tm.TestCase): - _multiprocess_can_split_ = True def test_shift(self): shifted = self.ts.shift(1) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 40b277f3f1f8a..fab04f7fa4bf2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -20,7 +20,6 @@ class TestMatch(tm.TestCase): - _multiprocess_can_split_ = True def test_ints(self): values = np.array([0, 2, 1]) @@ -57,7 +56,6 @@ def test_strings(self): class TestSafeSort(tm.TestCase): - _multiprocess_can_split_ = True def test_basic_sort(self): values = [3, 1, 2, 0, 4] @@ -144,7 +142,6 @@ def test_exceptions(self): class TestFactorize(tm.TestCase): - _multiprocess_can_split_ = True def test_basic(self): @@ -306,7 +303,6 @@ def test_uint64_factorize(self): class TestUnique(tm.TestCase): - _multiprocess_can_split_ = True def test_ints(self): arr = np.random.randint(0, 100, size=50) @@ -389,7 +385,6 @@ def test_uint64_overflow(self): class TestIsin(tm.TestCase): - _multiprocess_can_split_ = True def test_invalid(self): @@ -472,7 +467,6 @@ def test_large(self): class TestValueCounts(tm.TestCase): - _multiprocess_can_split_ = True def test_value_counts(self): np.random.seed(1234) @@ -659,8 +653,6 @@ def test_value_counts_uint64(self): class TestDuplicated(tm.TestCase): - _multiprocess_can_split_ = True - def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) @@ -896,7 +888,6 @@ def test_group_var_constant(self): class TestGroupVarFloat64(tm.TestCase, GroupVarTestMixin): __test__ = True - _multiprocess_can_split_ = True algo = algos.algos.group_var_float64 dtype = np.float64 @@ -920,7 +911,6 @@ def test_group_var_large_inputs(self): class TestGroupVarFloat32(tm.TestCase, GroupVarTestMixin): __test__ = True - _multiprocess_can_split_ = True algo = algos.algos.group_var_float32 dtype = np.float32 @@ -1068,7 
+1058,6 @@ def test_arrmap(): class TestTseriesUtil(tm.TestCase): - _multiprocess_can_split_ = True def test_combineFunc(self): pass diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index be55d6e1976ec..cc99cf0f830aa 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -26,7 +26,6 @@ class TestCategorical(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], @@ -1574,12 +1573,12 @@ def test_searchsorted(self): # https://github.com/pandas-dev/pandas/issues/14522 c1 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], - categories=['cheese', 'milk', 'apple', 'bread'], - ordered=True) + categories=['cheese', 'milk', 'apple', 'bread'], + ordered=True) s1 = pd.Series(c1) c2 = pd.Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'], - categories=['cheese', 'milk', 'apple', 'bread'], - ordered=False) + categories=['cheese', 'milk', 'apple', 'bread'], + ordered=False) s2 = pd.Series(c2) # Searching for single item argument, side='left' (default) @@ -1697,8 +1696,8 @@ def test_map(self): tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) def test_validate_inplace(self): - cat = Categorical(['A','B','B','C','A']) - invalid_values = [1, "True", [1,2,3], 5.0] + cat = Categorical(['A', 'B', 'B', 'C', 'A']) + invalid_values = [1, "True", [1, 2, 3], 5.0] for value in invalid_values: with self.assertRaises(ValueError): @@ -1711,19 +1710,21 @@ def test_validate_inplace(self): cat.as_unordered(inplace=value) with self.assertRaises(ValueError): - cat.set_categories(['X','Y','Z'], rename=True, inplace=value) + cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value) with self.assertRaises(ValueError): - cat.rename_categories(['X','Y','Z'], inplace=value) + cat.rename_categories(['X', 'Y', 'Z'], inplace=value) with self.assertRaises(ValueError): - cat.reorder_categories(['X','Y','Z'], ordered=True, inplace=value) + cat.reorder_categories( + ['X', 'Y', 'Z'], ordered=True, inplace=value) with self.assertRaises(ValueError): - cat.add_categories(new_categories=['D','E','F'], inplace=value) + cat.add_categories( + new_categories=['D', 'E', 'F'], inplace=value) with self.assertRaises(ValueError): - cat.remove_categories(removals=['D','E','F'], inplace=value) + cat.remove_categories(removals=['D', 'E', 'F'], inplace=value) with self.assertRaises(ValueError): cat.remove_unused_categories(inplace=value) @@ -1733,7 +1734,6 @@ def test_validate_inplace(self): class TestCategoricalAsBlock(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) @@ -3045,13 +3045,15 @@ def test_value_counts_with_nan(self): tm.assert_series_equal(res, exp) # we don't exclude the count of None and sort by counts - exp = pd.Series([3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"])) + exp = pd.Series( + [3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"])) res = s.value_counts(dropna=False) tm.assert_series_equal(res, exp) # When we aren't sorting by counts, and np.nan isn't a # category, it should be last. 
- exp = pd.Series([2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan])) + exp = pd.Series( + [2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan])) res = s.value_counts(dropna=False, sort=False) tm.assert_series_equal(res, exp) @@ -3703,7 +3705,8 @@ def f(): # assign a part of a column with dtype == categorical -> # exp_parts_cats_col df = orig.copy() - df.loc["j":"k", df.columns[0]] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df.loc["j":"k", df.columns[0]] = pd.Categorical( + ["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) with tm.assertRaises(ValueError): @@ -4013,7 +4016,6 @@ def test_concat_append_gh7864(self): self.assert_index_equal(df['grade'].cat.categories, dfa['grade'].cat.categories) - def test_concat_preserve(self): # GH 8641 series concat not preserving category dtype @@ -4042,7 +4044,7 @@ def test_concat_preserve(self): res = pd.concat([df2, df2]) exp = DataFrame({'A': pd.concat([a, a]), 'B': pd.concat([b, b]).astype( - 'category', categories=list('cab'))}) + 'category', categories=list('cab'))}) tm.assert_frame_equal(res, exp) def test_categorical_index_preserver(self): @@ -4052,18 +4054,18 @@ def test_categorical_index_preserver(self): df2 = DataFrame({'A': a, 'B': b.astype('category', categories=list('cab')) - }).set_index('B') + }).set_index('B') result = pd.concat([df2, df2]) expected = DataFrame({'A': pd.concat([a, a]), 'B': pd.concat([b, b]).astype( 'category', categories=list('cab')) - }).set_index('B') + }).set_index('B') tm.assert_frame_equal(result, expected) # wrong catgories df3 = DataFrame({'A': a, 'B': pd.Categorical(b, categories=list('abc')) - }).set_index('B') + }).set_index('B') self.assertRaises(TypeError, lambda: pd.concat([df2, df3])) def test_merge(self): @@ -4391,8 +4393,8 @@ def test_str_accessor_api_for_categorical(self): ('decode', ("UTF-8",), {}), ('encode', ("UTF-8",), {}), ('endswith', ("a",), {}), - ('extract', ("([a-z]*) ",), {"expand":False}), - ('extract', ("([a-z]*) ",), {"expand":True}), + ('extract', ("([a-z]*) ",), {"expand": False}), + ('extract', ("([a-z]*) ",), {"expand": True}), ('extractall', ("([a-z]*) ",), {}), ('find', ("a",), {}), ('findall', ("a",), {}), @@ -4550,8 +4552,6 @@ def test_concat_categorical(self): class TestCategoricalSubclassing(tm.TestCase): - _multiprocess_can_split_ = True - def test_constructor(self): sc = tm.SubclassedCategorical(['a', 'b', 'c']) self.assertIsInstance(sc, tm.SubclassedCategorical) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 0239250129494..90b1157572be1 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -7,8 +7,6 @@ import pandas.core.common as com import pandas.util.testing as tm -_multiprocess_can_split_ = True - def test_mut_exclusive(): msg = "mutually exclusive arguments: '[ab]' and '[ab]'" diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index ed8c37fd6dd20..c58aada193b15 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -5,7 +5,6 @@ class TestConfig(unittest.TestCase): - _multiprocess_can_split_ = True def __init__(self, *args): super(TestConfig, self).__init__(*args) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 18b078d0a677e..eca4a8f3c9e66 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -58,8 +58,6 @@ class TestExpressions(tm.TestCase): - _multiprocess_can_split_ = False - def setUp(self): self.frame = _frame.copy() diff --git 
a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 5bf2eda47ea27..916d7ae0b0ec4 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -33,8 +33,6 @@ class Generic(object): - _multiprocess_can_split_ = True - def setUp(self): pass diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 2bfe31ad4260e..1dfea168c067c 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -182,8 +182,6 @@ def create_mgr(descr, item_shape=None): class TestBlock(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): # self.fblock = get_float_ex() # a,c,e # self.cblock = get_complex_ex() # @@ -299,7 +297,6 @@ def test_split_block_at(self): class TestDatetimeBlock(tm.TestCase): - _multiprocess_can_split_ = True def test_try_coerce_arg(self): block = create_block('datetime', [0]) @@ -318,7 +315,6 @@ def test_try_coerce_arg(self): class TestBlockManager(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.mgr = create_mgr( @@ -1057,7 +1053,6 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, class TestBlockPlacement(tm.TestCase): - _multiprocess_can_split_ = True def test_slice_len(self): self.assertEqual(len(BlockPlacement(slice(0, 4))), 4) diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 0e7dda05a0c27..2a16d7663b0cf 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -9,7 +9,6 @@ class TestIndexer(tm.TestCase): - _multiprocess_can_split_ = True def test_outer_join_indexer(self): typemap = [('int32', _join.outer_join_indexer_int32), diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d87ad8d906854..1fe2d701f5a41 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -25,8 +25,6 @@ class TestMultiLevel(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 89e8fb78ad821..4f56419b1323a 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -52,7 +52,6 @@ def not_hashable(self): class SafeForLongAndSparse(object): - _multiprocess_can_split_ = True def test_repr(self): repr(self.panel) @@ -177,7 +176,6 @@ def wrapper(x): class SafeForSparse(object): - _multiprocess_can_split_ = True @classmethod def assert_panel_equal(cls, x, y): @@ -422,8 +420,6 @@ def test_abs(self): class CheckIndexing(object): - _multiprocess_can_split_ = True - def test_getitem(self): self.assertRaises(Exception, self.panel.__getitem__, 'ItemQ') @@ -869,7 +865,6 @@ def test_set_value(self): class TestPanel(tm.TestCase, PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): - _multiprocess_can_split_ = True @classmethod def assert_panel_equal(cls, x, y): @@ -2278,7 +2273,6 @@ class TestLongPanel(tm.TestCase): """ LongPanel no longer exists, but... 
""" - _multiprocess_can_split_ = True def setUp(self): import warnings diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index aeca24964222a..96864c626ba7f 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -29,8 +29,6 @@ def add_nans(panel4d): class SafeForLongAndSparse(object): - _multiprocess_can_split_ = True - def test_repr(self): repr(self.panel4d) @@ -148,8 +146,6 @@ def wrapper(x): class SafeForSparse(object): - _multiprocess_can_split_ = True - @classmethod def assert_panel_equal(cls, x, y): assert_panel_equal(x, y) @@ -305,8 +301,6 @@ def test_abs(self): class CheckIndexing(object): - _multiprocess_can_split_ = True - def test_getitem(self): self.assertRaises(Exception, self.panel4d.__getitem__, 'ItemQ') @@ -604,8 +598,6 @@ def test_set_value(self): class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, SafeForLongAndSparse): - _multiprocess_can_split_ = True - @classmethod def assert_panel4d_equal(cls, x, y): assert_panel4d_equal(x, y) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index b5fa945a5bb8f..ed5ec970ba33c 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -14,8 +14,6 @@ import pandas.util.testing as tm from pandas.compat import range, u -_multiprocess_can_split_ = True - class TestMelt(tm.TestCase): diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index eb8ab02c29548..118c4147a2019 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -13,7 +13,6 @@ class TestRank(tm.TestCase): - _multiprocess_can_split_ = True s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) df = DataFrame({'A': s, 'B': s}) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f358946983dce..ce97b09b7e3ca 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -20,8 +20,6 @@ class TestStringMethods(tm.TestCase): - _multiprocess_can_split_ = True - def test_api(self): # GH 6106, GH 9322 diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index bf8a3ab370625..3aed22c140ffe 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -8,15 +8,11 @@ import pandas.util.testing as tm from pandas.tslib import iNaT -_multiprocess_can_split_ = True - class TestTake(tm.TestCase): # standard incompatible fill error fill_error = re.compile("Incompatible type for fill_value") - _multiprocess_can_split_ = True - def test_1d_with_out(self): def _test_dtype(dtype, can_hold_na, writeable=True): data = np.random.randint(0, 2, 4).astype(dtype) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 5e60efd153ab1..466e9ee5a30b8 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -18,7 +18,6 @@ class TestAssertAlmostEqual(tm.TestCase): - _multiprocess_can_split_ = True def _assert_almost_equal_both(self, a, b, **kwargs): assert_almost_equal(a, b, **kwargs) @@ -146,7 +145,6 @@ def test_assert_almost_equal_object(self): class TestUtilTesting(tm.TestCase): - _multiprocess_can_split_ = True def test_raise_with_traceback(self): with assertRaisesRegexp(LookupError, "error_text"): @@ -347,7 +345,6 @@ def test_assert_almost_equal_iterable_message(self): class TestAssertIndexEqual(unittest.TestCase): - _multiprocess_can_split_ = True def test_index_equal_message(self): @@ -495,7 +492,6 @@ def test_index_equal_metadata_message(self): class TestAssertSeriesEqual(tm.TestCase): - _multiprocess_can_split_ = True def _assert_equal(self, x, y, **kwargs): 
assert_series_equal(x, y, **kwargs) @@ -590,7 +586,6 @@ def test_series_equal_message(self): class TestAssertFrameEqual(tm.TestCase): - _multiprocess_can_split_ = True def _assert_equal(self, x, y, **kwargs): assert_frame_equal(x, y, **kwargs) @@ -701,7 +696,6 @@ def test_notisinstance(self): class TestAssertCategoricalEqual(unittest.TestCase): - _multiprocess_can_split_ = True def test_categorical_equal_message(self): diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index e2f6a7f6cc1ed..1bf9f4da45bff 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -314,6 +314,7 @@ def test_validation(self): class TestMove(tm.TestCase): + def test_cannot_create_instance_of_stolenbuffer(self): """Stolen buffers need to be created through the smart constructor ``move_into_mutable_buffer`` which has a bunch of checks in it. diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index dc23469976e35..48861fc6a9528 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -32,8 +32,6 @@ def assert_equal(left, right): class Base(tm.TestCase): - _multiprocess_can_split_ = True - _nan_locs = np.arange(20, 40) _inf_locs = np.array([]) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index a8579e89aeb1f..497130b117289 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -19,8 +19,6 @@ DatetimeTZDtype, PeriodDtype) from pandas.util import testing as tm -_multiprocess_can_split_ = True - class TestPossiblyDowncast(tm.TestCase): diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py index 7c17c61aec440..4667bbd47ad18 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/types/test_common.py @@ -7,8 +7,6 @@ import pandas.util.testing as tm -_multiprocess_can_split_ = True - class TestPandasDtype(tm.TestCase): diff --git a/pandas/tests/types/test_concat.py b/pandas/tests/types/test_concat.py index 8acafe0af1792..f4faab45f4ba2 100644 --- a/pandas/tests/types/test_concat.py +++ b/pandas/tests/types/test_concat.py @@ -7,8 +7,6 @@ class TestConcatCompat(tm.TestCase): - _multiprocess_can_split_ = True - def check_concat(self, to_concat, exp): for klass in [pd.Index, pd.Series]: to_concat_klass = [klass(c) for c in to_concat] diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index 68105cfd7c886..8ef2868ae324f 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -15,8 +15,6 @@ _coerce_to_dtype) import pandas.util.testing as tm -_multiprocess_can_split_ = True - class Base(object): diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py index 2861252bef26a..c7c8b0becad63 100644 --- a/pandas/tests/types/test_generic.py +++ b/pandas/tests/types/test_generic.py @@ -5,8 +5,6 @@ import pandas.util.testing as tm from pandas.types import generic as gt -_multiprocess_can_split_ = True - class TestABCClasses(tm.TestCase): tuples = [[1, 2, 2], ['red', 'blue', 'red']] diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index 15f9545f3476c..629aa63f4a0ae 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -35,8 +35,6 @@ from pandas.types.missing import isnull from pandas.util import testing as tm -_multiprocess_can_split_ = True - def test_is_sequence(): is_seq = inference.is_sequence @@ -340,7 +338,6 @@ def test_mixed_dtypes_remain_object_array(self): class 
TestTypeInference(tm.TestCase): - _multiprocess_can_split_ = True def test_length_zero(self): result = lib.infer_dtype(np.array([], dtype='i4')) diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py index 2b09cf5ab633d..cab44f1122ae1 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/types/test_missing.py @@ -14,8 +14,6 @@ from pandas.types.missing import (array_equivalent, isnull, notnull, na_value_for_dtype) -_multiprocess_can_split_ = True - def test_notnull(): assert notnull(1.) diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py index dae24c48b8238..87a0dda34a525 100644 --- a/pandas/tools/tests/test_concat.py +++ b/pandas/tools/tests/test_concat.py @@ -17,8 +17,6 @@ class ConcatenateBase(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.frame = DataFrame(tm.getSeriesData()) self.mixed_frame = self.frame.copy() diff --git a/pandas/tools/tests/test_hashing.py b/pandas/tools/tests/test_hashing.py index fb1f187ddd5c0..05a352f259e8b 100644 --- a/pandas/tools/tests/test_hashing.py +++ b/pandas/tools/tests/test_hashing.py @@ -8,8 +8,6 @@ class TestHashing(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.df = DataFrame( {'i32': np.array([1, 2, 3] * 3, dtype='int32'), diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py index 605a85026d605..ff0a494bd7d02 100644 --- a/pandas/tools/tests/test_join.py +++ b/pandas/tools/tests/test_join.py @@ -20,8 +20,6 @@ class TestJoin(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): # aggregate multiple columns self.df = DataFrame({'key1': get_test_data(), diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 88856a012da6f..a348a901442c9 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -33,8 +33,6 @@ def get_test_data(ngroups=NGROUPS, n=N): class TestMerge(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): # aggregate multiple columns self.df = DataFrame({'key1': get_test_data(), diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tools/tests/test_merge_asof.py index 8e7323f72a8f5..76798b3c895ea 100644 --- a/pandas/tools/tests/test_merge_asof.py +++ b/pandas/tools/tests/test_merge_asof.py @@ -11,7 +11,6 @@ class TestAsOfMerge(tm.TestCase): - _multiprocess_can_split_ = True def read_data(self, name, dedupe=False): path = os.path.join(tm.get_data_path(), name) @@ -686,7 +685,7 @@ def test_allow_exact_matches_and_tolerance3(self): # GH 13709 df1 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.030', - '2016-07-15 13:30:00.030']), + '2016-07-15 13:30:00.030']), 'username': ['bob', 'charlie']}) df2 = pd.DataFrame({ 'time': pd.to_datetime(['2016-07-15 13:30:00.000', diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 398e57d4ad0a4..40b46c5413c8f 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -12,8 +12,6 @@ class TestPivotTable(tm.TestCase): - _multiprocess_can_split_ = True - def setUp(self): self.data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', @@ -1152,8 +1150,8 @@ def test_crosstab_normalize(self): pd.crosstab(df.a, df.b, normalize='index')) row_normal_margins = pd.DataFrame([[1.0, 0], - [0.25, 0.75], - [0.4, 0.6]], + [0.25, 0.75], + [0.4, 0.6]], index=pd.Index([1, 2, 'All'], name='a', dtype='object'), @@ -1165,8 +1163,8 @@ def test_crosstab_normalize(self): name='b')) all_normal_margins 
= pd.DataFrame([[0.2, 0, 0.2], - [0.2, 0.6, 0.8], - [0.4, 0.6, 1]], + [0.2, 0.6, 0.8], + [0.4, 0.6, 1]], index=pd.Index([1, 2, 'All'], name='a', dtype='object'), diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index c5261597cf35d..de44eadc15751 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -303,7 +303,7 @@ def test_datetime_cut(self): data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03'])) result, bins = cut(data, 3, retbins=True) expected = Series(['(2012-12-31 23:57:07.200000, 2013-01-01 16:00:00]', - '(2013-01-01 16:00:00, 2013-01-02 08:00:00]', + '(2013-01-01 16:00:00, 2013-01-02 08:00:00]', '(2013-01-02 08:00:00, 2013-01-03 00:00:00]'], ).astype("category", ordered=True) tm.assert_series_equal(result, expected) @@ -316,8 +316,8 @@ def test_datetime_cut(self): # testing for time data to be present as ndarray data = np.array([np.datetime64('2013-01-01'), - np.datetime64('2013-01-02'), - np.datetime64('2013-01-03')]) + np.datetime64('2013-01-02'), + np.datetime64('2013-01-03')]) result, bins = cut(data, 3, retbins=True) tm.assert_series_equal(Series(result), expected) @@ -330,7 +330,7 @@ def test_datetime_bin(self): data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')] bin_data = ['2012-12-12', '2012-12-14', '2012-12-16'] expected = Series(['(2012-12-12 00:00:00, 2012-12-14 00:00:00]', - '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'], + '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'], ).astype("category", ordered=True) for conv in [Timestamp, Timestamp, np.datetime64]: diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 8c75195b25ef5..98151d5b6130c 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -393,7 +393,7 @@ def __array_wrap__(self, result, context=None): left = context[1][0] right = context[1][1] if (isinstance(left, PeriodIndex) and - isinstance(right, PeriodIndex)): + isinstance(right, PeriodIndex)): name = left.name if left.name == right.name else None return Index(result, name=name) elif isinstance(left, Period) or isinstance(right, Period): diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 2ff06517f175a..be3b917cb8117 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -15,6 +15,7 @@ class TestTimedeltaIndexOps(Ops): + def setUp(self): super(TestTimedeltaIndexOps, self).setUp() mask = lambda x: isinstance(x, TimedeltaIndex) @@ -490,7 +491,7 @@ def test_addition_ops(self): def test_comp_nat(self): left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')]) + pd.Timedelta('3 days')]) right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) for l, r in [(left, right), (left.asobject, right.asobject)]: @@ -854,6 +855,7 @@ def test_equals(self): class TestPeriodIndexOps(Ops): + def setUp(self): super(TestPeriodIndexOps, self).setUp() mask = lambda x: (isinstance(x, DatetimeIndex) or diff --git a/pandas/tseries/tests/test_bin_groupby.py b/pandas/tseries/tests/test_bin_groupby.py index 08c0833be0cd6..51a10f4141ab5 100644 --- a/pandas/tseries/tests/test_bin_groupby.py +++ b/pandas/tseries/tests/test_bin_groupby.py @@ -46,7 +46,6 @@ def test_series_bin_grouper(): class TestBinGroupers(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): self.obj = np.random.randn(10, 1) @@ -122,6 +121,7 @@ class TestMoments(tm.TestCase): class TestReducer(tm.TestCase): + def test_int_index(self): from pandas.core.series import Series diff --git 
a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index 8caed80f5a45b..b934aaed7d41f 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -19,6 +19,7 @@ def test_timtetonum_accepts_unicode(): class TestDateTimeConverter(tm.TestCase): + def setUp(self): self.dtc = converter.DatetimeConverter() self.tc = converter.TimeFormatter(None) @@ -142,6 +143,7 @@ def _assert_less(ts1, ts2): class TestPeriodConverter(tm.TestCase): + def setUp(self): self.pc = converter.PeriodConverter() diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 209e6e40d5cf0..a64882380850b 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -73,6 +73,7 @@ def test_precision_finer_than_offset(self): class TestDateRange(tm.TestCase): + def setUp(self): self.rng = bdate_range(START, END) @@ -588,6 +589,7 @@ def test_freq_divides_end_in_nanos(self): class TestCustomDateRange(tm.TestCase): + def setUp(self): self.rng = cdate_range(START, END) diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index dfb7b26371d7a..9983bf5270b29 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -477,6 +477,7 @@ def test_get_freq_code(self): class TestFrequencyInference(tm.TestCase): + def test_raise_if_period_index(self): index = PeriodIndex(start="1/1/1990", periods=20, freq="M") self.assertRaises(TypeError, frequencies.infer_freq, index) diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tseries/tests/test_holiday.py index d4d273347e6e3..2adf28a506c53 100644 --- a/pandas/tseries/tests/test_holiday.py +++ b/pandas/tseries/tests/test_holiday.py @@ -18,6 +18,7 @@ class TestCalendar(tm.TestCase): + def setUp(self): self.holiday_list = [ datetime(2012, 1, 2), @@ -54,6 +55,7 @@ def test_calendar_caching(self): # Test for issue #9552 class TestCalendar(AbstractHolidayCalendar): + def __init__(self, name=None, rules=None): super(TestCalendar, self).__init__(name=name, rules=rules) @@ -83,6 +85,7 @@ def test_rule_from_name(self): class TestHoliday(tm.TestCase): + def setUp(self): self.start_date = datetime(2011, 1, 1) self.end_date = datetime(2020, 12, 31) @@ -288,6 +291,7 @@ def test_factory(self): class TestObservanceRules(tm.TestCase): + def setUp(self): self.we = datetime(2014, 4, 9) self.th = datetime(2014, 4, 10) diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index ac488a3dfdcb2..7c5a4c3df28b2 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -38,8 +38,6 @@ import pandas.util.testing as tm from pandas.tseries.holiday import USFederalHolidayCalendar -_multiprocess_can_split_ = True - def test_monthrange(): import calendar @@ -507,7 +505,6 @@ def test_pickle_v0_15_2(self): class TestDateOffset(Base): - _multiprocess_can_split_ = True def setUp(self): self.d = Timestamp(datetime(2008, 1, 2)) @@ -547,7 +544,6 @@ def test_eq(self): class TestBusinessDay(Base): - _multiprocess_can_split_ = True _offset = BDay def setUp(self): @@ -725,7 +721,6 @@ def test_offsets_compare_equal(self): class TestBusinessHour(Base): - _multiprocess_can_split_ = True _offset = BusinessHour def setUp(self): @@ -1432,7 +1427,6 @@ def test_datetimeindex(self): class TestCustomBusinessHour(Base): - _multiprocess_can_split_ = True _offset = CustomBusinessHour def setUp(self): @@ -1693,7 +1687,6 @@ def 
test_apply_nanoseconds(self): class TestCustomBusinessDay(Base): - _multiprocess_can_split_ = True _offset = CDay def setUp(self): @@ -1931,7 +1924,6 @@ def test_pickle_compat_0_14_1(self): class CustomBusinessMonthBase(object): - _multiprocess_can_split_ = True def setUp(self): self.d = datetime(2008, 1, 1) @@ -3257,6 +3249,7 @@ def makeFY5253LastOfMonth(*args, **kwds): class TestFY5253LastOfMonth(Base): + def test_onOffset(self): offset_lom_sat_aug = makeFY5253LastOfMonth(1, startingMonth=8, @@ -3342,6 +3335,7 @@ def test_apply(self): class TestFY5253NearestEndMonth(Base): + def test_get_target_month_end(self): self.assertEqual(makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT) @@ -3507,6 +3501,7 @@ def test_apply(self): class TestFY5253LastOfMonthQuarter(Base): + def test_isAnchored(self): self.assertTrue( makeFY5253LastOfMonthQuarter(startingMonth=1, weekday=WeekDay.SAT, @@ -3729,6 +3724,7 @@ def test_get_weeks(self): class TestFY5253NearestEndMonthQuarter(Base): + def test_onOffset(self): offset_nem_sat_aug_4 = makeFY5253NearestEndMonthQuarter( @@ -3814,6 +3810,7 @@ def test_offset(self): class TestQuarterBegin(Base): + def test_repr(self): self.assertEqual(repr(QuarterBegin()), "") @@ -4168,6 +4165,7 @@ def test_onOffset(self): class TestBYearEndLagged(Base): + def test_bad_month_fail(self): self.assertRaises(Exception, BYearEnd, month=13) self.assertRaises(Exception, BYearEnd, month=0) @@ -4307,6 +4305,7 @@ def test_onOffset(self): class TestYearEndDiffMonth(Base): + def test_offset(self): tests = [] @@ -4542,6 +4541,7 @@ def test_compare_ticks(self): class TestOffsetNames(tm.TestCase): + def test_get_offset_name(self): self.assertEqual(BDay().freqstr, 'B') self.assertEqual(BDay(2).freqstr, '2B') @@ -4600,6 +4600,7 @@ def test_get_offset_legacy(): class TestParseTimeString(tm.TestCase): + def test_parse_time_string(self): (date, parsed, reso) = parse_time_string('4Q1984') (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') @@ -4662,6 +4663,7 @@ def test_quarterly_dont_normalize(): class TestOffsetAliases(tm.TestCase): + def setUp(self): _offset_map.clear() @@ -4797,6 +4799,7 @@ def test_week_of_month_index_creation(self): class TestReprNames(tm.TestCase): + def test_str_for_named_is_name(self): # look at all the amazing combinations! 
month_prefixes = ['A', 'AS', 'BA', 'BAS', 'Q', 'BQ', 'BQS', 'QS'] diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index fdc067a827a5b..a39830b6aede6 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1652,6 +1652,7 @@ def test_is_leap_year(self): class TestPeriodIndex(tm.TestCase): + def setUp(self): pass @@ -4456,6 +4457,7 @@ def test_negone_ordinals(self): class TestComparisons(tm.TestCase): + def setUp(self): self.january1 = Period('2000-01', 'M') self.january2 = Period('2000-01', 'M') @@ -4961,6 +4963,7 @@ def test_ops_frame_period(self): class TestPeriodField(tm.TestCase): + def test_get_period_field_raises_on_out_of_range(self): self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 222ffb735921a..afb44887fe7d1 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -49,7 +49,6 @@ def _simple_pts(start, end, freq='D'): class TestResampleAPI(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): dti = DatetimeIndex(start=datetime(2005, 1, 1), @@ -754,8 +753,8 @@ def test_resample_empty_series(self): self.assertEqual(result.index.freq, expected.index.freq) if (method == 'size' and - isinstance(result.index, PeriodIndex) and - freq in ['M', 'D']): + isinstance(result.index, PeriodIndex) and + freq in ['M', 'D']): # GH12871 - TODO: name should propagate, but currently # doesn't on lower / same frequency with PeriodIndex assert_series_equal(result, expected, check_dtype=False, @@ -839,7 +838,6 @@ def test_resample_loffset_arg_type(self): class TestDatetimeIndex(Base, tm.TestCase): - _multiprocess_can_split_ = True _index_factory = lambda x: date_range def setUp(self): @@ -990,6 +988,7 @@ def fn(x, a=1): return str(type(x)) class fn_class: + def __call__(self, x): return str(type(x)) @@ -2135,7 +2134,6 @@ def test_resample_datetime_values(self): class TestPeriodIndex(Base, tm.TestCase): - _multiprocess_can_split_ = True _index_factory = lambda x: period_range def create_series(self): @@ -2744,7 +2742,6 @@ def test_evenly_divisible_with_no_extra_bins(self): class TestTimedeltaIndex(Base, tm.TestCase): - _multiprocess_can_split_ = True _index_factory = lambda x: timedelta_range def create_series(self): @@ -2766,6 +2763,7 @@ def test_asfreq_bug(self): class TestResamplerGrouper(tm.TestCase): + def setUp(self): self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)}, @@ -2960,6 +2958,7 @@ def test_median_duplicate_columns(self): class TestTimeGrouper(tm.TestCase): + def setUp(self): self.ts = Series(np.random.randn(1000), index=date_range('1/1/2000', periods=1000)) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 13263259e0b8a..170d5cdafa60b 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -24,7 +24,6 @@ class TestTimedeltas(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): pass @@ -1231,7 +1230,6 @@ def test_timedelta_arithmetic(self): class TestTimedeltaIndex(tm.TestCase): - _multiprocess_can_split_ = True def test_pass_TimedeltaIndex_to_index(self): @@ -1907,6 +1905,7 @@ def test_factorize(self): class TestSlicing(tm.TestCase): + def test_partial_slice(self): rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) s = Series(np.arange(len(rng)), index=rng) diff --git 
a/pandas/tseries/tests/test_timeseries_legacy.py b/pandas/tseries/tests/test_timeseries_legacy.py index 5395056c93412..17cc93ac42639 100644 --- a/pandas/tseries/tests/test_timeseries_legacy.py +++ b/pandas/tseries/tests/test_timeseries_legacy.py @@ -27,8 +27,6 @@ # class TestLegacySupport(unittest.TestCase): class LegacySupport(object): - _multiprocess_can_split_ = True - @classmethod def setUpClass(cls): if compat.PY3: diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 00b60ba620c4b..38cd8079faf93 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -52,7 +52,6 @@ def dst(self, dt): class TestTimeZoneSupportPytz(tm.TestCase): - _multiprocess_can_split_ = True def setUp(self): tm._skip_if_no_pytz() @@ -899,7 +898,6 @@ def test_datetimeindex_tz_nat(self): class TestTimeZoneSupportDateutil(TestTimeZoneSupportPytz): - _multiprocess_can_split_ = True def setUp(self): tm._skip_if_no_dateutil() @@ -1142,6 +1140,7 @@ def test_tz_convert_tzlocal(self): class TestTimeZoneCacheKey(tm.TestCase): + def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): tzs = pytz.common_timezones for tz_name in tzs: @@ -1158,7 +1157,6 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): class TestTimeZones(tm.TestCase): - _multiprocess_can_split_ = True timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific'] def setUp(self): diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 20e91a6f5bc44..a141d445e6035 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -49,6 +49,7 @@ def test_to_datetime_bijective(self): class TestDatetimeParsingWrappers(tm.TestCase): + def test_does_not_convert_mixed_integer(self): bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') @@ -408,6 +409,7 @@ def test_parsers_iso8601(self): class TestArrayToDatetime(tm.TestCase): + def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) self.assert_numpy_array_equal( @@ -523,6 +525,7 @@ def test_parsing_timezone_offsets(self): class TestTslib(tm.TestCase): + def test_intraday_conversion_factors(self): self.assertEqual(period_asfreq( 1, get_freq('D'), get_freq('H'), False), 24) diff --git a/pandas/types/generic.py b/pandas/types/generic.py index 86d266f4595e2..756fb47596700 100644 --- a/pandas/types/generic.py +++ b/pandas/types/generic.py @@ -53,6 +53,7 @@ def _check(cls, inst): class _ABCGeneric(type): + def __instancecheck__(cls, inst): return hasattr(inst, "_data") diff --git a/pandas/util/clipboard/__init__.py b/pandas/util/clipboard/__init__.py index 358c9b5f8035a..9e2b2faf858db 100644 --- a/pandas/util/clipboard/__init__.py +++ b/pandas/util/clipboard/__init__.py @@ -107,4 +107,4 @@ def set_clipboard(clipboard): # pandas aliases clipboard_get = paste -clipboard_set = copy \ No newline at end of file +clipboard_set = copy diff --git a/pandas/util/clipboard/clipboards.py b/pandas/util/clipboard/clipboards.py index 182a685f956e6..f73f4f191d577 100644 --- a/pandas/util/clipboard/clipboards.py +++ b/pandas/util/clipboard/clipboards.py @@ -123,6 +123,7 @@ def paste_klipper(): def init_no_clipboard(): class ClipboardUnavailable(object): + def __call__(self, *args, **kwargs): raise PyperclipException(EXCEPT_MSG) diff --git a/pandas/util/clipboard/exceptions.py b/pandas/util/clipboard/exceptions.py index 615335f3a58da..f42d263a02993 100644 --- a/pandas/util/clipboard/exceptions.py +++ 
b/pandas/util/clipboard/exceptions.py @@ -7,6 +7,7 @@ class PyperclipException(RuntimeError): class PyperclipWindowsException(PyperclipException): + def __init__(self, message): message += " (%s)" % ctypes.WinError() super(PyperclipWindowsException, self).__init__(message) diff --git a/pandas/util/clipboard/windows.py b/pandas/util/clipboard/windows.py index 956d5b9d34025..5c9be9ddaf508 100644 --- a/pandas/util/clipboard/windows.py +++ b/pandas/util/clipboard/windows.py @@ -10,6 +10,7 @@ class CheckedCall(object): + def __init__(self, f): super(CheckedCall, self).__setattr__("f", f) @@ -133,7 +134,8 @@ def copy_windows(text): count * sizeof(c_wchar)) locked_handle = safeGlobalLock(handle) - ctypes.memmove(c_wchar_p(locked_handle), c_wchar_p(text), count * sizeof(c_wchar)) + ctypes.memmove(c_wchar_p(locked_handle), + c_wchar_p(text), count * sizeof(c_wchar)) safeGlobalUnlock(handle) safeSetClipboardData(CF_UNICODETEXT, handle) diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index e1888a3ffd62a..85d77c2f6f57c 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -125,6 +125,7 @@ def some_function(x): def some_function(x): "%s %s wrote the Raven" """ + def __init__(self, *args, **kwargs): if (args and kwargs): raise AssertionError("Only positional or keyword args are allowed") @@ -171,6 +172,7 @@ def my_dog(has='fleas'): "This docstring will have a copyright below" pass """ + def __init__(self, addendum, join='', indents=0): if indents > 0: self.addendum = indent(addendum, indents=indents) diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index 736d2cdaab31c..cf8b0f7960f17 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -16,6 +16,7 @@ class _DeprecatedModule(object): removals : objects or methods in module that will no longer be accessible once module is removed. """ + def __init__(self, deprmod, removals=None): self.deprmod = deprmod self.removals = removals diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 6ea91543677a7..6b2e920a24063 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -165,7 +165,7 @@ def assert_almost_equal(left, right, check_exact=False, pass else: if (isinstance(left, np.ndarray) or - isinstance(right, np.ndarray)): + isinstance(right, np.ndarray)): obj = 'numpy array' else: obj = 'Input' @@ -1103,7 +1103,6 @@ def assert_series_equal(left, right, check_dtype=True, check_datetimelike_compat=False, check_categorical=True, obj='Series'): - """Check that left and right Series are equal. Parameters @@ -1211,7 +1210,6 @@ def assert_frame_equal(left, right, check_dtype=True, check_categorical=True, check_like=False, obj='DataFrame'): - """Check that left and right DataFrame are equal. 
Parameters @@ -2446,6 +2444,7 @@ class _AssertRaisesContextmanager(object): Handles the behind the scenes work for assertRaises and assertRaisesRegexp """ + def __init__(self, exception, regexp=None, *args, **kwargs): self.exception = exception if regexp is not None and not hasattr(regexp, "search"): From bf8194a74c84c0ba3976d40cc8380df76aa32cdb Mon Sep 17 00:00:00 2001 From: TrigonaMinima Date: Tue, 7 Feb 2017 03:56:12 +0530 Subject: [PATCH 021/353] TST/CLN: reorg more of tseries/tests xref #14854 closes #15324 --- .../indexes/datetimes/test_date_range.py | 810 ++++++- pandas/tests/indexes/datetimes/test_ops.py | 17 +- pandas/tests/indexes/datetimes/test_tools.py | 626 ++++- pandas/tests/indexes/period/__init__.py | 0 pandas/tests/indexes/period/test_period.py | 233 ++ pandas/tests/indexes/test_datetimelike.py | 465 ---- pandas/tests/indexes/test_timedelta.py | 42 - pandas/tests/indexes/timedeltas/__init__.py | 0 .../tests/indexes/timedeltas/test_astype.py | 121 + .../indexes/timedeltas/test_construction.py | 88 + .../tests/indexes/timedeltas/test_indexing.py | 110 + pandas/tests/indexes/timedeltas/test_ops.py | 1276 ++++++++++ .../timedeltas/test_partial_slicing.py | 81 + .../tests/indexes/timedeltas/test_setops.py | 76 + .../indexes/timedeltas/test_timedelta.py | 592 +++++ .../timedeltas/test_timedelta_range.py | 51 + pandas/tests/indexes/timedeltas/test_tools.py | 201 ++ pandas/tests/scalar/test_timedelta.py | 712 ++++++ pandas/tests/scalar/test_timestamp.py | 45 + pandas/tseries/tests/test_base.py | 846 +------ pandas/tseries/tests/test_daterange.py | 820 ------- pandas/tseries/tests/test_period.py | 119 +- pandas/tseries/tests/test_timedeltas.py | 2051 ----------------- pandas/tseries/tests/test_timezones.py | 70 +- pandas/tseries/tests/test_tslib.py | 694 ------ pandas/tseries/tests/test_util.py | 126 - 26 files changed, 5192 insertions(+), 5080 deletions(-) create mode 100644 pandas/tests/indexes/period/__init__.py create mode 100644 pandas/tests/indexes/period/test_period.py delete mode 100644 pandas/tests/indexes/test_datetimelike.py delete mode 100644 pandas/tests/indexes/test_timedelta.py create mode 100644 pandas/tests/indexes/timedeltas/__init__.py create mode 100644 pandas/tests/indexes/timedeltas/test_astype.py create mode 100644 pandas/tests/indexes/timedeltas/test_construction.py create mode 100644 pandas/tests/indexes/timedeltas/test_indexing.py create mode 100644 pandas/tests/indexes/timedeltas/test_ops.py create mode 100644 pandas/tests/indexes/timedeltas/test_partial_slicing.py create mode 100644 pandas/tests/indexes/timedeltas/test_setops.py create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta.py create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta_range.py create mode 100644 pandas/tests/indexes/timedeltas/test_tools.py delete mode 100644 pandas/tseries/tests/test_daterange.py delete mode 100644 pandas/tseries/tests/test_timedeltas.py delete mode 100644 pandas/tseries/tests/test_tslib.py delete mode 100644 pandas/tseries/tests/test_util.py diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 9d5f397329c76..8dab10269f76d 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1,12 +1,20 @@ +import numpy as np from datetime import datetime, timedelta, time import pandas as pd import pandas.util.testing as tm -from pandas import date_range, offsets, DatetimeIndex, Timestamp from pandas import compat +from 
pandas.core import common as com +from pandas.util.testing import assertRaisesRegexp +from pandas.tseries.index import bdate_range, cdate_range +from pandas import date_range, offsets, DatetimeIndex, Timestamp, Index +from pandas.tseries.offsets import (generate_range, CDay, BDay, Minute, + BMonthEnd, DateOffset, MonthEnd) from pandas.tests.series.common import TestData +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + class TestTimeSeries(TestData, tm.TestCase): @@ -127,3 +135,803 @@ def test_catch_infinite_loop(self): # blow up, don't loop forever self.assertRaises(Exception, date_range, datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset) + + +def eq_gen_range(kwargs, expected): + rng = generate_range(**kwargs) + assert (np.array_equal(list(rng), expected)) + + +class TestGenRangeGeneration(tm.TestCase): + + def test_generate(self): + rng1 = list(generate_range(START, END, offset=BDay())) + rng2 = list(generate_range(START, END, time_rule='B')) + self.assertEqual(rng1, rng2) + + def test_generate_cday(self): + rng1 = list(generate_range(START, END, offset=CDay())) + rng2 = list(generate_range(START, END, time_rule='C')) + self.assertEqual(rng1, rng2) + + def test_1(self): + eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2), + [datetime(2009, 3, 25), datetime(2009, 3, 26)]) + + def test_2(self): + eq_gen_range(dict(start=datetime(2008, 1, 1), + end=datetime(2008, 1, 3)), + [datetime(2008, 1, 1), + datetime(2008, 1, 2), + datetime(2008, 1, 3)]) + + def test_3(self): + eq_gen_range(dict(start=datetime(2008, 1, 5), + end=datetime(2008, 1, 6)), + []) + + def test_precision_finer_than_offset(self): + # GH 9907 + result1 = DatetimeIndex(start='2015-04-15 00:00:03', + end='2016-04-22 00:00:00', freq='Q') + result2 = DatetimeIndex(start='2015-04-15 00:00:03', + end='2015-06-22 00:00:04', freq='W') + expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03', + '2015-12-31 00:00:03', '2016-03-31 00:00:03'] + expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03', + '2015-05-03 00:00:03', '2015-05-10 00:00:03', + '2015-05-17 00:00:03', '2015-05-24 00:00:03', + '2015-05-31 00:00:03', '2015-06-07 00:00:03', + '2015-06-14 00:00:03', '2015-06-21 00:00:03'] + expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]', + freq='Q-DEC', tz=None) + expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', + freq='W-SUN', tz=None) + self.assert_index_equal(result1, expected1) + self.assert_index_equal(result2, expected2) + + +class TestDateRange(tm.TestCase): + def setUp(self): + self.rng = bdate_range(START, END) + + def test_constructor(self): + bdate_range(START, END, freq=BDay()) + bdate_range(START, periods=20, freq=BDay()) + bdate_range(end=START, periods=20, freq=BDay()) + self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') + self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + + def test_naive_aware_conflicts(self): + naive = bdate_range(START, END, freq=BDay(), tz=None) + aware = bdate_range(START, END, freq=BDay(), + tz="Asia/Hong_Kong") + assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware) + assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive) + + def test_cached_range(self): + DatetimeIndex._cached_range(START, END, offset=BDay()) + DatetimeIndex._cached_range(START, periods=20, offset=BDay()) + DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) + + assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range, + START, END) + + 
assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, START, + offset=BDay()) + + assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, end=END, + offset=BDay()) + + assertRaisesRegexp(TypeError, "start or end", + DatetimeIndex._cached_range, periods=20, + offset=BDay()) + + def test_cached_range_bug(self): + rng = date_range('2010-09-01 05:00:00', periods=50, + freq=DateOffset(hours=6)) + self.assertEqual(len(rng), 50) + self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) + + def test_timezone_comparaison_bug(self): + start = Timestamp('20130220 10:00', tz='US/Eastern') + try: + date_range(start, periods=2, tz='US/Eastern') + except AssertionError: + self.fail() + + def test_timezone_comparaison_assert(self): + start = Timestamp('20130220 10:00', tz='US/Eastern') + self.assertRaises(AssertionError, date_range, start, periods=2, + tz='Europe/Berlin') + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, BDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=BDay()) + self.assertEqual(shifted[0], rng[0] + BDay()) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + tm.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, 
DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_union_not_cacheable(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_union = rng1.union(rng2) + self.assert_index_equal(the_union, rng) + + rng1 = rng[10:] + rng2 = rng[15:35] + the_union = rng1.union(rng2) + expected = rng[10:] + self.assert_index_equal(the_union, expected) + + def test_intersection(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_int = rng1.intersection(rng2) + expected = rng[10:25] + self.assert_index_equal(the_int, expected) + tm.assertIsInstance(the_int, DatetimeIndex) + self.assertEqual(the_int.offset, rng.offset) + + the_int = rng1.intersection(rng2.view(DatetimeIndex)) + self.assert_index_equal(the_int, expected) + + # non-overlapping + the_int = rng[:10].intersection(rng[10:]) + expected = DatetimeIndex([]) + self.assert_index_equal(the_int, expected) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range('11/30/2011', '12/31/2011') + b = bdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20) + firstDate = end - 19 * BDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = '2007/100/1' + + self.assertRaises(ValueError, Timestamp, badly_formed_date) + + self.assertRaises(ValueError, bdate_range, start=badly_formed_date, + periods=10) + self.assertRaises(ValueError, bdate_range, end=badly_formed_date, + periods=10) + self.assertRaises(ValueError, bdate_range, badly_formed_date, + badly_formed_date) + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) + + def test_identical(self): + t1 = self.rng.copy() + t2 = self.rng.copy() + self.assertTrue(t1.identical(t2)) + + # name + t1 = t1.rename('foo') + self.assertTrue(t1.equals(t2)) + self.assertFalse(t1.identical(t2)) + t2 = t2.rename('foo') + self.assertTrue(t1.identical(t2)) + + # freq + t2v = Index(t2.values) + self.assertTrue(t1.equals(t2v)) + self.assertFalse(t1.identical(t2v)) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range('12/5/2011', '12/5/2011') + rng2 = bdate_range('12/2/2011', '12/5/2011') + rng2.offset = BDay() + + result = rng1.union(rng2) + tm.assertIsInstance(result, DatetimeIndex) + + def test_error_with_zero_monthends(self): + self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', + freq=MonthEnd(0)) + + def 
test_range_bug(self): + # GH #770 + offset = DateOffset(months=3) + result = date_range("2011-1-1", "2012-1-31", freq=offset) + + start = datetime(2011, 1, 1) + exp_values = [start + i * offset for i in range(5)] + tm.assert_index_equal(result, DatetimeIndex(exp_values)) + + def test_range_tz_pytz(self): + # GH 2906 + tm._skip_if_no_pytz() + from pytz import timezone + + tz = timezone('US/Eastern') + start = tz.localize(datetime(2011, 1, 1)) + end = tz.localize(datetime(2011, 1, 3)) + + dr = date_range(start=start, periods=3) + self.assertEqual(dr.tz.zone, tz.zone) + self.assertEqual(dr[0], start) + self.assertEqual(dr[2], end) + + dr = date_range(end=end, periods=3) + self.assertEqual(dr.tz.zone, tz.zone) + self.assertEqual(dr[0], start) + self.assertEqual(dr[2], end) + + dr = date_range(start=start, end=end) + self.assertEqual(dr.tz.zone, tz.zone) + self.assertEqual(dr[0], start) + self.assertEqual(dr[2], end) + + def test_range_tz_dst_straddle_pytz(self): + + tm._skip_if_no_pytz() + from pytz import timezone + tz = timezone('US/Eastern') + dates = [(tz.localize(datetime(2014, 3, 6)), + tz.localize(datetime(2014, 3, 12))), + (tz.localize(datetime(2013, 11, 1)), + tz.localize(datetime(2013, 11, 6)))] + for (start, end) in dates: + dr = date_range(start, end, freq='D') + self.assertEqual(dr[0], start) + self.assertEqual(dr[-1], end) + self.assertEqual(np.all(dr.hour == 0), True) + + dr = date_range(start, end, freq='D', tz='US/Eastern') + self.assertEqual(dr[0], start) + self.assertEqual(dr[-1], end) + self.assertEqual(np.all(dr.hour == 0), True) + + dr = date_range(start.replace(tzinfo=None), end.replace( + tzinfo=None), freq='D', tz='US/Eastern') + self.assertEqual(dr[0], start) + self.assertEqual(dr[-1], end) + self.assertEqual(np.all(dr.hour == 0), True) + + def test_range_tz_dateutil(self): + # GH 2906 + tm._skip_if_no_dateutil() + # Use maybe_get_tz to fix filename in tz under dateutil. 
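# A minimal standalone sketch (assuming the same pandas.tslib.maybe_get_tz helper
# imported just below) of how a 'dateutil/<zone>' string is resolved to a
# dateutil-based tzinfo object that date_range then propagates:
from datetime import datetime
from pandas import date_range
from pandas.tslib import maybe_get_tz

eastern = maybe_get_tz('dateutil/US/Eastern')   # dateutil tzinfo, not pytz
start = datetime(2011, 1, 1, tzinfo=eastern)
dr = date_range(start=start, periods=3)
assert dr.tz == eastern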
+ from pandas.tslib import maybe_get_tz + tz = lambda x: maybe_get_tz('dateutil/' + x) + + start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) + end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) + + dr = date_range(start=start, periods=3) + self.assertTrue(dr.tz == tz('US/Eastern')) + self.assertTrue(dr[0] == start) + self.assertTrue(dr[2] == end) + + dr = date_range(end=end, periods=3) + self.assertTrue(dr.tz == tz('US/Eastern')) + self.assertTrue(dr[0] == start) + self.assertTrue(dr[2] == end) + + dr = date_range(start=start, end=end) + self.assertTrue(dr.tz == tz('US/Eastern')) + self.assertTrue(dr[0] == start) + self.assertTrue(dr[2] == end) + + def test_month_range_union_tz_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + def test_month_range_union_tz_dateutil(self): + tm._skip_if_windows_python_3() + tm._skip_if_no_dateutil() + from pandas.tslib import _dateutil_gettz as timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + def test_range_closed(self): + begin = datetime(2011, 1, 1) + end = datetime(2014, 1, 1) + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right + + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] + + self.assert_index_equal(expected_left, left) + self.assert_index_equal(expected_right, right) + + def test_range_closed_with_tz_aware_start_end(self): + # GH12409, GH12684 + begin = Timestamp('2011/1/1', tz='US/Eastern') + end = Timestamp('2014/1/1', tz='US/Eastern') + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right + + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] + + self.assert_index_equal(expected_left, left) + self.assert_index_equal(expected_right, right) + + begin = Timestamp('2011/1/1') + end = Timestamp('2014/1/1') + begintz = Timestamp('2011/1/1', tz='US/Eastern') + endtz = Timestamp('2014/1/1', tz='US/Eastern') + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq, + tz='US/Eastern') + left = date_range(begin, end, closed="left", freq=freq, + tz='US/Eastern') + right = date_range(begin, end, closed="right", freq=freq, + tz='US/Eastern') + expected_left = left + expected_right = right + + if endtz == closed[-1]: + expected_left = closed[:-1] + if begintz == closed[0]: + expected_right = closed[1:] + + 
self.assert_index_equal(expected_left, left) + self.assert_index_equal(expected_right, right) + + def test_range_closed_boundary(self): + # GH 11804 + for closed in ['right', 'left', None]: + right_boundary = date_range('2015-09-12', '2015-12-01', + freq='QS-MAR', closed=closed) + left_boundary = date_range('2015-09-01', '2015-09-12', + freq='QS-MAR', closed=closed) + both_boundary = date_range('2015-09-01', '2015-12-01', + freq='QS-MAR', closed=closed) + expected_right = expected_left = expected_both = both_boundary + + if closed == 'right': + expected_left = both_boundary[1:] + if closed == 'left': + expected_right = both_boundary[:-1] + if closed is None: + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + self.assert_index_equal(right_boundary, expected_right) + self.assert_index_equal(left_boundary, expected_left) + self.assert_index_equal(both_boundary, expected_both) + + def test_years_only(self): + # GH 6961 + dr = date_range('2014', '2015', freq='M') + self.assertEqual(dr[0], datetime(2014, 1, 31)) + self.assertEqual(dr[-1], datetime(2014, 12, 31)) + + def test_freq_divides_end_in_nanos(self): + # GH 10885 + result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00', + freq='345min') + result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00', + freq='345min') + expected_1 = DatetimeIndex(['2005-01-12 10:00:00', + '2005-01-12 15:45:00'], + dtype='datetime64[ns]', freq='345T', + tz=None) + expected_2 = DatetimeIndex(['2005-01-13 10:00:00', + '2005-01-13 15:45:00'], + dtype='datetime64[ns]', freq='345T', + tz=None) + self.assert_index_equal(result_1, expected_1) + self.assert_index_equal(result_2, expected_2) + + +class TestCustomDateRange(tm.TestCase): + def setUp(self): + self.rng = cdate_range(START, END) + + def test_constructor(self): + cdate_range(START, END, freq=CDay()) + cdate_range(START, periods=20, freq=CDay()) + cdate_range(end=START, periods=20, freq=CDay()) + self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') + self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + + def test_cached_range(self): + DatetimeIndex._cached_range(START, END, offset=CDay()) + DatetimeIndex._cached_range(START, periods=20, + offset=CDay()) + DatetimeIndex._cached_range(end=START, periods=20, + offset=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) + + self.assertRaises(Exception, DatetimeIndex._cached_range, START, + freq=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, + freq=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, + freq=CDay()) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, CDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 
64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + with tm.assert_produces_warning(com.PerformanceWarning): + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=CDay()) + self.assertEqual(shifted[0], rng[0] + CDay()) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + self.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_intersection_bug(self): + # GH #771 + a = cdate_range('11/30/2011', '12/31/2011') + b = cdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = cdate_range(end=end, periods=20) + firstDate = end - 19 * CDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = '2007/100/1' + + self.assertRaises(ValueError, Timestamp, badly_formed_date) + + self.assertRaises(ValueError, cdate_range, start=badly_formed_date, + periods=10) + 
self.assertRaises(ValueError, cdate_range, end=badly_formed_date, + periods=10) + self.assertRaises(ValueError, cdate_range, badly_formed_date, + badly_formed_date) + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = cdate_range('12/5/2011', '12/5/2011') + rng2 = cdate_range('12/2/2011', '12/5/2011') + rng2.offset = CDay() + + result = rng1.union(rng2) + tm.assertIsInstance(result, DatetimeIndex) + + def test_cdaterange(self): + rng = cdate_range('2013-05-01', periods=3) + xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_weekmask(self): + rng = cdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu') + xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_holidays(self): + rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) + xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_weekmask_and_holidays(self): + rng = cdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) + self.assert_index_equal(xp, rng) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index a46980a0f742a..c7cdcd9318a0e 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -8,7 +8,7 @@ from pandas.core.common import PerformanceWarning from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, - datetime, Float64Index) + datetime, Float64Index, offsets) from pandas.tests.test_base import Ops @@ -1070,3 +1070,18 @@ def test_datetime64_with_DateOffset(self): assert_func(klass([x + op for x in s]), s + op) assert_func(klass([x - op for x in s]), s - op) assert_func(klass([op + x for x in s]), op + s) + + +class TestTslib(tm.TestCase): + + def test_shift_months(self): + s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( + '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( + '2000-02-29'), Timestamp('2000-12-31')]) + for years in [-1, 0, 1]: + for months in [-2, 0, 2]: + actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + + months)) + expected = DatetimeIndex([x + offsets.DateOffset( + years=years, months=months) for x in s]) + tm.assert_index_equal(actual, expected) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 841d0be605058..bf1f82b90d5d6 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,22 +1,26 @@ """ test to_datetime """ -import nose - import sys -import calendar +import nose import locale -from datetime import datetime - +import calendar import numpy as np -from pandas.types.common import is_datetime64_ns_dtype -from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, - Index, DatetimeIndex, NaT, date_range, bdate_range) -from pandas import tslib -from pandas.compat import lmap +from datetime import datetime, date, time +from distutils.version import LooseVersion + import pandas as pd +from pandas import tslib from pandas.tseries import tools +from pandas.tseries.tools import normalize_date +from pandas.tseries.util import pivot_annual, 
isleapyear +from pandas.compat import lmap +from pandas.compat.numpy import np_array_datetime64_compat +from pandas.types.common import is_datetime64_ns_dtype from pandas.util import testing as tm -from pandas.util.testing import assert_series_equal +from pandas.util.testing import assert_series_equal, _skip_if_has_locale +from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, + Index, DatetimeIndex, NaT, date_range, bdate_range, + compat, lib) class TimeConversionFormats(tm.TestCase): @@ -1017,3 +1021,603 @@ def test_day_not_in_month_ignore(self): '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') self.assertEqual(to_datetime( '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') + + +class TestDatetimeParsingWrappers(tm.TestCase): + def test_does_not_convert_mixed_integer(self): + bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') + + for bad_date_string in bad_date_strings: + self.assertFalse(tslib._does_string_look_like_datetime( + bad_date_string)) + + good_date_strings = ('2012-01-01', + '01/01/2012', + 'Mon Sep 16, 2013', + '01012012', + '0101', + '1-1', ) + + for good_date_string in good_date_strings: + self.assertTrue(tslib._does_string_look_like_datetime( + good_date_string)) + + def test_parsers(self): + + # https://github.com/dateutil/dateutil/issues/217 + import dateutil + yearfirst = dateutil.__version__ >= LooseVersion('2.5.0') + + cases = {'2011-01-01': datetime(2011, 1, 1), + '2Q2005': datetime(2005, 4, 1), + '2Q05': datetime(2005, 4, 1), + '2005Q1': datetime(2005, 1, 1), + '05Q1': datetime(2005, 1, 1), + '2011Q3': datetime(2011, 7, 1), + '11Q3': datetime(2011, 7, 1), + '3Q2011': datetime(2011, 7, 1), + '3Q11': datetime(2011, 7, 1), + + # quarterly without space + '2000Q4': datetime(2000, 10, 1), + '00Q4': datetime(2000, 10, 1), + '4Q2000': datetime(2000, 10, 1), + '4Q00': datetime(2000, 10, 1), + '2000q4': datetime(2000, 10, 1), + '2000-Q4': datetime(2000, 10, 1), + '00-Q4': datetime(2000, 10, 1), + '4Q-2000': datetime(2000, 10, 1), + '4Q-00': datetime(2000, 10, 1), + '00q4': datetime(2000, 10, 1), + '2005': datetime(2005, 1, 1), + '2005-11': datetime(2005, 11, 1), + '2005 11': datetime(2005, 11, 1), + '11-2005': datetime(2005, 11, 1), + '11 2005': datetime(2005, 11, 1), + '200511': datetime(2020, 5, 11), + '20051109': datetime(2005, 11, 9), + '20051109 10:15': datetime(2005, 11, 9, 10, 15), + '20051109 08H': datetime(2005, 11, 9, 8, 0), + '2005-11-09 10:15': datetime(2005, 11, 9, 10, 15), + '2005-11-09 08H': datetime(2005, 11, 9, 8, 0), + '2005/11/09 10:15': datetime(2005, 11, 9, 10, 15), + '2005/11/09 08H': datetime(2005, 11, 9, 8, 0), + "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, + 36, 28), + "Thu Sep 25 2003": datetime(2003, 9, 25), + "Sep 25 2003": datetime(2003, 9, 25), + "January 1 2014": datetime(2014, 1, 1), + + # GH 10537 + '2014-06': datetime(2014, 6, 1), + '06-2014': datetime(2014, 6, 1), + '2014-6': datetime(2014, 6, 1), + '6-2014': datetime(2014, 6, 1), + + '20010101 12': datetime(2001, 1, 1, 12), + '20010101 1234': datetime(2001, 1, 1, 12, 34), + '20010101 123456': datetime(2001, 1, 1, 12, 34, 56), + } + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str, + yearfirst=yearfirst) + result2 = to_datetime(date_str, yearfirst=yearfirst) + result3 = to_datetime([date_str], yearfirst=yearfirst) + # result5 is used below + result4 = to_datetime(np.array([date_str], dtype=object), + yearfirst=yearfirst) + result6 = DatetimeIndex([date_str], 
yearfirst=yearfirst) + # result7 is used below + result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) + result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) + + for res in [result1, result2]: + self.assertEqual(res, expected) + for res in [result3, result4, result6, result8, result9]: + exp = DatetimeIndex([pd.Timestamp(expected)]) + tm.assert_index_equal(res, exp) + + # these really need to have yearfirst, but we don't support it + if not yearfirst: + result5 = Timestamp(date_str) + self.assertEqual(result5, expected) + result7 = date_range(date_str, freq='S', periods=1, + yearfirst=yearfirst) + self.assertEqual(result7, expected) + + # NaT + result1, _, _ = tools.parse_time_string('NaT') + result2 = to_datetime('NaT') + result3 = Timestamp('NaT') + result4 = DatetimeIndex(['NaT'])[0] + self.assertTrue(result1 is tslib.NaT) + self.assertTrue(result2 is tslib.NaT) + self.assertTrue(result3 is tslib.NaT) + self.assertTrue(result4 is tslib.NaT) + + def test_parsers_quarter_invalid(self): + + cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] + for case in cases: + self.assertRaises(ValueError, tools.parse_time_string, case) + + def test_parsers_dayfirst_yearfirst(self): + tm._skip_if_no_dateutil() + + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + + # bug fix in 2.5.2 + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + + # revert of bug in 2.5.2 + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + + import dateutil + is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') + + # str : dayfirst, yearfirst, expected + cases = {'10-11-12': [(False, False, + datetime(2012, 10, 11)), + (True, False, + datetime(2012, 11, 10)), + (False, True, + datetime(2010, 11, 12)), + (True, True, + datetime(2010, 12, 11))], + '20/12/21': [(False, False, + datetime(2021, 12, 20)), + (True, False, + datetime(2021, 12, 20)), + (False, True, + datetime(2020, 12, 21)), + (True, True, + datetime(2020, 12, 21))]} + + from dateutil.parser import parse + for date_str, 
values in compat.iteritems(cases): + for dayfirst, yearfirst, expected in values: + + # odd comparisons across version + # let's just skip + if dayfirst and yearfirst and is_lt_253: + continue + + # compare with dateutil result + dateutil_result = parse(date_str, dayfirst=dayfirst, + yearfirst=yearfirst) + self.assertEqual(dateutil_result, expected) + + result1, _, _ = tools.parse_time_string(date_str, + dayfirst=dayfirst, + yearfirst=yearfirst) + + # we don't support dayfirst/yearfirst here: + if not dayfirst and not yearfirst: + result2 = Timestamp(date_str) + self.assertEqual(result2, expected) + + result3 = to_datetime(date_str, dayfirst=dayfirst, + yearfirst=yearfirst) + + result4 = DatetimeIndex([date_str], dayfirst=dayfirst, + yearfirst=yearfirst)[0] + + self.assertEqual(result1, expected) + self.assertEqual(result3, expected) + self.assertEqual(result4, expected) + + def test_parsers_timestring(self): + tm._skip_if_no_dateutil() + from dateutil.parser import parse + + # must be the same as dateutil result + cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)), + '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} + + for date_str, (exp_now, exp_def) in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str) + result2 = to_datetime(date_str) + result3 = to_datetime([date_str]) + result4 = Timestamp(date_str) + result5 = DatetimeIndex([date_str])[0] + # parse time string return time string based on default date + # others are not, and can't be changed because it is used in + # time series plot + self.assertEqual(result1, exp_def) + self.assertEqual(result2, exp_now) + self.assertEqual(result3, exp_now) + self.assertEqual(result4, exp_now) + self.assertEqual(result5, exp_now) + + def test_parsers_time(self): + # GH11818 + _skip_if_has_locale() + strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500", + "2:15:00pm", "021500pm", time(14, 15)] + expected = time(14, 15) + + for time_string in strings: + self.assertEqual(tools.to_time(time_string), expected) + + new_string = "14.15" + self.assertRaises(ValueError, tools.to_time, new_string) + self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected) + + arg = ["14:15", "20:20"] + expected_arr = [time(14, 15), time(20, 20)] + self.assertEqual(tools.to_time(arg), expected_arr) + self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr) + self.assertEqual(tools.to_time(arg, infer_time_format=True), + expected_arr) + self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"), + [None, None]) + + res = tools.to_time(arg, format="%I:%M%p", errors="ignore") + self.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) + + with tm.assertRaises(ValueError): + tools.to_time(arg, format="%I:%M%p", errors="raise") + + self.assert_series_equal(tools.to_time(Series(arg, name="test")), + Series(expected_arr, name="test")) + + res = tools.to_time(np.array(arg)) + self.assertIsInstance(res, list) + self.assert_equal(res, expected_arr) + + def test_parsers_monthfreq(self): + cases = {'201101': datetime(2011, 1, 1, 0, 0), + '200005': datetime(2000, 5, 1, 0, 0)} + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str, freq='M') + self.assertEqual(result1, expected) + + def test_parsers_quarterly_with_freq(self): + msg = ('Incorrect quarterly string is given, quarter ' + 'must be between 1 and 4: 2013Q5') + with tm.assertRaisesRegexp(tslib.DateParseError, msg): + tools.parse_time_string('2013Q5') + + # GH 5418 + msg = ('Unable to 
retrieve month information from given freq: ' + 'INVLD-L-DEC-SAT') + with tm.assertRaisesRegexp(tslib.DateParseError, msg): + tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') + + cases = {('2013Q2', None): datetime(2013, 4, 1), + ('2013Q2', 'A-APR'): datetime(2012, 8, 1), + ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1)} + + for (date_str, freq), exp in compat.iteritems(cases): + result, _, _ = tools.parse_time_string(date_str, freq=freq) + self.assertEqual(result, exp) + + def test_parsers_timezone_minute_offsets_roundtrip(self): + # GH11708 + base = to_datetime("2013-01-01 00:00:00") + dt_strings = [ + ('2013-01-01 05:45+0545', + "Asia/Katmandu", + "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"), + ('2013-01-01 05:30+0530', + "Asia/Kolkata", + "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')") + ] + + for dt_string, tz, dt_string_repr in dt_strings: + dt_time = to_datetime(dt_string) + self.assertEqual(base, dt_time) + converted_time = dt_time.tz_localize('UTC').tz_convert(tz) + self.assertEqual(dt_string_repr, repr(converted_time)) + + def test_parsers_iso8601(self): + # GH 12060 + # test only the iso parser - flexibility to different + # separators and leading 0s + # Timestamp construction falls back to dateutil + cases = {'2011-01-02': datetime(2011, 1, 2), + '2011-1-2': datetime(2011, 1, 2), + '2011-01': datetime(2011, 1, 1), + '2011-1': datetime(2011, 1, 1), + '2011 01 02': datetime(2011, 1, 2), + '2011.01.02': datetime(2011, 1, 2), + '2011/01/02': datetime(2011, 1, 2), + '2011\\01\\02': datetime(2011, 1, 2), + '2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30), + '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)} + for date_str, exp in compat.iteritems(cases): + actual = tslib._test_parse_iso8601(date_str) + self.assertEqual(actual, exp) + + # separators must all match - YYYYMM not valid + invalid_cases = ['2011-01/02', '2011^11^11', + '201401', '201111', '200101', + # mixed separated and unseparated + '2005-0101', '200501-01', + '20010101 12:3456', '20010101 1234:56', + # HHMMSS must have two digits in each component + # if unseparated + '20010101 1', '20010101 123', '20010101 12345', + '20010101 12345Z', + # wrong separator for HHMMSS + '2001-01-01 12-34-56'] + for date_str in invalid_cases: + with tm.assertRaises(ValueError): + tslib._test_parse_iso8601(date_str) + # If no ValueError is raised, let me know which case failed. 
+ raise Exception(date_str) + + +class TestTsUtil(tm.TestCase): + + def test_try_parse_dates(self): + from dateutil.parser import parse + arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) + + result = lib.try_parse_dates(arr, dayfirst=True) + expected = [parse(d, dayfirst=True) for d in arr] + self.assertTrue(np.array_equal(result, expected)) + + +class TestArrayToDatetime(tm.TestCase): + def test_parsing_valid_dates(self): + arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr), + np_array_datetime64_compat( + [ + '2013-01-01T00:00:00.000000000-0000', + '2013-01-02T00:00:00.000000000-0000' + ], + dtype='M8[ns]' + ) + ) + + arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr), + np_array_datetime64_compat( + [ + '2013-09-16T00:00:00.000000000-0000', + '2013-09-17T00:00:00.000000000-0000' + ], + dtype='M8[ns]' + ) + ) + + def test_parsing_timezone_offsets(self): + # All of these datetime strings with offsets are equivalent + # to the same datetime after the timezone offset is added + dt_strings = [ + '01-01-2013 08:00:00+08:00', + '2013-01-01T08:00:00.000000000+0800', + '2012-12-31T16:00:00.000000000-0800', + '12-31-2012 23:00:00-01:00' + ] + + expected_output = tslib.array_to_datetime(np.array( + ['01-01-2013 00:00:00'], dtype=object)) + + for dt_string in dt_strings: + self.assert_numpy_array_equal( + tslib.array_to_datetime( + np.array([dt_string], dtype=object) + ), + expected_output + ) + + def test_number_looking_strings_not_into_datetime(self): + # #4601 + # These strings don't look like datetimes so they shouldn't be + # attempted to be converted + arr = np.array(['-352.737091', '183.575577'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='ignore'), arr) + + arr = np.array(['1', '2', '3', '4', '5'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='ignore'), arr) + + def test_coercing_dates_outside_of_datetime64_ns_bounds(self): + invalid_dates = [ + date(1000, 1, 1), + datetime(1000, 1, 1), + '1000-01-01', + 'Jan 1, 1000', + np.datetime64('1000-01-01'), + ] + + for invalid_date in invalid_dates: + self.assertRaises(ValueError, + tslib.array_to_datetime, + np.array( + [invalid_date], dtype='object'), + errors='raise', ) + self.assert_numpy_array_equal( + tslib.array_to_datetime( + np.array([invalid_date], dtype='object'), + errors='coerce'), + np.array([tslib.iNaT], dtype='M8[ns]') + ) + + arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='coerce'), + np_array_datetime64_compat( + [ + tslib.iNaT, + '2000-01-01T00:00:00.000000000-0000' + ], + dtype='M8[ns]' + ) + ) + + def test_coerce_of_invalid_datetimes(self): + arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) + + # Without coercing, the presence of any invalid dates prevents + # any values from being converted + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='ignore'), arr) + + # With coercing, the invalid dates becomes iNaT + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='coerce'), + np_array_datetime64_compat( + [ + '2013-01-01T00:00:00.000000000-0000', + tslib.iNaT, + tslib.iNaT + ], + dtype='M8[ns]' + ) + ) + + +class TestPivotAnnual(tm.TestCase): + """ + New pandas of scikits.timeseries pivot_annual + """ + + def test_daily(self): + rng = 
date_range('1/1/2000', '12/31/2004', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'D') + + doy = ts.index.dayofyear + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + # check leap days + leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] + day = leaps.index.dayofyear[0] + leaps.index = leaps.index.year + leaps.name = 60 + tm.assert_series_equal(annual[day].dropna(), leaps) + + def test_hourly(self): + rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), + freq='H') + data_hourly = np.random.randint(100, 350, rng_hourly.size) + ts_hourly = Series(data_hourly, index=rng_hourly) + + grouped = ts_hourly.groupby(ts_hourly.index.year) + hoy = grouped.apply(lambda x: x.reset_index(drop=True)) + hoy = hoy.index.droplevel(0).values + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 + hoy += 1 + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts_hourly) + + ts_hourly = ts_hourly.astype(float) + for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: + subset = ts_hourly[hoy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + leaps = ts_hourly[(ts_hourly.index.month == 2) & ( + ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] + hour = leaps.index.dayofyear[0] * 24 - 23 + leaps.index = leaps.index.year + leaps.name = 1417 + tm.assert_series_equal(annual[hour].dropna(), leaps) + + def test_weekly(self): + pass + + def test_monthly(self): + rng = date_range('1/1/2000', '12/31/2004', freq='M') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'M') + + month = ts.index.month + for i in range(1, 13): + subset = ts[month == i] + subset.index = [x.year for x in subset.index] + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + def test_period_monthly(self): + pass + + def test_period_daily(self): + pass + + def test_period_weekly(self): + pass + + def test_isleapyear_deprecate(self): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2000)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertFalse(isleapyear(2001)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2004)) + + +def test_normalize_date(): + value = date(2012, 9, 7) + + result = normalize_date(value) + assert (result == datetime(2012, 9, 7)) + + value = datetime(2012, 9, 7, 12) + + result = normalize_date(value) + assert (result == datetime(2012, 9, 7)) diff --git a/pandas/tests/indexes/period/__init__.py b/pandas/tests/indexes/period/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py new file 
mode 100644 index 0000000000000..33653c92da719 --- /dev/null +++ b/pandas/tests/indexes/period/test_period.py @@ -0,0 +1,233 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +from pandas.util import testing as tm +from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, + Index, Period, Int64Index) + +from ..datetimelike import DatetimeLike + + +class TestPeriodIndex(DatetimeLike, tm.TestCase): + _holder = PeriodIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makePeriodIndex(10)) + self.setup_indices() + + def create_index(self): + return period_range('20130101', periods=5, freq='D') + + def test_construction_base_constructor(self): + # GH 13664 + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='D')] + tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) + + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.Index(np.array(arr), dtype=object)) + + def test_astype(self): + # GH 13149, GH 13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + result = idx.astype(object) + expected = Index([Period('2016-05-16', freq='D')] + + [Period(NaT, freq='D')] * 3, dtype='object') + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([16937] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + idx = period_range('1990', '2009', freq='A') + result = idx.astype('i8') + self.assert_index_equal(result, Index(idx.asi8)) + self.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + self.assertRaises(ValueError, idx.astype, str) + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, 'timedelta64') + self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') + + def test_shift(self): + + # test shift for PeriodIndex + # GH8083 + drange = self.create_index() + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + def test_pickle_compat_construction(self): + pass + + def test_get_loc(self): + idx = pd.period_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual( + idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) + self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) + self.assertEqual( + idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + idx = pd.period_range('2000-01-01', periods=5)[::2] + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance='1 day'), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=pd.Timedelta('1D')), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=np.timedelta64(1, 'D')), 1) + 
self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=timedelta(1)), 1) + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc('2000-01-10', method='nearest', tolerance='foo') + + msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' + with tm.assertRaisesRegexp(ValueError, msg): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') + with tm.assertRaises(KeyError): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2)) + expected = i2 + tm.assert_index_equal(result, expected) + + def test_where_other(self): + + i = self.create_index() + for arr in [np.nan, pd.NaT]: + result = i.where(notnull(i), other=np.nan) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2), i2.values) + tm.assert_index_equal(result, i2) + + def test_get_indexer(self): + idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', + '2000-01-02T01'], freq='H') + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 hour'), + np.array([0, -1, 1], dtype=np.intp)) + + msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' + with self.assertRaisesRegexp(ValueError, msg): + idx.get_indexer(target, 'nearest', tolerance='1 minute') + + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 day'), + np.array([0, 1, 1], dtype=np.intp)) + + def test_repeat(self): + # GH10183 + idx = pd.period_range('2000-01-01', periods=3, freq='D') + res = idx.repeat(3) + exp = PeriodIndex(idx.values.repeat(3), freq='D') + self.assert_index_equal(res, exp) + self.assertEqual(res.freqstr, 'D') + + def test_period_index_indexer(self): + # GH4125 + idx = pd.period_range('2002-01', '2003-12', freq='M') + df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) + self.assert_frame_equal(df, df.loc[idx]) + self.assert_frame_equal(df, df.loc[list(idx)]) + self.assert_frame_equal(df, df.loc[list(idx)]) + self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) + self.assert_frame_equal(df, df.loc[list(idx)]) + + def test_fillna_period(self): + # GH 11343 + idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00'], freq='H') + + exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H') + self.assert_index_equal( + idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) + + exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', + pd.Period('2011-01-01 11:00', freq='H')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + exp = pd.Index([pd.Period('2011-01-01 
09:00', freq='H'), + pd.Period('2011-01-01', freq='D'), + pd.Period('2011-01-01 11:00', freq='H')], dtype=object) + self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')), + exp) + + def test_no_millisecond_field(self): + with self.assertRaises(AttributeError): + DatetimeIndex.millisecond + + with self.assertRaises(AttributeError): + DatetimeIndex([]).millisecond + + def test_difference_freq(self): + # GH14323: difference of Period MUST preserve frequency + # but the ability to union results must be preserved + + index = period_range("20160920", "20160925", freq="D") + + other = period_range("20160921", "20160924", freq="D") + expected = PeriodIndex(["20160920", "20160925"], freq='D') + idx_diff = index.difference(other) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = period_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other) + expected = PeriodIndex(["20160920", "20160921"], freq='D') + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py deleted file mode 100644 index b212a7b75904c..0000000000000 --- a/pandas/tests/indexes/test_datetimelike.py +++ /dev/null @@ -1,465 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -from datetime import timedelta - -import pandas as pd -from pandas.util import testing as tm -from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, - NaT, Period, PeriodIndex, Series, Timedelta, - TimedeltaIndex, period_range, - timedelta_range, notnull) - - -from .datetimelike import DatetimeLike - - -class TestPeriodIndex(DatetimeLike, tm.TestCase): - _holder = PeriodIndex - - def setUp(self): - self.indices = dict(index=tm.makePeriodIndex(10)) - self.setup_indices() - - def create_index(self): - return period_range('20130101', periods=5, freq='D') - - def test_construction_base_constructor(self): - # GH 13664 - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='D')] - tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) - - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.Index(np.array(arr), dtype=object)) - - def test_astype(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - result = idx.astype(object) - expected = Index([Period('2016-05-16', freq='D')] + - [Period(NaT, freq='D')] * 3, dtype='object') - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([16937] + [-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - idx = period_range('1990', '2009', freq='A') - result = idx.astype('i8') - self.assert_index_equal(result, Index(idx.asi8)) - self.assert_numpy_array_equal(result.values, idx.asi8) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, float) - 
self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - - def test_shift(self): - - # test shift for PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - def test_pickle_compat_construction(self): - pass - - def test_get_loc(self): - idx = pd.period_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual( - idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) - self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) - self.assertEqual( - idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - idx = pd.period_range('2000-01-01', periods=5)[::2] - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-10', method='nearest', tolerance='foo') - - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') - - def test_where(self): - i = self.create_index() - result = i.where(notnull(i)) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - - def test_where_other(self): - - i = self.create_index() - for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), i2) - tm.assert_index_equal(result, i2) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), i2.values) - tm.assert_index_equal(result, i2) - - def test_get_indexer(self): - idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', - '2000-01-02T01'], freq='H') - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 hour'), - np.array([0, -1, 1], dtype=np.intp)) - - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with self.assertRaisesRegexp(ValueError, msg): - idx.get_indexer(target, 'nearest', tolerance='1 minute') - - 
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 day'), - np.array([0, 1, 1], dtype=np.intp)) - - def test_repeat(self): - # GH10183 - idx = pd.period_range('2000-01-01', periods=3, freq='D') - res = idx.repeat(3) - exp = PeriodIndex(idx.values.repeat(3), freq='D') - self.assert_index_equal(res, exp) - self.assertEqual(res.freqstr, 'D') - - def test_period_index_indexer(self): - # GH4125 - idx = pd.period_range('2002-01', '2003-12', freq='M') - df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) - self.assert_frame_equal(df, df.loc[idx]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) - self.assert_frame_equal(df, df.loc[list(idx)]) - - def test_fillna_period(self): - # GH 11343 - idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00'], freq='H') - - exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H') - self.assert_index_equal( - idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) - - exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', - pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), - pd.Period('2011-01-01', freq='D'), - pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')), - exp) - - def test_no_millisecond_field(self): - with self.assertRaises(AttributeError): - DatetimeIndex.millisecond - - with self.assertRaises(AttributeError): - DatetimeIndex([]).millisecond - - def test_difference_freq(self): - # GH14323: difference of Period MUST preserve frequency - # but the ability to union results must be preserved - - index = period_range("20160920", "20160925", freq="D") - - other = period_range("20160921", "20160924", freq="D") - expected = PeriodIndex(["20160920", "20160925"], freq='D') - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = period_range("20160922", "20160925", freq="D") - idx_diff = index.difference(other) - expected = PeriodIndex(["20160920", "20160921"], freq='D') - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): - _holder = TimedeltaIndex - - def setUp(self): - self.indices = dict(index=tm.makeTimedeltaIndex(10)) - self.setup_indices() - - def create_index(self): - return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - - def test_construction_base_constructor(self): - arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] - tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.TimedeltaIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Timedelta('1 days')] - tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.TimedeltaIndex(np.array(arr))) - - def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 
01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - self.assert_index_equal(result, expected) - - def test_astype(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - result = idx.astype(object) - expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, - dtype=object) - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([100000000000000] + [-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - rng = timedelta_range('1 days', periods=10) - - result = rng.astype('i8') - self.assert_index_equal(result, Index(rng.asi8)) - self.assert_numpy_array_equal(rng.asi8, result.values) - - def test_astype_timedelta64(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - result = idx.astype('timedelta64') - expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') - tm.assert_index_equal(result, expected) - - result = idx.astype('timedelta64[ns]') - tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) - - result = idx.astype('timedelta64[ns]', copy=False) - tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') - - def test_get_loc(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) - self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) - - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc(idx[1], method='nearest', tolerance='foo') - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) - - def test_get_indexer(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - - res = idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')) - tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) - - def test_numeric_compat(self): - - idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) - result = idx * 1 - tm.assert_index_equal(result, idx) - - result = 1 * idx - tm.assert_index_equal(result, idx) - - result = idx / 1 - tm.assert_index_equal(result, idx) - - result = idx // 1 - tm.assert_index_equal(result, idx) - - result = idx * np.array(5, dtype='int64') - 
tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='int64') * 5)) - - result = idx * np.arange(5, dtype='int64') - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, self._holder(np.arange( - 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) - - # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) - - def test_pickle_compat_construction(self): - pass - - def test_ufunc_coercions(self): - # normal ops are also tested in tseries/test_timedeltas.py - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - - for result in [idx * 2, np.multiply(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], - freq='4H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4H') - - for result in [idx / 2, np.divide(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], - freq='H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'H') - - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - for result in [-idx, np.negative(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], - freq='-2H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2H') - - idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], - freq='H', name='x') - for result in [abs(idx), np.absolute(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], - freq=None, name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_fillna_timedelta(self): - # GH 11343 - idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) - - exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) - self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) - - exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) - idx.fillna(pd.Timedelta('3 hour')) - - exp = pd.Index( - [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - def test_difference_freq(self): - # GH14323: Difference of TimedeltaIndex should not preserve frequency - - index = timedelta_range("0 days", "5 days", freq="D") - - other = timedelta_range("1 days", "4 days", freq="D") - expected = TimedeltaIndex(["0 days", "5 days"], freq=None) - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = timedelta_range("2 days", "5 days", freq="D") - idx_diff = index.difference(other) - expected = TimedeltaIndex(["0 days", "1 days"], freq=None) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_timedelta.py b/pandas/tests/indexes/test_timedelta.py deleted file mode 100644 index e6071b8c4fa06..0000000000000 --- a/pandas/tests/indexes/test_timedelta.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np -from datetime import timedelta - -import pandas as pd -import pandas.util.testing as tm -from pandas import (timedelta_range, date_range, Series, 
Timedelta, - DatetimeIndex) - - -class TestSlicing(tm.TestCase): - - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') - shifted = index + timedelta(1) - back = shifted + timedelta(-1) - self.assertTrue(tm.equalContents(index, back)) - self.assertEqual(shifted.freq, index.freq) - self.assertEqual(shifted.freq, back.freq) - - result = index - timedelta(1) - expected = index + timedelta(-1) - tm.assert_index_equal(result, expected) - - # GH4134, buggy with timedeltas - rng = date_range('2013', '2014') - s = Series(rng) - result1 = rng - pd.offsets.Hour(1) - result2 = DatetimeIndex(s - np.timedelta64(100000000)) - result3 = rng - np.timedelta64(100000000) - result4 = DatetimeIndex(s - pd.offsets.Hour(1)) - tm.assert_index_equal(result1, result4) - tm.assert_index_equal(result2, result3) - - -class TestTimeSeries(tm.TestCase): - - def test_series_box_timedelta(self): - rng = timedelta_range('1 day 1 s', periods=5, freq='h') - s = Series(rng) - tm.assertIsInstance(s[1], Timedelta) - tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/__init__.py b/pandas/tests/indexes/timedeltas/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py new file mode 100644 index 0000000000000..88e7b1387feff --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -0,0 +1,121 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (TimedeltaIndex, timedelta_range, Int64Index, Float64Index, + Index, Timedelta, Series) + +from ..datetimelike import DatetimeLike + + +class TestTimedeltaIndex(DatetimeLike, tm.TestCase): + _holder = TimedeltaIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeTimedeltaIndex(10)) + self.setup_indices() + + def create_index(self): + return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + + def test_astype(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + result = idx.astype(object) + expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, + dtype=object) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([100000000000000] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + rng = timedelta_range('1 days', periods=10) + + result = rng.astype('i8') + self.assert_index_equal(result, Index(rng.asi8)) + self.assert_numpy_array_equal(rng.asi8, result.values) + + def test_astype_timedelta64(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + result = idx.astype('timedelta64') + expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') + tm.assert_index_equal(result, expected) + + result = idx.astype('timedelta64[ns]') + tm.assert_index_equal(result, idx) + self.assertFalse(result is idx) + + result = idx.astype('timedelta64[ns]', copy=False) + tm.assert_index_equal(result, idx) + self.assertTrue(result is idx) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, str) + self.assertRaises(ValueError, idx.astype, 'datetime64') + self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') + + def test_pickle_compat_construction(self): + pass + + def 
test_shift(self): + # test shift for TimedeltaIndex + # err8083 + + drange = self.create_index() + result = drange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + self.assert_index_equal(result, expected) + + def test_numeric_compat(self): + + idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, + self._holder(np.arange(5, dtype='int64') * 5)) + + result = idx * np.arange(5, dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + tm.assert_index_equal(result, self._holder(np.arange( + 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) + + # invalid + self.assertRaises(TypeError, lambda: idx * idx) + self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py new file mode 100644 index 0000000000000..0810b13eb0f53 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -0,0 +1,88 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, tslib, to_timedelta + +iNaT = tslib.iNaT + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_construction_base_constructor(self): + arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.TimedeltaIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Timedelta('1 days')] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.TimedeltaIndex(np.array(arr))) + + def test_constructor(self): + expected = TimedeltaIndex(['1 days', '1 days 00:00:05', '2 days', + '2 days 00:00:02', '0 days 00:00:03']) + result = TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64( + 2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)]) + tm.assert_index_equal(result, expected) + + # unicode + result = TimedeltaIndex([u'1 days', '1 days, 00:00:05', np.timedelta64( + 2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)]) + + expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', + '0 days 00:00:02']) + tm.assert_index_equal(TimedeltaIndex(range(3), unit='s'), expected) + expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:05', + '0 days 00:00:09']) + tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit='s'), expected) + expected = TimedeltaIndex( + ['0 days 00:00:00.400', '0 days 00:00:00.450', + '0 days 00:00:01.200']) + tm.assert_index_equal(TimedeltaIndex([400, 
450, 1200], unit='ms'), + expected) + + def test_constructor_coverage(self): + rng = timedelta_range('1 days', periods=10.5) + exp = timedelta_range('1 days', periods=10) + self.assert_index_equal(rng, exp) + + self.assertRaises(ValueError, TimedeltaIndex, start='1 days', + periods='foo', freq='D') + + self.assertRaises(ValueError, TimedeltaIndex, start='1 days', + end='10 days') + + self.assertRaises(ValueError, TimedeltaIndex, '1 days') + + # generator expression + gen = (timedelta(i) for i in range(10)) + result = TimedeltaIndex(gen) + expected = TimedeltaIndex([timedelta(i) for i in range(10)]) + self.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(['1 days', '2 days', '3 days']) + result = TimedeltaIndex(strings) + expected = to_timedelta([1, 2, 3], unit='d') + self.assert_index_equal(result, expected) + + from_ints = TimedeltaIndex(expected.asi8) + self.assert_index_equal(from_ints, expected) + + # non-conforming freq + self.assertRaises(ValueError, TimedeltaIndex, + ['1 days', '2 days', '4 days'], freq='D') + + self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D') + + def test_constructor_name(self): + idx = TimedeltaIndex(start='1 days', periods=1, freq='D', name='TEST') + self.assertEqual(idx.name, 'TEST') + + # GH10025 + idx2 = TimedeltaIndex(idx, name='something else') + self.assertEqual(idx2.name, 'something else') diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py new file mode 100644 index 0000000000000..b4a8bc79921bf --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -0,0 +1,110 @@ +from datetime import timedelta + +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, compat, Index, Timedelta + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_insert(self): + + idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') + self.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, 'inserted') + expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), + Timedelta('2day')], name='idx') + self.assertNotIsInstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + + idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') + + # preserve freq + expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', + '1day 00:00:03'], + name='idx', freq='s') + expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:04'], + name='idx', freq='s') + + # reset freq to None + expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', + '1day 00:00:02', '1day 00:00:03'], + name='idx', freq=None) + expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:05'], + name='idx', freq=None) + + cases = [(0, Timedelta('1day'), expected_0), + (-3, Timedelta('1day'), expected_0), + (3, Timedelta('1day 00:00:04'), expected_3), + (1, Timedelta('1day 00:00:01'), expected_1_nofreq), + (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] + + for n, d, expected in cases: + result = idx.insert(n, d) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + def 
test_delete(self): + idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx') + + # preserve freq + expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', + name='idx') + expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', + name='idx') + + # reset freq to None + expected_1 = TimedeltaIndex( + ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') + + cases = {0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + with tm.assertRaises((IndexError, ValueError)): + # either, depending on numpy version + result = idx.delete(5) + + def test_delete_slice(self): + idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') + + # preserve freq + expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', + name='idx') + expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', + name='idx') + + # reset freq to None + expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', + '7 d', '8 d', '9 d', '10d'], + freq=None, name='idx') + + cases = {(0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + result = idx.delete(slice(n[0], n[-1] + 1)) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py new file mode 100644 index 0000000000000..406a5bdbf3bcd --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -0,0 +1,1276 @@ +import numpy as np +from datetime import timedelta +from distutils.version import LooseVersion + +import pandas as pd +import pandas.util.testing as tm +from pandas import to_timedelta +from pandas.util.testing import assert_series_equal, assert_frame_equal +from pandas import (Series, Timedelta, DataFrame, Timestamp, TimedeltaIndex, + timedelta_range, date_range, DatetimeIndex, Int64Index, + _np_version_under1p10, Float64Index, Index, tslib) + +from pandas.tests.test_base import Ops + + +class TestTimedeltaIndexOps(Ops): + def setUp(self): + super(TestTimedeltaIndexOps, self).setUp() + mask = lambda x: isinstance(x, TimedeltaIndex) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [] + + def test_ops_properties(self): + self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', + 'milliseconds']) + self.check_ops_properties(['microseconds', 'nanoseconds']) + + def test_asobject_tolist(self): + idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') + expected_list = [Timedelta('1 days'), Timedelta('2 days'), + Timedelta('3 days'), Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT, + timedelta(days=4)], name='idx') + expected_list = [Timedelta('1 
days'), Timedelta('2 days'), pd.NaT, + Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + def test_minmax(self): + + # monotonic + idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), Timedelta('1 days')), + self.assertEqual(idx.max(), Timedelta('3 days')), + self.assertEqual(idx.argmin(), 0) + self.assertEqual(idx.argmax(), 2) + + for op in ['min', 'max']: + # Return NaT + obj = TimedeltaIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = TimedeltaIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + def test_numpy_minmax(self): + dr = pd.date_range(start='2016-01-15', end='2016-01-20') + td = TimedeltaIndex(np.asarray(dr)) + + self.assertEqual(np.min(td), Timedelta('16815 days')) + self.assertEqual(np.max(td), Timedelta('16820 days')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) + + self.assertEqual(np.argmin(td), 0) + self.assertEqual(np.argmax(td), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) + + def test_round(self): + td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') + elt = td[1] + + expected_rng = TimedeltaIndex([ + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 01:00:00'), + Timedelta('16801 days 02:00:00'), + Timedelta('16801 days 02:00:00'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq='H'), expected_rng) + self.assertEqual(elt.round(freq='H'), expected_elt) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + td.round(freq='foo') + with tm.assertRaisesRegexp(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + + def test_representation(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" + + exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " + "freq='D')") + + exp3 = ("TimedeltaIndex(['1 days', '2 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, 
idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """Series([], dtype: timedelta64[ns])""" + + exp2 = """0 1 days +dtype: timedelta64[ns]""" + + exp3 = """0 1 days +1 2 days +dtype: timedelta64[ns]""" + + exp4 = """0 1 days +1 2 days +2 3 days +dtype: timedelta64[ns]""" + + exp5 = """0 1 days 00:00:01 +1 2 days 00:00:00 +2 3 days 00:00:00 +dtype: timedelta64[ns]""" + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = repr(pd.Series(idx)) + self.assertEqual(result, expected) + + def test_summary(self): + # GH9116 + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex: 0 entries +Freq: D""" + + exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days +Freq: D""" + + exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days +Freq: D""" + + exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days +Freq: D""" + + exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " + "00:00:00") + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = idx.summary() + self.assertEqual(result, expected) + + def test_add_iadd(self): + + # only test adding/sub offsets as + is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days', '10 days') + result = rng + delta + expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', + freq='D') + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng + 1 + expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + # only test adding/sub offsets as - is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days', '10 days') + result = rng - delta + expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng - 1 + expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + + idx = TimedeltaIndex(['1 day', '2 day']) + msg = "cannot subtract a datelike from a TimedeltaIndex" + with tm.assertRaisesRegexp(TypeError, msg): + idx - Timestamp('2011-01-01') + + result = Timestamp('2011-01-01') + idx + expected = DatetimeIndex(['2011-01-02', 
'2011-01-03']) + tm.assert_index_equal(result, expected) + + def test_ops_compat(self): + + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + rng = timedelta_range('1 days', '10 days', name='foo') + + # multiply + for offset in offsets: + self.assertRaises(TypeError, lambda: rng * offset) + + # divide + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result, expected, exact=False) + + # divide with nats + rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + expected = Float64Index([12, np.nan, 24], name='foo') + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result, expected) + + # don't allow division by NaT (make could in the future) + self.assertRaises(TypeError, lambda: rng / pd.NaT) + + def test_subtraction_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + self.assertRaises(TypeError, lambda: tdi - dt) + self.assertRaises(TypeError, lambda: tdi - dti) + self.assertRaises(TypeError, lambda: td - dt) + self.assertRaises(TypeError, lambda: td - dti) + + result = dt - dti + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = dti - dt + expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = tdi - td + expected = TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = td - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = dti - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], name='bar') + tm.assert_index_equal(result, expected, check_names=False) + + result = dt - tdi + expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') + tm.assert_index_equal(result, expected) + + def test_subtraction_ops_with_tz(self): + + # check that dt/dti subtraction ops with tz are validated + dti = date_range('20130101', periods=3) + ts = Timestamp('20130101') + dt = ts.to_pydatetime() + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + ts_tz = Timestamp('20130101').tz_localize('US/Eastern') + ts_tz2 = Timestamp('20130101').tz_localize('CET') + dt_tz = ts_tz.to_pydatetime() + td = Timedelta('1 days') + + def _check(result, expected): + self.assertEqual(result, expected) + self.assertIsInstance(result, Timedelta) + + # scalars + result = ts - ts + expected = Timedelta('0 days') + _check(result, expected) + + result = dt_tz - ts_tz + expected = Timedelta('0 days') + _check(result, expected) + + result = ts_tz - dt_tz + expected = Timedelta('0 days') + _check(result, expected) + + # tz mismatches + self.assertRaises(TypeError, lambda: dt_tz - ts) + self.assertRaises(TypeError, lambda: dt_tz - dt) + self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) + self.assertRaises(TypeError, lambda: dt - dt_tz) + self.assertRaises(TypeError, lambda: ts - dt_tz) + self.assertRaises(TypeError, lambda: ts_tz2 - ts) + self.assertRaises(TypeError, lambda: ts_tz2 - dt) + self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) + + # with dti + self.assertRaises(TypeError, lambda: dti - ts_tz) + 
self.assertRaises(TypeError, lambda: dti_tz - ts) + self.assertRaises(TypeError, lambda: dti_tz - ts_tz2) + + result = dti_tz - dt_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = dt_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = dti_tz - ts_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = ts_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = td - td + expected = Timedelta('0 days') + _check(result, expected) + + result = dti_tz - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], tz='US/Eastern') + tm.assert_index_equal(result, expected) + + def test_dti_tdi_numeric_ops(self): + + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + + # TODO(wesm): unused? + # td = Timedelta('1 days') + # dt = Timestamp('20130101') + + result = tdi - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = dti - tdi # name will be reset + expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) + tm.assert_index_equal(result, expected) + + def test_sub_period(self): + # GH 13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + + for freq in [None, 'H']: + idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) + + with tm.assertRaises(TypeError): + idx - p + + with tm.assertRaises(TypeError): + p - idx + + def test_addition_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + result = tdi + dt + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = dt + tdi + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = td + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + td + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + # unequal length + self.assertRaises(ValueError, lambda: tdi + dti[0:1]) + self.assertRaises(ValueError, lambda: tdi[0:1] + dti) + + # random indexes + self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) + + # this is a union! 
+ # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dti + tdi # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dt + td + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + result = td + dt + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + def test_comp_nat(self): + left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, + pd.Timedelta('3 days')]) + right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) + + for l, r in [(left, right), (left.asobject, right.asobject)]: + result = l == r + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = l != r + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == r, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(l != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != l, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > l, expected) + + def test_value_counts_unique(self): + # GH 7735 + + idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) + + exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) + tm.assert_index_equal(idx.unique(), expected) + + idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', + '1 days 09:00:00', '1 days 08:00:00', + '1 days 08:00:00', pd.NaT]) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', + pd.NaT]) + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_nonunique_contains(self): + # GH 9512 + for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], + ['00:01:00', '00:01:00', '00:02:00'], + ['00:01:00', '00:01:00', '00:00:01'])): + tm.assertIn(idx[0], idx) + + def test_unknown_attribute(self): + # GH 9680 + tdi = pd.timedelta_range(start=0, periods=10, freq='1s') + ts = pd.Series(np.random.normal(size=10), index=tdi) + self.assertNotIn('foo', ts.__dict__.keys()) + self.assertRaises(AttributeError, lambda: ts.foo) + + def test_order(self): + # GH 10295 + idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', + name='idx') + idx2 = TimedeltaIndex( + ['1 hour', '2 hour', '3 hour'], freq='H', name='idx') + + for idx in [idx1, idx2]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = 
idx.sort_values(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, + np.array([0, 1, 2]), + check_dtype=False) + self.assertEqual(ordered.freq, idx.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, idx[::-1]) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', + '2 hour ', '1 hour'], name='idx1') + exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour', + '3 hour', '5 hour'], name='idx1') + + idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', + '2 day', '1 day'], name='idx2') + + # TODO(wesm): unused? + # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', + # '3 day', '5 day'], name='idx2') + + # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', + # '2 minute', pd.NaT], name='idx3') + # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', + # '5 minute'], name='idx3') + + for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, expected) + self.assertIsNone(ordered.freq) + + ordered = idx.sort_values(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + def test_getitem(self): + idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx[0] + self.assertEqual(result, pd.Timedelta('1 day')) + + result = idx[0:5] + expected = pd.timedelta_range('1 day', '5 day', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[0:10:2] + expected = pd.timedelta_range('1 day', '9 day', freq='2D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[-20:-5:3] + expected = pd.timedelta_range('12 day', '24 day', freq='3D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[4::-1] + expected = TimedeltaIndex(['5 day', '4 day', '3 day', + '2 day', '1 day'], + freq='-1D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + result = idx.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertEqual(idx.freq, result.freq) + + idx_dup = idx.append(idx) + self.assertIsNone(idx_dup.freq) # freq is reset + result = idx_dup.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertIsNone(result.freq) + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.timedelta_range('1 day', '31 day', 
freq='D', name='idx') + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep='last') + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep='last') + tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + def test_take(self): + # GH 10295 + idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx.take([0]) + self.assertEqual(result, pd.Timedelta('1 day')) + + result = idx.take([-1]) + self.assertEqual(result, pd.Timedelta('31 day')) + + result = idx.take([0, 1, 2]) + expected = pd.timedelta_range('1 day', '3 day', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.timedelta_range('1 day', '5 day', freq='2D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.timedelta_range('8 day', '2 day', freq='-3D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + def test_take_invalid_kwargs(self): + idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + + def test_infer_freq(self): + # GH 11018 + for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' + ]: + idx = pd.timedelta_range('1', freq=freq, periods=10) + result = pd.TimedeltaIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + self.assertEqual(result.freq, freq) + + def test_nat_new(self): + + idx = pd.timedelta_range('1', freq='D', periods=5, name='x') + result = idx._nat_new() + exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') + tm.assert_index_equal(result, exp) + + result = idx._nat_new(box=False) + exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + tm.assert_numpy_array_equal(result, exp) + + def test_shift(self): + # GH 9903 + idx = pd.TimedeltaIndex([], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') 
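+ # descriptive note: shift(-3, freq='H') moves every element back by three hours, which is what the exp index above encodes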
+ tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + tm.assert_index_equal(idx.shift(0, freq='T'), idx) + exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], + name='xxx') + tm.assert_index_equal(idx.shift(3, freq='T'), exp) + exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], + name='xxx') + tm.assert_index_equal(idx.shift(-3, freq='T'), exp) + + def test_repeat(self): + index = pd.timedelta_range('1 days', periods=2, freq='D') + exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + index = TimedeltaIndex(['1 days', 'NaT', '3 days']) + exp = TimedeltaIndex(['1 days', '1 days', '1 days', + 'NaT', 'NaT', 'NaT', + '3 days', '3 days', '3 days']) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + def test_nat(self): + self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) + self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) + + idx = pd.TimedeltaIndex(['1 days', '2 days']) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + self.assertFalse(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([], dtype=np.intp)) + + idx = pd.TimedeltaIndex(['1 days', 'NaT']) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + self.assertTrue(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([1], dtype=np.intp)) + + def test_equals(self): + # GH 13107 + idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.asobject)) + self.assertTrue(idx.asobject.equals(idx)) + self.assertTrue(idx.asobject.equals(idx.asobject)) + self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(pd.Series(idx))) + + idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) + self.assertFalse(idx.equals(idx2)) + self.assertFalse(idx.equals(idx2.copy())) + self.assertFalse(idx.equals(idx2.asobject)) + self.assertFalse(idx.asobject.equals(idx2)) + self.assertFalse(idx.asobject.equals(idx2.asobject)) + self.assertFalse(idx.equals(list(idx2))) + self.assertFalse(idx.equals(pd.Series(idx2))) + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_ops(self): + + td = Timedelta(10, unit='d') + self.assertEqual(-td, Timedelta(-10, unit='d')) + self.assertEqual(+td, Timedelta(10, unit='d')) + self.assertEqual(td - td, Timedelta(0, unit='ns')) + self.assertTrue((td - pd.NaT) is pd.NaT) + self.assertEqual(td + td, Timedelta(20, unit='d')) + self.assertTrue((td + pd.NaT) is pd.NaT) + self.assertEqual(td * 2, Timedelta(20, unit='d')) + self.assertTrue((td * pd.NaT) is pd.NaT) + self.assertEqual(td / 2, Timedelta(5, unit='d')) + self.assertEqual(abs(td), td) + self.assertEqual(abs(-td), td) + self.assertEqual(td / td, 1) + self.assertTrue((td / pd.NaT) is np.nan) + + # invert + self.assertEqual(-td, Timedelta('-10d')) + self.assertEqual(td * -1, Timedelta('-10d')) + self.assertEqual(-1 * td, Timedelta('-10d')) + self.assertEqual(abs(-td), Timedelta('10d')) + + # invalid + self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) + + # invalid multiply with another timedelta + self.assertRaises(TypeError, lambda: td * td) + + # can't operate with integers + self.assertRaises(TypeError, lambda: td + 2) + self.assertRaises(TypeError, 
lambda: td - 2) + + def test_ops_offsets(self): + td = Timedelta(10, unit='d') + self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) + self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) + self.assertEqual(240, td / pd.offsets.Hour(1)) + self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) + self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) + self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) + + def test_ops_ndarray(self): + td = Timedelta('1 day') + + # timedelta, timedelta + other = pd.to_timedelta(['1 day']).values + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + self.assertRaises(TypeError, lambda: td + np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) + td) + + expected = pd.to_timedelta(['0 days']).values + self.assert_numpy_array_equal(td - other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(-other + td, expected) + self.assertRaises(TypeError, lambda: td - np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) - td) + + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td * np.array([2]), expected) + self.assert_numpy_array_equal(np.array([2]) * td, expected) + self.assertRaises(TypeError, lambda: td * other) + self.assertRaises(TypeError, lambda: other * td) + + self.assert_numpy_array_equal(td / other, + np.array([1], dtype=np.float64)) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other / td, + np.array([1], dtype=np.float64)) + + # timedelta, datetime + other = pd.to_datetime(['2000-01-01']).values + expected = pd.to_datetime(['2000-01-02']).values + self.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + + expected = pd.to_datetime(['1999-12-31']).values + self.assert_numpy_array_equal(-td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other - td, expected) + + def test_ops_series(self): + # regression test for GH8813 + td = Timedelta('1 day') + other = pd.Series([1, 2]) + expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) + tm.assert_series_equal(expected, td * other) + tm.assert_series_equal(expected, other * td) + + def test_ops_series_object(self): + # GH 13043 + s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), + pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], + name='xxx') + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), + pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], + name='xxx') + tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) + tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) + + # object series & object series + s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), + pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], + name='xxx') + self.assertEqual(s2.dtype, object) + exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], + name='xxx') + tm.assert_series_equal(s2 - s, exp) + tm.assert_series_equal(s - s2, -exp) + + s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], + name='xxx', dtype=object) + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], + name='xxx') + tm.assert_series_equal(s + 
pd.Timedelta('00:30:00'), exp) + tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) + + def test_ops_notimplemented(self): + class Other: + pass + + other = Other() + + td = Timedelta('1 day') + self.assertTrue(td.__add__(other) is NotImplemented) + self.assertTrue(td.__sub__(other) is NotImplemented) + self.assertTrue(td.__truediv__(other) is NotImplemented) + self.assertTrue(td.__mul__(other) is NotImplemented) + self.assertTrue(td.__floordiv__(td) is NotImplemented) + + def test_ops_error_str(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + for l, r in [(tdi, 'a'), ('a', tdi)]: + with tm.assertRaises(TypeError): + l + r + + with tm.assertRaises(TypeError): + l > r + + with tm.assertRaises(TypeError): + l == r + + with tm.assertRaises(TypeError): + l != r + + def test_timedelta_ops(self): + # GH4984 + # make sure ops return Timedelta + s = Series([Timestamp('20130101') + timedelta(seconds=i * i) + for i in range(10)]) + td = s.diff() + + result = td.mean() + expected = to_timedelta(timedelta(seconds=9)) + self.assertEqual(result, expected) + + result = td.to_frame().mean() + self.assertEqual(result[0], expected) + + result = td.quantile(.1) + expected = Timedelta(np.timedelta64(2600, 'ms')) + self.assertEqual(result, expected) + + result = td.median() + expected = to_timedelta('00:00:09') + self.assertEqual(result, expected) + + result = td.to_frame().median() + self.assertEqual(result[0], expected) + + # GH 6462 + # consistency in returned values for sum + result = td.sum() + expected = to_timedelta('00:01:21') + self.assertEqual(result, expected) + + result = td.to_frame().sum() + self.assertEqual(result[0], expected) + + # std + result = td.std() + expected = to_timedelta(Series(td.dropna().values).std()) + self.assertEqual(result, expected) + + result = td.to_frame().std() + self.assertEqual(result[0], expected) + + # invalid ops + for op in ['skew', 'kurt', 'sem', 'prod']: + self.assertRaises(TypeError, getattr(td, op)) + + # GH 10040 + # make sure NaT is properly handled by median() + s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')]) + self.assertEqual(s.diff().median(), timedelta(days=4)) + + s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), + Timestamp('2015-02-15')]) + self.assertEqual(s.diff().median(), timedelta(days=6)) + + def test_timedelta_ops_scalar(self): + # GH 6808 + base = pd.to_datetime('20130101 09:01:12.123456') + expected_add = pd.to_datetime('20130101 09:01:22.123456') + expected_sub = pd.to_datetime('20130101 09:01:02.123456') + + for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), + np.timedelta64(10, 's'), + np.timedelta64(10000000000, 'ns'), + pd.offsets.Second(10)]: + result = base + offset + self.assertEqual(result, expected_add) + + result = base - offset + self.assertEqual(result, expected_sub) + + base = pd.to_datetime('20130102 09:01:12.123456') + expected_add = pd.to_datetime('20130103 09:01:22.123456') + expected_sub = pd.to_datetime('20130101 09:01:02.123456') + + for offset in [pd.to_timedelta('1 day, 00:00:10'), + pd.to_timedelta('1 days, 00:00:10'), + timedelta(days=1, seconds=10), + np.timedelta64(1, 'D') + np.timedelta64(10, 's'), + pd.offsets.Day() + pd.offsets.Second(10)]: + result = base + offset + self.assertEqual(result, expected_add) + + result = base - offset + self.assertEqual(result, expected_sub) + + def test_timedelta_ops_with_missing_values(self): + # setup + s1 = pd.to_timedelta(Series(['00:00:01'])) + s2 = pd.to_timedelta(Series(['00:00:02'])) + sn = 
pd.to_timedelta(Series([pd.NaT])) + df1 = DataFrame(['00:00:01']).apply(pd.to_timedelta) + df2 = DataFrame(['00:00:02']).apply(pd.to_timedelta) + dfn = DataFrame([pd.NaT]).apply(pd.to_timedelta) + scalar1 = pd.to_timedelta('00:00:01') + scalar2 = pd.to_timedelta('00:00:02') + timedelta_NaT = pd.to_timedelta('NaT') + NA = np.nan + + actual = scalar1 + scalar1 + self.assertEqual(actual, scalar2) + actual = scalar2 - scalar1 + self.assertEqual(actual, scalar1) + + actual = s1 + s1 + assert_series_equal(actual, s2) + actual = s2 - s1 + assert_series_equal(actual, s1) + + actual = s1 + scalar1 + assert_series_equal(actual, s2) + actual = scalar1 + s1 + assert_series_equal(actual, s2) + actual = s2 - scalar1 + assert_series_equal(actual, s1) + actual = -scalar1 + s2 + assert_series_equal(actual, s1) + + actual = s1 + timedelta_NaT + assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + assert_series_equal(actual, sn) + actual = s1 - timedelta_NaT + assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + assert_series_equal(actual, sn) + + actual = s1 + NA + assert_series_equal(actual, sn) + actual = NA + s1 + assert_series_equal(actual, sn) + actual = s1 - NA + assert_series_equal(actual, sn) + actual = -NA + s1 + assert_series_equal(actual, sn) + + actual = s1 + pd.NaT + assert_series_equal(actual, sn) + actual = s2 - pd.NaT + assert_series_equal(actual, sn) + + actual = s1 + df1 + assert_frame_equal(actual, df2) + actual = s2 - df1 + assert_frame_equal(actual, df1) + actual = df1 + s1 + assert_frame_equal(actual, df2) + actual = df2 - s1 + assert_frame_equal(actual, df1) + + actual = df1 + df1 + assert_frame_equal(actual, df2) + actual = df2 - df1 + assert_frame_equal(actual, df1) + + actual = df1 + scalar1 + assert_frame_equal(actual, df2) + actual = df2 - scalar1 + assert_frame_equal(actual, df1) + + actual = df1 + timedelta_NaT + assert_frame_equal(actual, dfn) + actual = df1 - timedelta_NaT + assert_frame_equal(actual, dfn) + + actual = df1 + NA + assert_frame_equal(actual, dfn) + actual = df1 - NA + assert_frame_equal(actual, dfn) + + actual = df1 + pd.NaT # NaT is datetime, not timedelta + assert_frame_equal(actual, dfn) + actual = df1 - pd.NaT + assert_frame_equal(actual, dfn) + + def test_compare_timedelta_series(self): + # regresssion test for GH5963 + s = pd.Series([timedelta(days=1), timedelta(days=2)]) + actual = s > timedelta(days=1) + expected = pd.Series([False, True]) + tm.assert_series_equal(actual, expected) + + def test_compare_timedelta_ndarray(self): + # GH11835 + periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] + arr = np.array(periods) + result = arr[0] > arr + expected = np.array([False, False]) + self.assert_numpy_array_equal(result, expected) + + +class TestSlicing(tm.TestCase): + + def test_tdi_ops_attributes(self): + rng = timedelta_range('2 days', periods=5, freq='2D', name='x') + + result = rng + 1 + exp = timedelta_range('4 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + result = rng - 2 + exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + result = rng * 2 + exp = timedelta_range('4 days', periods=5, freq='4D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '4D') + + result = rng / 2 + exp = timedelta_range('1 days', periods=5, freq='D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'D') + + result 
= -rng + exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '-2D') + + rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') + + result = abs(rng) + exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', + '2 days'], name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, None) + + def test_add_overflow(self): + # see gh-14068 + msg = "too (big|large) to convert" + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta(106580, 'D') + Timestamp('2000') + with tm.assertRaisesRegexp(OverflowError, msg): + Timestamp('2000') + to_timedelta(106580, 'D') + + _NaT = int(pd.NaT) + 1 + msg = "Overflow in int64 addition" + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta([106580], 'D') + Timestamp('2000') + with tm.assertRaisesRegexp(OverflowError, msg): + Timestamp('2000') + to_timedelta([106580], 'D') + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta([_NaT]) - Timedelta('1 days') + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta(['5 days', _NaT]) - Timedelta('1 days') + with tm.assertRaisesRegexp(OverflowError, msg): + (to_timedelta([_NaT, '5 days', '1 hours']) - + to_timedelta(['7 seconds', _NaT, '4 hours'])) + + # These should not overflow! + exp = TimedeltaIndex([pd.NaT]) + result = to_timedelta([pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(['4 days', pd.NaT]) + result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) + result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + + to_timedelta(['7 seconds', pd.NaT, '4 hours'])) + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py new file mode 100644 index 0000000000000..0d46ee4172211 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -0,0 +1,81 @@ +import numpy as np +import pandas.util.testing as tm + +import pandas as pd +from pandas import Series, timedelta_range, Timedelta +from pandas.util.testing import assert_series_equal + + +class TestSlicing(tm.TestCase): + + def test_partial_slice(self): + rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['5 day':'6 day'] + expected = s.iloc[86:134] + assert_series_equal(result, expected) + + result = s['5 day':] + expected = s.iloc[86:] + assert_series_equal(result, expected) + + result = s[:'6 day'] + expected = s.iloc[:134] + assert_series_equal(result, expected) + + result = s['6 days, 23:11:12'] + self.assertEqual(result, s.iloc[133]) + + self.assertRaises(KeyError, s.__getitem__, '50 days') + + def test_partial_slice_high_reso(self): + + # higher reso + rng = timedelta_range('1 day 10:11:12', freq='us', periods=2000) + s = Series(np.arange(len(rng)), index=rng) + + result = s['1 day 10:11:12':] + expected = s.iloc[0:] + assert_series_equal(result, expected) + + result = s['1 day 10:11:12.001':] + expected = s.iloc[1000:] + assert_series_equal(result, expected) + + result = s['1 days, 10:11:12.001001'] + self.assertEqual(result, s.iloc[1001]) + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + assert_series_equal(ts[l_slc], 
ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Timedelta(hours=7)::-1], SLC[7::-1]) + assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1]) + + assert_slices_equivalent(SLC[:Timedelta(hours=7):-1], SLC[:6:-1]) + assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1]) + + assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1]) + assert_slices_equivalent(SLC[Timedelta(hours=15):Timedelta(hours=7):- + 1], SLC[15:6:-1]) + assert_slices_equivalent(SLC['15 hours':Timedelta(hours=7):-1], + SLC[15:6:-1]) + assert_slices_equivalent(SLC[Timedelta(hours=15):'7 hours':-1], + SLC[15:6:-1]) + + assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py new file mode 100644 index 0000000000000..9000fb3beb279 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -0,0 +1,76 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, Int64Index + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_union(self): + + i1 = timedelta_range('1day', periods=5) + i2 = timedelta_range('3day', periods=5) + result = i1.union(i2) + expected = timedelta_range('1day', periods=7) + self.assert_index_equal(result, expected) + + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + + def test_union_coverage(self): + + idx = TimedeltaIndex(['3d', '1d', '2d']) + ordered = TimedeltaIndex(idx.sort_values(), freq='infer') + result = ordered.union(idx) + self.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered) + self.assert_index_equal(result, ordered) + self.assertEqual(result.freq, ordered.freq) + + def test_union_bug_1730(self): + + rng_a = timedelta_range('1 day', periods=4, freq='3H') + rng_b = timedelta_range('1 day', periods=4, freq='4H') + + result = rng_a.union(rng_b) + exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) + self.assert_index_equal(result, exp) + + def test_union_bug_1745(self): + + left = TimedeltaIndex(['1 day 15:19:49.695000']) + right = TimedeltaIndex(['2 day 13:04:21.322000', + '1 day 15:27:24.873000', + '1 day 15:31:05.350000']) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) + self.assert_index_equal(result, exp) + + def test_union_bug_4564(self): + + left = timedelta_range("1 day", "30d") + right = left + pd.offsets.Minute(15) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) + self.assert_index_equal(result, exp) + + def test_intersection_bug_1708(self): + index_1 = timedelta_range('1 day', periods=4, freq='h') + index_2 = index_1 + pd.offsets.Hour(5) + + result = index_1 & index_2 + self.assertEqual(len(result), 0) + + index_1 = timedelta_range('1 day', periods=4, freq='h') + index_2 = index_1 + 
pd.offsets.Hour(1) + + result = index_1 & index_2 + expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py new file mode 100644 index 0000000000000..4c8571e4f08f9 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -0,0 +1,592 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import (timedelta_range, date_range, Series, Timedelta, + DatetimeIndex, TimedeltaIndex, Index, DataFrame, + Int64Index, _np_version_under1p8) +from pandas.util.testing import (assert_almost_equal, assert_series_equal, + assert_index_equal) + +from ..datetimelike import DatetimeLike + +randn = np.random.randn + + +class TestTimedeltaIndex(DatetimeLike, tm.TestCase): + _holder = TimedeltaIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeTimedeltaIndex(10)) + self.setup_indices() + + def create_index(self): + return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + + def test_shift(self): + # test shift for TimedeltaIndex + # err8083 + + drange = self.create_index() + result = drange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + self.assert_index_equal(result, expected) + + def test_get_loc(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) + self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) + + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc(idx[1], method='nearest', tolerance='foo') + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) + + self.assertEqual(tidx.get_loc(pd.NaT), 1) + self.assertEqual(tidx.get_loc(None), 1) + self.assertEqual(tidx.get_loc(float('nan')), 1) + self.assertEqual(tidx.get_loc(np.nan), 1) + + def test_get_indexer(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + + res = idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) + + def test_numeric_compat(self): + 
+ idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, + self._holder(np.arange(5, dtype='int64') * 5)) + + result = idx * np.arange(5, dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + tm.assert_index_equal(result, self._holder(np.arange( + 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) + + # invalid + self.assertRaises(TypeError, lambda: idx * idx) + self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + + def test_pickle_compat_construction(self): + pass + + def test_ufunc_coercions(self): + # normal ops are also tested in tseries/test_timedeltas.py + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + + for result in [idx * 2, np.multiply(idx, 2)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], + freq='4H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '4H') + + for result in [idx / 2, np.divide(idx, 2)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], + freq='H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'H') + + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + for result in [-idx, np.negative(idx)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], + freq='-2H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '-2H') + + idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], + freq='H', name='x') + for result in [abs(idx), np.absolute(idx)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], + freq=None, name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, None) + + def test_fillna_timedelta(self): + # GH 11343 + idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) + + exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) + self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) + + exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) + idx.fillna(pd.Timedelta('3 hour')) + + exp = pd.Index( + [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + def test_difference_freq(self): + # GH14323: Difference of TimedeltaIndex should not preserve frequency + + index = timedelta_range("0 days", "5 days", freq="D") + + other = timedelta_range("1 days", "4 days", freq="D") + expected = TimedeltaIndex(["0 days", "5 days"], freq=None) + idx_diff = index.difference(other) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other) + expected = TimedeltaIndex(["0 days", "1 days"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + def 
test_take(self): + + tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] + idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') + expected = TimedeltaIndex(tds, freq=None, name='idx') + + taken1 = idx.take([2, 4, 10]) + taken2 = idx[[2, 4, 10]] + + for taken in [taken1, taken2]: + self.assert_index_equal(taken, expected) + tm.assertIsInstance(taken, TimedeltaIndex) + self.assertIsNone(taken.freq) + self.assertEqual(taken.name, expected.name) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'], + name='xxx') + result = idx.take(np.array([1, 0, -1])) + expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'], + name='xxx') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_isin(self): + + index = tm.makeTimedeltaIndex(4) + result = index.isin(index) + self.assertTrue(result.all()) + + result = index.isin(list(index)) + self.assertTrue(result.all()) + + assert_almost_equal(index.isin([index[2], 5]), + np.array([False, False, True, False])) + + def test_factorize(self): + idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', + '3 day']) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, exp_idx) + + # freq must be preserved + idx3 = timedelta_range('1 day', periods=4, freq='s') + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, idx3) + + def test_join_self(self): + + index = timedelta_range('1 day', periods=10) + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = index.join(index, how=kind) + tm.assert_index_equal(index, joined) + + def test_slice_keeps_name(self): + + # GH4226 + dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') + self.assertEqual(dr[1:].name, dr.name) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe(10, 10, + data_gen_f=lambda *args, **kwargs: randn(), + r_idx_type='i', c_idx_type='td') + str(df) + + cols = df.columns.join(df.index, how='outer') + joined = cols.join(df.columns) + self.assertEqual(cols.dtype, np.dtype('O')) + self.assertEqual(cols.dtype, joined.dtype) + tm.assert_index_equal(cols, joined) + + def test_sort_values(self): + + idx = TimedeltaIndex(['4d', '1d', '2d']) + + ordered = idx.sort_values() + self.assertTrue(ordered.is_monotonic) + + ordered = idx.sort_values(ascending=False) + self.assertTrue(ordered[::-1].is_monotonic) + + ordered, dexer = 
idx.sort_values(return_indexer=True) + self.assertTrue(ordered.is_monotonic) + self.assert_numpy_array_equal(dexer, + np.array([1, 2, 0]), + check_dtype=False) + + ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) + self.assertTrue(ordered[::-1].is_monotonic) + self.assert_numpy_array_equal(dexer, + np.array([0, 2, 1]), + check_dtype=False) + + def test_get_duplicates(self): + idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', + '4day']) + + result = idx.get_duplicates() + ex = TimedeltaIndex(['2 day', '3day']) + self.assert_index_equal(result, ex) + + def test_argmin_argmax(self): + idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01', + '1 day 00:00:02']) + self.assertEqual(idx.argmin(), 1) + self.assertEqual(idx.argmax(), 0) + + def test_misc_coverage(self): + + rng = timedelta_range('1 day', periods=5) + result = rng.groupby(rng.days) + tm.assertIsInstance(list(result.values())[0][0], Timedelta) + + idx = TimedeltaIndex(['3d', '1d', '2d']) + self.assertFalse(idx.equals(list(idx))) + + non_td = Index(list('abc')) + self.assertFalse(idx.equals(list(non_td))) + + def test_map(self): + + rng = timedelta_range('1 day', periods=10) + + f = lambda x: x.days + result = rng.map(f) + exp = Int64Index([f(x) for x in rng]) + tm.assert_index_equal(result, exp) + + def test_comparisons_nat(self): + + tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT, + '1 day 00:00:01', '5 day 00:00:03']) + tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT, + '1 day 00:00:02', '5 days 00:00:03']) + tdarr = np.array([np.timedelta64(2, 'D'), + np.timedelta64(2, 'D'), np.timedelta64('nat'), + np.timedelta64('nat'), + np.timedelta64(1, 'D') + np.timedelta64(2, 's'), + np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) + + if _np_version_under1p8: + # cannot test array because np.datetime('nat') returns today's date + cases = [(tdidx1, tdidx2)] + else: + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + + # Check pd.NaT is handles as the same as np.nan + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + self.assert_numpy_array_equal(result, expected) + + def test_comparisons_coverage(self): + rng = timedelta_range('1 days', periods=10) + + result = rng < rng[3] + exp = np.array([True, True, True] + [False] * 7) + self.assert_numpy_array_equal(result, exp) + + # raise TypeError for now + self.assertRaises(TypeError, rng.__lt__, rng[3].value) + + result = rng == list(rng) + exp = rng == rng + self.assert_numpy_array_equal(result, exp) + + def test_total_seconds(self): + # GH 10939 + # test index + rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, + freq='s') + expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, + 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. 
/ 1e9] + tm.assert_almost_equal(rng.total_seconds(), np.array(expt)) + + # test Series + s = Series(rng) + s_expt = Series(expt, index=[0, 1]) + tm.assert_series_equal(s.dt.total_seconds(), s_expt) + + # with nat + s[1] = np.nan + s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + + 12 + 100123456. / 1e9, np.nan], index=[0, 1]) + tm.assert_series_equal(s.dt.total_seconds(), s_expt) + + # with both nat + s = Series([np.nan, np.nan], dtype='timedelta64[ns]') + tm.assert_series_equal(s.dt.total_seconds(), + Series([np.nan, np.nan], index=[0, 1])) + + def test_pass_TimedeltaIndex_to_index(self): + + rng = timedelta_range('1 days', '10 days') + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pytimedelta(), dtype=object) + + self.assert_numpy_array_equal(idx.values, expected.values) + + def test_pickle(self): + + rng = timedelta_range('1 days', periods=10) + rng_p = self.round_trip_pickle(rng) + tm.assert_index_equal(rng, rng_p) + + def test_hash_error(self): + index = timedelta_range('1 days', periods=10) + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_append_join_nondatetimeindex(self): + rng = timedelta_range('1 days', periods=10) + idx = Index(['a', 'b', 'c', 'd']) + + result = rng.append(idx) + tm.assertIsInstance(result[0], Timedelta) + + # it works + rng.join(idx, how='outer') + + def test_append_numpy_bug_1681(self): + + td = timedelta_range('1 days', '10 days', freq='2D') + a = DataFrame() + c = DataFrame({'A': 'foo', 'B': td}, index=td) + str(c) + + result = a.append(c) + self.assertTrue((result['B'] == td).all()) + + def test_fields(self): + rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, + freq='s') + self.assert_numpy_array_equal(rng.days, np.array( + [1, 1], dtype='int64')) + self.assert_numpy_array_equal( + rng.seconds, + np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], + dtype='int64')) + self.assert_numpy_array_equal(rng.microseconds, np.array( + [100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) + self.assert_numpy_array_equal(rng.nanoseconds, np.array( + [456, 456], dtype='int64')) + + self.assertRaises(AttributeError, lambda: rng.hours) + self.assertRaises(AttributeError, lambda: rng.minutes) + self.assertRaises(AttributeError, lambda: rng.milliseconds) + + # with nat + s = Series(rng) + s[1] = np.nan + + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal(s.dt.seconds, Series( + [10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])) + + def test_freq_conversion(self): + + # doc example + + # series + td = Series(date_range('20130101', periods=4)) - \ + Series(date_range('20121201', periods=4)) + td[2] += timedelta(minutes=5, seconds=3) + td[3] = np.nan + + result = td / np.timedelta64(1, 'D') + expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan + ]) + assert_series_equal(result, expected) + + result = td.astype('timedelta64[D]') + expected = Series([31, 31, 31, np.nan]) + assert_series_equal(result, expected) + + result = td / np.timedelta64(1, 's') + expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, + np.nan]) + assert_series_equal(result, expected) + + result = td.astype('timedelta64[s]') + assert_series_equal(result, expected) + + # tdi + td = TimedeltaIndex(td) + + result = td / np.timedelta64(1, 'D') + expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) + assert_index_equal(result, expected) + + result = td.astype('timedelta64[D]') + expected = Index([31, 31, 31, 
np.nan]) + assert_index_equal(result, expected) + + result = td / np.timedelta64(1, 's') + expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, + np.nan]) + assert_index_equal(result, expected) + + result = td.astype('timedelta64[s]') + assert_index_equal(result, expected) + + +class TestSlicing(tm.TestCase): + + def test_timedelta(self): + # this is valid too + index = date_range('1/1/2000', periods=50, freq='B') + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + self.assertTrue(tm.equalContents(index, back)) + self.assertEqual(shifted.freq, index.freq) + self.assertEqual(shifted.freq, back.freq) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH4134, buggy with timedeltas + rng = date_range('2013', '2014') + s = Series(rng) + result1 = rng - pd.offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - pd.offsets.Hour(1)) + tm.assert_index_equal(result1, result4) + tm.assert_index_equal(result2, result3) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_series_box_timedelta(self): + rng = timedelta_range('1 day 1 s', periods=5, freq='h') + s = Series(rng) + tm.assertIsInstance(s[1], Timedelta) + tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py new file mode 100644 index 0000000000000..8bd56b5885bba --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -0,0 +1,51 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas.tseries.offsets import Day, Second +from pandas import to_timedelta, timedelta_range +from pandas.util.testing import assert_frame_equal + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_timedelta_range(self): + + expected = to_timedelta(np.arange(5), unit='D') + result = timedelta_range('0 days', periods=5, freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(11), unit='D') + result = timedelta_range('0 days', '10 days', freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day() + result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', + freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2) + result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(50), unit='T') * 30 + result = timedelta_range('0 days', freq='30T', periods=50) + tm.assert_index_equal(result, expected) + + # GH 11776 + arr = np.arange(10).reshape(2, 5) + df = pd.DataFrame(np.arange(10).reshape(2, 5)) + for arg in (arr, df): + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_timedelta(arg) + for errors in ['ignore', 'raise', 'coerce']: + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_timedelta(arg, errors=errors) + + # issue10583 + df = pd.DataFrame(np.random.normal(size=(10, 4))) + df.index = pd.timedelta_range(start='0s', periods=10, freq='s') + expected = df.loc[pd.Timedelta('0s'):, :] + result = df.loc['0s':, :] + assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py new file mode 100644 index 
0000000000000..2442051547312 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -0,0 +1,201 @@ +from datetime import time, timedelta +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas.util.testing import assert_series_equal +from pandas import (Series, Timedelta, to_timedelta, tslib, isnull, + TimedeltaIndex) + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_to_timedelta(self): + def conv(v): + return v.astype('m8[ns]') + + d1 = np.timedelta64(1, 'D') + + self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False), + conv(d1 + np.timedelta64(6 * 3600 + + 5 * 60 + 1, 's') + + np.timedelta64(30, 'us'))) + self.assertEqual(to_timedelta('15.5us', box=False), + conv(np.timedelta64(15500, 'ns'))) + + # empty string + result = to_timedelta('', box=False) + self.assertEqual(result.astype('int64'), tslib.iNaT) + + result = to_timedelta(['', '']) + self.assertTrue(isnull(result).all()) + + # pass thru + result = to_timedelta(np.array([np.timedelta64(1, 's')])) + expected = pd.Index(np.array([np.timedelta64(1, 's')])) + tm.assert_index_equal(result, expected) + + # ints + result = np.timedelta64(0, 'ns') + expected = to_timedelta(0, box=False) + self.assertEqual(result, expected) + + # Series + expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) + result = to_timedelta(Series(['1d', '1days 00:00:01'])) + tm.assert_series_equal(result, expected) + + # with units + result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64( + 10, 's').astype('m8[ns]')]) + expected = to_timedelta([0, 10], unit='s') + tm.assert_index_equal(result, expected) + + # single element conversion + v = timedelta(seconds=1) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + self.assertEqual(result, expected) + + v = np.timedelta64(timedelta(seconds=1)) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + self.assertEqual(result, expected) + + # arrays of various dtypes + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='s') + expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='m') + expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='h') + expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='timedelta64[s]') + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='timedelta64[D]') + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) + tm.assert_index_equal(result, expected) + + # Test with lists as input when box=false + expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') + result = to_timedelta(range(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + result = to_timedelta(np.arange(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + result = to_timedelta([0, 1, 2], unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + # Tests with fractional seconds as input: + expected = np.array( + [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') + result = 
to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + def test_to_timedelta_invalid(self): + + # bad value for errors parameter + msg = "errors must be one of" + tm.assertRaisesRegexp(ValueError, msg, to_timedelta, + ['foo'], errors='never') + + # these will error + self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) + self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo')) + + # time not supported ATM + self.assertRaises(ValueError, lambda: to_timedelta(time(second=1))) + self.assertTrue(to_timedelta( + time(second=1), errors='coerce') is pd.NaT) + + self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar'])) + tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), + to_timedelta(['foo', 'bar'], errors='coerce')) + + tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']), + to_timedelta(['1 day', 'bar', '1 min'], + errors='coerce')) + + # gh-13613: these should not error because errors='ignore' + invalid_data = 'apple' + self.assertEqual(invalid_data, to_timedelta( + invalid_data, errors='ignore')) + + invalid_data = ['apple', '1 days'] + tm.assert_numpy_array_equal( + np.array(invalid_data, dtype=object), + to_timedelta(invalid_data, errors='ignore')) + + invalid_data = pd.Index(['apple', '1 days']) + tm.assert_index_equal(invalid_data, to_timedelta( + invalid_data, errors='ignore')) + + invalid_data = Series(['apple', '1 days']) + tm.assert_series_equal(invalid_data, to_timedelta( + invalid_data, errors='ignore')) + + def test_to_timedelta_via_apply(self): + # GH 5458 + expected = Series([np.timedelta64(1, 's')]) + result = Series(['00:00:01']).apply(to_timedelta) + tm.assert_series_equal(result, expected) + + result = Series([to_timedelta('00:00:01')]) + tm.assert_series_equal(result, expected) + + def test_to_timedelta_on_missing_values(self): + # GH5438 + timedelta_NaT = np.timedelta64('NaT') + + actual = pd.to_timedelta(Series(['00:00:01', np.nan])) + expected = Series([np.timedelta64(1000000000, 'ns'), + timedelta_NaT], dtype=' r + + self.assertFalse(l == r) + self.assertTrue(l != r) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 0cef27d2e41fc..2abc83ca6109c 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1637,3 +1637,48 @@ def test_woy_boundary(self): for args in [(2000, 1, 1), (2000, 1, 2), ( 2005, 1, 1), (2005, 1, 2)]]) self.assertTrue((result == [52, 52, 53, 53]).all()) + + +class TestTsUtil(tm.TestCase): + + def test_min_valid(self): + # Ensure that Timestamp.min is a valid Timestamp + Timestamp(Timestamp.min) + + def test_max_valid(self): + # Ensure that Timestamp.max is a valid Timestamp + Timestamp(Timestamp.max) + + def test_to_datetime_bijective(self): + # Ensure that converting to datetime and back only loses precision + # by going from nanoseconds to microseconds. 
+ exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + self.assertEqual( + Timestamp(Timestamp.max.to_pydatetime()).value / 1000, + Timestamp.max.value / 1000) + + exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + self.assertEqual( + Timestamp(Timestamp.min.to_pydatetime()).value / 1000, + Timestamp.min.value / 1000) + + +class TestTslib(tm.TestCase): + + def test_round(self): + stamp = Timestamp('2000-01-05 05:09:15.13') + + def _check_round(freq, expected): + result = stamp.round(freq=freq) + self.assertEqual(result, expected) + + for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), + ('H', Timestamp('2000-01-05 05:00:00')), + ('S', Timestamp('2000-01-05 05:09:15'))]: + _check_round(freq, expected) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + stamp.round('foo') diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index be3b917cb8117..114cb02205d4f 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -2,10 +2,8 @@ from datetime import timedelta import numpy as np import pandas as pd -from pandas import (Series, Index, Int64Index, Timestamp, Period, - DatetimeIndex, PeriodIndex, TimedeltaIndex, - Timedelta, timedelta_range, date_range, Float64Index, - _np_version_under1p10) +from pandas import (Series, Index, Period, DatetimeIndex, PeriodIndex, + Timedelta, _np_version_under1p10) import pandas.tslib as tslib import pandas.tseries.period as period @@ -14,846 +12,6 @@ from pandas.tests.test_base import Ops -class TestTimedeltaIndexOps(Ops): - - def setUp(self): - super(TestTimedeltaIndexOps, self).setUp() - mask = lambda x: isinstance(x, TimedeltaIndex) - self.is_valid_objs = [o for o in self.objs if mask(o)] - self.not_valid_objs = [] - - def test_ops_properties(self): - self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', - 'milliseconds']) - self.check_ops_properties(['microseconds', 'nanoseconds']) - - def test_asobject_tolist(self): - idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), - Timedelta('3 days'), Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT, - timedelta(days=4)], name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), pd.NaT, - Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - def test_minmax(self): - - # monotonic - idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) - self.assertTrue(idx1.is_monotonic) - - # non-monotonic - idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) - self.assertFalse(idx2.is_monotonic) - - for idx in [idx1, idx2]: - self.assertEqual(idx.min(), Timedelta('1 days')), - 
self.assertEqual(idx.max(), Timedelta('3 days')), - self.assertEqual(idx.argmin(), 0) - self.assertEqual(idx.argmax(), 2) - - for op in ['min', 'max']: - # Return NaT - obj = TimedeltaIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = TimedeltaIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - def test_numpy_minmax(self): - dr = pd.date_range(start='2016-01-15', end='2016-01-20') - td = TimedeltaIndex(np.asarray(dr)) - - self.assertEqual(np.min(td), Timedelta('16815 days')) - self.assertEqual(np.max(td), Timedelta('16820 days')) - - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) - - self.assertEqual(np.argmin(td), 0) - self.assertEqual(np.argmax(td), 5) - - if not _np_version_under1p10: - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) - - def test_round(self): - td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') - elt = td[1] - - expected_rng = TimedeltaIndex([ - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 01:00:00'), - Timedelta('16801 days 02:00:00'), - Timedelta('16801 days 02:00:00'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(td.round(freq='H'), expected_rng) - self.assertEqual(elt.round(freq='H'), expected_elt) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - td.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - - def test_representation(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" - - exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " - "freq='D')") - - exp3 = ("TimedeltaIndex(['1 days', '2 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " - "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """Series([], dtype: timedelta64[ns])""" - - exp2 = """0 1 days -dtype: timedelta64[ns]""" - - exp3 = """0 1 days -1 2 days -dtype: timedelta64[ns]""" - - exp4 = """0 1 days -1 2 days -2 3 days 
-dtype: timedelta64[ns]""" - - exp5 = """0 1 days 00:00:01 -1 2 days 00:00:00 -2 3 days 00:00:00 -dtype: timedelta64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = repr(pd.Series(idx)) - self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex: 0 entries -Freq: D""" - - exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days -Freq: D""" - - exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days -Freq: D""" - - exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days -Freq: D""" - - exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " - "00:00:00") - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = idx.summary() - self.assertEqual(result, expected) - - def test_add_iadd(self): - - # only test adding/sub offsets as + is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng + delta - expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', - freq='D') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng + 1 - expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - def test_sub_isub(self): - # only test adding/sub offsets as - is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng - delta - expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng - 1 - expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - idx = TimedeltaIndex(['1 day', '2 day']) - msg = "cannot subtract a datelike from a TimedeltaIndex" - with tm.assertRaisesRegexp(TypeError, msg): - idx - Timestamp('2011-01-01') - - result = Timestamp('2011-01-01') + idx - expected = DatetimeIndex(['2011-01-02', '2011-01-03']) - tm.assert_index_equal(result, expected) - - def test_ops_compat(self): - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - rng = timedelta_range('1 days', '10 days', name='foo') - - # multiply - for offset in offsets: - self.assertRaises(TypeError, lambda: rng * offset) - - # divide - expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - for offset in offsets: - result = rng / offset - tm.assert_index_equal(result, expected, exact=False) - - # divide with nats - rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - expected = Float64Index([12, np.nan, 24], name='foo') - for offset in offsets: - result = rng / 
offset - tm.assert_index_equal(result, expected) - - # don't allow division by NaT (make could in the future) - self.assertRaises(TypeError, lambda: rng / pd.NaT) - - def test_subtraction_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 days') - dt = Timestamp('20130101') - - self.assertRaises(TypeError, lambda: tdi - dt) - self.assertRaises(TypeError, lambda: tdi - dti) - self.assertRaises(TypeError, lambda: td - dt) - self.assertRaises(TypeError, lambda: td - dti) - - result = dt - dti - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = dti - dt - expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = tdi - td - expected = TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = td - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = dti - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], name='bar') - tm.assert_index_equal(result, expected, check_names=False) - - result = dt - tdi - expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') - tm.assert_index_equal(result, expected) - - def test_subtraction_ops_with_tz(self): - - # check that dt/dti subtraction ops with tz are validated - dti = date_range('20130101', periods=3) - ts = Timestamp('20130101') - dt = ts.to_pydatetime() - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - ts_tz = Timestamp('20130101').tz_localize('US/Eastern') - ts_tz2 = Timestamp('20130101').tz_localize('CET') - dt_tz = ts_tz.to_pydatetime() - td = Timedelta('1 days') - - def _check(result, expected): - self.assertEqual(result, expected) - self.assertIsInstance(result, Timedelta) - - # scalars - result = ts - ts - expected = Timedelta('0 days') - _check(result, expected) - - result = dt_tz - ts_tz - expected = Timedelta('0 days') - _check(result, expected) - - result = ts_tz - dt_tz - expected = Timedelta('0 days') - _check(result, expected) - - # tz mismatches - self.assertRaises(TypeError, lambda: dt_tz - ts) - self.assertRaises(TypeError, lambda: dt_tz - dt) - self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) - self.assertRaises(TypeError, lambda: dt - dt_tz) - self.assertRaises(TypeError, lambda: ts - dt_tz) - self.assertRaises(TypeError, lambda: ts_tz2 - ts) - self.assertRaises(TypeError, lambda: ts_tz2 - dt) - self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) - - # with dti - self.assertRaises(TypeError, lambda: dti - ts_tz) - self.assertRaises(TypeError, lambda: dti_tz - ts) - self.assertRaises(TypeError, lambda: dti_tz - ts_tz2) - - result = dti_tz - dt_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = dt_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = dti_tz - ts_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = ts_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = td - td - expected = Timedelta('0 days') - _check(result, expected) - - result = 
dti_tz - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], tz='US/Eastern') - tm.assert_index_equal(result, expected) - - def test_dti_tdi_numeric_ops(self): - - # These are normally union/diff set-like ops - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - - # TODO(wesm): unused? - # td = Timedelta('1 days') - # dt = Timestamp('20130101') - - result = tdi - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = dti - tdi # name will be reset - expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) - tm.assert_index_equal(result, expected) - - def test_sub_period(self): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - for freq in [None, 'H']: - idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) - - with tm.assertRaises(TypeError): - idx - p - - with tm.assertRaises(TypeError): - p - idx - - def test_addition_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 days') - dt = Timestamp('20130101') - - result = tdi + dt - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = dt + tdi - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = td + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + td - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - # unequal length - self.assertRaises(ValueError, lambda: tdi + dti[0:1]) - self.assertRaises(ValueError, lambda: tdi[0:1] + dti) - - # random indexes - self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) - - # this is a union! 
- # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) - - result = tdi + dti # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dti + tdi # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dt + td - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - result = td + dt - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - def test_comp_nat(self): - left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')]) - right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) - - def test_value_counts_unique(self): - # GH 7735 - - idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) - - exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) - expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) - tm.assert_index_equal(idx.unique(), expected) - - idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', - '1 days 09:00:00', '1 days 08:00:00', - '1 days 08:00:00', pd.NaT]) - - exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', - pd.NaT]) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - - def test_nonunique_contains(self): - # GH 9512 - for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], - ['00:01:00', '00:01:00', '00:02:00'], - ['00:01:00', '00:01:00', '00:00:01'])): - tm.assertIn(idx[0], idx) - - def test_unknown_attribute(self): - # GH 9680 - tdi = pd.timedelta_range(start=0, periods=10, freq='1s') - ts = pd.Series(np.random.normal(size=10), index=tdi) - self.assertNotIn('foo', ts.__dict__.keys()) - self.assertRaises(AttributeError, lambda: ts.foo) - - def test_order(self): - # GH 10295 - idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', - name='idx') - idx2 = TimedeltaIndex( - ['1 hour', '2 hour', '3 hour'], freq='H', name='idx') - - for idx in [idx1, idx2]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) - - ordered = 
idx.sort_values(ascending=False) - expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - self.assert_index_equal(ordered, idx[::-1]) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', - '2 hour ', '1 hour'], name='idx1') - exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour', - '3 hour', '5 hour'], name='idx1') - - idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', - '2 day', '1 day'], name='idx2') - - # TODO(wesm): unused? - # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', - # '3 day', '5 day'], name='idx2') - - # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', - # '2 minute', pd.NaT], name='idx3') - # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', - # '5 minute'], name='idx3') - - for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) - - ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - def test_getitem(self): - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx[0] - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx[0:5] - expected = pd.timedelta_range('1 day', '5 day', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[0:10:2] - expected = pd.timedelta_range('1 day', '9 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[-20:-5:3] - expected = pd.timedelta_range('12 day', '24 day', freq='3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[4::-1] - expected = TimedeltaIndex(['5 day', '4 day', '3 day', - '2 day', '1 day'], - freq='-1D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - def test_drop_duplicates_metadata(self): - # GH 10115 - idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) - - idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset - result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertIsNone(result.freq) - - def test_drop_duplicates(self): - # to check Index/Series compat - base = pd.timedelta_range('1 day', '31 day', 
freq='D', name='idx') - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep='last') - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep='last') - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - - def test_take(self): - # GH 10295 - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx.take([0]) - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx.take([-1]) - self.assertEqual(result, pd.Timedelta('31 day')) - - result = idx.take([0, 1, 2]) - expected = pd.timedelta_range('1 day', '3 day', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.timedelta_range('1 day', '5 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([7, 4, 1]) - expected = pd.timedelta_range('8 day', '2 day', freq='-3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([3, 2, 5]) - expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - result = idx.take([-3, 2, 5]) - expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - def test_take_invalid_kwargs(self): - idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') - - def test_infer_freq(self): - # GH 11018 - for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' - ]: - idx = pd.timedelta_range('1', freq=freq, periods=10) - result = pd.TimedeltaIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - self.assertEqual(result.freq, freq) - - def test_nat_new(self): - - idx = pd.timedelta_range('1', freq='D', periods=5, name='x') - result = idx._nat_new() - exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - - def test_shift(self): - # GH 9903 - idx = pd.TimedeltaIndex([], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') 
- tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - tm.assert_index_equal(idx.shift(0, freq='T'), idx) - exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], - name='xxx') - tm.assert_index_equal(idx.shift(3, freq='T'), exp) - exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], - name='xxx') - tm.assert_index_equal(idx.shift(-3, freq='T'), exp) - - def test_repeat(self): - index = pd.timedelta_range('1 days', periods=2, freq='D') - exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = TimedeltaIndex(['1 days', 'NaT', '3 days']) - exp = TimedeltaIndex(['1 days', '1 days', '1 days', - 'NaT', 'NaT', 'NaT', - '3 days', '3 days', '3 days']) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - def test_nat(self): - self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) - self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) - - idx = pd.TimedeltaIndex(['1 days', '2 days']) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([], dtype=np.intp)) - - idx = pd.TimedeltaIndex(['1 days', 'NaT']) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([1], dtype=np.intp)) - - def test_equals(self): - # GH 13107 - idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) - - idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.asobject.equals(idx2.asobject)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) - - class TestPeriodIndexOps(Ops): def setUp(self): diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py deleted file mode 100644 index a64882380850b..0000000000000 --- a/pandas/tseries/tests/test_daterange.py +++ /dev/null @@ -1,820 +0,0 @@ -from datetime import datetime -from pandas.compat import range -import numpy as np - -from pandas.core.index import Index -from pandas.tseries.index import DatetimeIndex - -from pandas import Timestamp -from pandas.tseries.offsets import (BDay, BMonthEnd, CDay, MonthEnd, - generate_range, DateOffset, Minute) -from pandas.tseries.index import cdate_range, bdate_range, date_range - -from pandas.core import common as com -from pandas.util.testing import assertRaisesRegexp -import pandas.util.testing as tm - - -def eq_gen_range(kwargs, expected): - rng = generate_range(**kwargs) - assert (np.array_equal(list(rng), expected)) - - -START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) - - -class TestGenRangeGeneration(tm.TestCase): - - def test_generate(self): - rng1 = list(generate_range(START, END, offset=BDay())) - rng2 = list(generate_range(START, END, time_rule='B')) - self.assertEqual(rng1, 
rng2) - - def test_generate_cday(self): - rng1 = list(generate_range(START, END, offset=CDay())) - rng2 = list(generate_range(START, END, time_rule='C')) - self.assertEqual(rng1, rng2) - - def test_1(self): - eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2), - [datetime(2009, 3, 25), datetime(2009, 3, 26)]) - - def test_2(self): - eq_gen_range(dict(start=datetime(2008, 1, 1), - end=datetime(2008, 1, 3)), - [datetime(2008, 1, 1), - datetime(2008, 1, 2), - datetime(2008, 1, 3)]) - - def test_3(self): - eq_gen_range(dict(start=datetime(2008, 1, 5), - end=datetime(2008, 1, 6)), - []) - - def test_precision_finer_than_offset(self): - # GH 9907 - result1 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2016-04-22 00:00:00', freq='Q') - result2 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2015-06-22 00:00:04', freq='W') - expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03', - '2015-12-31 00:00:03', '2016-03-31 00:00:03'] - expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03', - '2015-05-03 00:00:03', '2015-05-10 00:00:03', - '2015-05-17 00:00:03', '2015-05-24 00:00:03', - '2015-05-31 00:00:03', '2015-06-07 00:00:03', - '2015-06-14 00:00:03', '2015-06-21 00:00:03'] - expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]', - freq='Q-DEC', tz=None) - expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', - freq='W-SUN', tz=None) - self.assert_index_equal(result1, expected1) - self.assert_index_equal(result2, expected2) - - -class TestDateRange(tm.TestCase): - - def setUp(self): - self.rng = bdate_range(START, END) - - def test_constructor(self): - bdate_range(START, END, freq=BDay()) - bdate_range(START, periods=20, freq=BDay()) - bdate_range(end=START, periods=20, freq=BDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') - - def test_naive_aware_conflicts(self): - naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), - tz="Asia/Hong_Kong") - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware) - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive) - - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, offset=BDay()) - DatetimeIndex._cached_range(START, periods=20, offset=BDay()) - DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - - assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range, - START, END) - - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) - - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) - - assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) - - def test_cached_range_bug(self): - rng = date_range('2010-09-01 05:00:00', periods=50, - freq=DateOffset(hours=6)) - self.assertEqual(len(rng), 50) - self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) - - def test_timezone_comparaison_bug(self): - start = Timestamp('20130220 10:00', tz='US/Eastern') - try: - date_range(start, periods=2, tz='US/Eastern') - except AssertionError: - self.fail() - - def test_timezone_comparaison_assert(self): - start = Timestamp('20130220 10:00', tz='US/Eastern') - self.assertRaises(AssertionError, date_range, start, periods=2, - tz='Europe/Berlin') - - def test_comparison(self): - d = self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - 
self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, BDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=BDay()) - self.assertEqual(shifted[0], rng[0] + BDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - tm.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_union_not_cacheable(self): - rng = date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_union = rng1.union(rng2) - self.assert_index_equal(the_union, rng) - - rng1 = rng[10:] - rng2 = rng[15:35] - the_union = rng1.union(rng2) - expected = rng[10:] - self.assert_index_equal(the_union, expected) - - def test_intersection(self): - rng = date_range('1/1/2000', 
periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_int = rng1.intersection(rng2) - expected = rng[10:25] - self.assert_index_equal(the_int, expected) - tm.assertIsInstance(the_int, DatetimeIndex) - self.assertEqual(the_int.offset, rng.offset) - - the_int = rng1.intersection(rng2.view(DatetimeIndex)) - self.assert_index_equal(the_int, expected) - - # non-overlapping - the_int = rng[:10].intersection(rng[10:]) - expected = DatetimeIndex([]) - self.assert_index_equal(the_int, expected) - - def test_intersection_bug(self): - # GH #771 - a = bdate_range('11/30/2011', '12/31/2011') - b = bdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - - def test_misc(self): - end = datetime(2009, 5, 13) - dr = bdate_range(end=end, periods=20) - firstDate = end - 19 * BDay() - - assert len(dr) == 20 - assert dr[0] == firstDate - assert dr[-1] == end - - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - self.assertRaises(ValueError, Timestamp, badly_formed_date) - - self.assertRaises(ValueError, bdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, badly_formed_date, - badly_formed_date) - - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_identical(self): - t1 = self.rng.copy() - t2 = self.rng.copy() - self.assertTrue(t1.identical(t2)) - - # name - t1 = t1.rename('foo') - self.assertTrue(t1.equals(t2)) - self.assertFalse(t1.identical(t2)) - t2 = t2.rename('foo') - self.assertTrue(t1.identical(t2)) - - # freq - t2v = Index(t2.values) - self.assertTrue(t1.equals(t2v)) - self.assertFalse(t1.identical(t2v)) - - def test_daterange_bug_456(self): - # GH #456 - rng1 = bdate_range('12/5/2011', '12/5/2011') - rng2 = bdate_range('12/2/2011', '12/5/2011') - rng2.offset = BDay() - - result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) - - def test_error_with_zero_monthends(self): - self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', - freq=MonthEnd(0)) - - def test_range_bug(self): - # GH #770 - offset = DateOffset(months=3) - result = date_range("2011-1-1", "2012-1-31", freq=offset) - - start = datetime(2011, 1, 1) - exp_values = [start + i * offset for i in range(5)] - tm.assert_index_equal(result, DatetimeIndex(exp_values)) - - def test_range_tz_pytz(self): - # GH 2906 - tm._skip_if_no_pytz() - from pytz import timezone - - tz = timezone('US/Eastern') - start = tz.localize(datetime(2011, 1, 1)) - end = tz.localize(datetime(2011, 1, 3)) - - dr = date_range(start=start, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - dr = date_range(end=end, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - dr = date_range(start=start, end=end) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - def test_range_tz_dst_straddle_pytz(self): - - tm._skip_if_no_pytz() - from pytz import timezone - tz = 
timezone('US/Eastern') - dates = [(tz.localize(datetime(2014, 3, 6)), - tz.localize(datetime(2014, 3, 12))), - (tz.localize(datetime(2013, 11, 1)), - tz.localize(datetime(2013, 11, 6)))] - for (start, end) in dates: - dr = date_range(start, end, freq='D') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - dr = date_range(start, end, freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - dr = date_range(start.replace(tzinfo=None), end.replace( - tzinfo=None), freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - def test_range_tz_dateutil(self): - # GH 2906 - tm._skip_if_no_dateutil() - # Use maybe_get_tz to fix filename in tz under dateutil. - from pandas.tslib import maybe_get_tz - tz = lambda x: maybe_get_tz('dateutil/' + x) - - start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) - end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) - - dr = date_range(start=start, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - dr = date_range(end=end, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - dr = date_range(start=start, end=end) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - def test_month_range_union_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_month_range_union_tz_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_range_closed(self): - begin = datetime(2011, 1, 1) - end = datetime(2014, 1, 1) - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left = left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - def test_range_closed_with_tz_aware_start_end(self): - # GH12409, GH12684 - begin = Timestamp('2011/1/1', tz='US/Eastern') - end = Timestamp('2014/1/1', tz='US/Eastern') - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left 
= left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - begin = Timestamp('2011/1/1') - end = Timestamp('2014/1/1') - begintz = Timestamp('2011/1/1', tz='US/Eastern') - endtz = Timestamp('2014/1/1', tz='US/Eastern') - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq, - tz='US/Eastern') - left = date_range(begin, end, closed="left", freq=freq, - tz='US/Eastern') - right = date_range(begin, end, closed="right", freq=freq, - tz='US/Eastern') - expected_left = left - expected_right = right - - if endtz == closed[-1]: - expected_left = closed[:-1] - if begintz == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - def test_range_closed_boundary(self): - # GH 11804 - for closed in ['right', 'left', None]: - right_boundary = date_range('2015-09-12', '2015-12-01', - freq='QS-MAR', closed=closed) - left_boundary = date_range('2015-09-01', '2015-09-12', - freq='QS-MAR', closed=closed) - both_boundary = date_range('2015-09-01', '2015-12-01', - freq='QS-MAR', closed=closed) - expected_right = expected_left = expected_both = both_boundary - - if closed == 'right': - expected_left = both_boundary[1:] - if closed == 'left': - expected_right = both_boundary[:-1] - if closed is None: - expected_right = both_boundary[1:] - expected_left = both_boundary[:-1] - - self.assert_index_equal(right_boundary, expected_right) - self.assert_index_equal(left_boundary, expected_left) - self.assert_index_equal(both_boundary, expected_both) - - def test_years_only(self): - # GH 6961 - dr = date_range('2014', '2015', freq='M') - self.assertEqual(dr[0], datetime(2014, 1, 31)) - self.assertEqual(dr[-1], datetime(2014, 12, 31)) - - def test_freq_divides_end_in_nanos(self): - # GH 10885 - result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00', - freq='345min') - result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00', - freq='345min') - expected_1 = DatetimeIndex(['2005-01-12 10:00:00', - '2005-01-12 15:45:00'], - dtype='datetime64[ns]', freq='345T', - tz=None) - expected_2 = DatetimeIndex(['2005-01-13 10:00:00', - '2005-01-13 15:45:00'], - dtype='datetime64[ns]', freq='345T', - tz=None) - self.assert_index_equal(result_1, expected_1) - self.assert_index_equal(result_2, expected_2) - - -class TestCustomDateRange(tm.TestCase): - - def setUp(self): - self.rng = cdate_range(START, END) - - def test_constructor(self): - cdate_range(START, END, freq=CDay()) - cdate_range(START, periods=20, freq=CDay()) - cdate_range(end=START, periods=20, freq=CDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') - - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, offset=CDay()) - DatetimeIndex._cached_range(START, periods=20, - offset=CDay()) - DatetimeIndex._cached_range(end=START, periods=20, - offset=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) - - self.assertRaises(Exception, DatetimeIndex._cached_range, START, - freq=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, - freq=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, - freq=CDay()) - - def test_comparison(self): - d = 
self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, CDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - with tm.assert_produces_warning(com.PerformanceWarning): - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=CDay()) - self.assertEqual(shifted[0], rng[0] + CDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - self.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_intersection_bug(self): - # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - 
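# A hedged aside: a minimal sketch of the behaviour the surrounding
# TestCustomDateRange cases exercise. cdate_range builds a DatetimeIndex on a
# CustomBusinessDay (CDay) calendar, so a weekmask or holiday list changes
# which dates appear. The expected values below are taken from the
# test_cdaterange* cases later in this file; the import path matches the
# 2016-era pandas layout used here and may differ in later releases.
from pandas.tseries.index import DatetimeIndex, cdate_range

# default weekmask (Mon-Fri): Wed 2013-05-01, Thu 2013-05-02, Fri 2013-05-03
rng = cdate_range('2013-05-01', periods=3)
assert rng.equals(DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']))

# Friday and Saturday dropped from the weekmask, so the range skips to Sunday
rng = cdate_range('2013-05-01', periods=3, weekmask='Sun Mon Tue Wed Thu')
assert rng.equals(DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']))

# declaring 2013-05-01 a holiday pushes the range forward one business day
rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01'])
assert rng.equals(DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']))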
def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - - def test_misc(self): - end = datetime(2009, 5, 13) - dr = cdate_range(end=end, periods=20) - firstDate = end - 19 * CDay() - - assert len(dr) == 20 - assert dr[0] == firstDate - assert dr[-1] == end - - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - self.assertRaises(ValueError, Timestamp, badly_formed_date) - - self.assertRaises(ValueError, cdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, badly_formed_date, - badly_formed_date) - - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_daterange_bug_456(self): - # GH #456 - rng1 = cdate_range('12/5/2011', '12/5/2011') - rng2 = cdate_range('12/2/2011', '12/5/2011') - rng2.offset = CDay() - - result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) - - def test_cdaterange(self): - rng = cdate_range('2013-05-01', periods=3) - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_weekmask(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu') - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_holidays(self): - rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_weekmask_and_holidays(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu', - holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) - self.assert_index_equal(xp, rng) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index a39830b6aede6..3459da9d2b5c5 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -6,27 +6,25 @@ """ +import numpy as np +from numpy.random import randn from datetime import datetime, date, timedelta -from pandas import Timestamp, _period -from pandas.tseries.frequencies import MONTHS, DAYS, _period_code_map -from pandas.tseries.period import Period, PeriodIndex, period_range -from pandas.tseries.index import DatetimeIndex, date_range, Index -from pandas.tseries.tools import to_datetime +import pandas as pd +import pandas.util.testing as tm import pandas.tseries.period as period import pandas.tseries.offsets as offsets - -import pandas as pd -import numpy as np -from numpy.random import randn +from pandas.tseries.tools import to_datetime +from pandas.tseries.period import Period, PeriodIndex, period_range +from pandas.tseries.index import DatetimeIndex, date_range, Index +from pandas._period import period_ordinal, period_asfreq from pandas.compat import range, lrange, lmap, zip, text_type, PY3, iteritems from pandas.compat.numpy import np_datetime64_compat - -from pandas import (Series, DataFrame, +from pandas.tseries.frequencies import (MONTHS, DAYS, _period_code_map, + get_freq) +from pandas import (Series, DataFrame, Timestamp, _period, tslib, _np_version_under1p9, _np_version_under1p10, _np_version_under1p12) -from pandas import 
tslib -import pandas.util.testing as tm class TestPeriodProperties(tm.TestCase): @@ -4970,3 +4968,98 @@ def test_get_period_field_raises_on_out_of_range(self): def test_get_period_field_array_raises_on_out_of_range(self): self.assertRaises(ValueError, _period.get_period_field_arr, -1, np.empty(1), 0) + + +class TestTslib(tm.TestCase): + def test_intraday_conversion_factors(self): + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('H'), False), 24) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('T'), False), 1440) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('S'), False), 86400) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('L'), False), 86400000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('U'), False), 86400000000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('N'), False), 86400000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('T'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('S'), False), 3600) + self.assertEqual(period_asfreq(1, get_freq('H'), + get_freq('L'), False), 3600000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('U'), False), 3600000000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('N'), False), 3600000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('S'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('L'), False), 60000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('U'), False), 60000000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('N'), False), 60000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('S'), get_freq('L'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('S'), + get_freq('U'), False), 1000000) + self.assertEqual(period_asfreq(1, get_freq( + 'S'), get_freq('N'), False), 1000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('L'), get_freq('U'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('L'), + get_freq('N'), False), 1000000) + + self.assertEqual(period_asfreq( + 1, get_freq('U'), get_freq('N'), False), 1000) + + def test_period_ordinal_start_values(self): + # information for 1.1.1970 + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('A'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('M'))) + self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('D'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('B'))) + + def test_period_ordinal_week(self): + self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, + get_freq('W'))) + + self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('W'))) + + def test_period_ordinal_business_day(self): + # Thursday + self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, + get_freq('B'))) + # Friday + self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, + get_freq('B'))) + # Saturday + self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, + get_freq('B'))) + # Sunday + self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('B'))) + # Monday + self.assertEqual(11417, 
period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('B'))) + # Tuesday + self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, + get_freq('B'))) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py deleted file mode 100644 index 170d5cdafa60b..0000000000000 --- a/pandas/tseries/tests/test_timedeltas.py +++ /dev/null @@ -1,2051 +0,0 @@ -# pylint: disable-msg=E1101,W0612 - -from __future__ import division -from datetime import timedelta, time - -from distutils.version import LooseVersion -import numpy as np -import pandas as pd - -from pandas import (Index, Series, DataFrame, Timestamp, Timedelta, - TimedeltaIndex, isnull, date_range, - timedelta_range, Int64Index) -from pandas.compat import range -from pandas import compat, to_timedelta, tslib -from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct -from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assert_almost_equal, assert_index_equal) -from pandas.tseries.offsets import Day, Second -import pandas.util.testing as tm -from numpy.random import randn -from pandas import _np_version_under1p8 - -iNaT = tslib.iNaT - - -class TestTimedeltas(tm.TestCase): - - def setUp(self): - pass - - def test_get_loc_nat(self): - tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) - - self.assertEqual(tidx.get_loc(pd.NaT), 1) - self.assertEqual(tidx.get_loc(None), 1) - self.assertEqual(tidx.get_loc(float('nan')), 1) - self.assertEqual(tidx.get_loc(np.nan), 1) - - def test_contains(self): - # Checking for any NaT-like objects - # GH 13603 - td = to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertFalse((v in td)) - - td = to_timedelta([pd.NaT]) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertTrue((v in td)) - - def test_construction(self): - - expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10, unit='d').value, expected) - self.assertEqual(Timedelta(10.0, unit='d').value, expected) - self.assertEqual(Timedelta('10 days').value, expected) - self.assertEqual(Timedelta(days=10).value, expected) - self.assertEqual(Timedelta(days=10.0).value, expected) - - expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta('10 days 00:00:10').value, expected) - self.assertEqual(Timedelta(days=10, seconds=10).value, expected) - self.assertEqual( - Timedelta(days=10, milliseconds=10 * 1000).value, expected) - self.assertEqual( - Timedelta(days=10, microseconds=10 * 1000 * 1000).value, expected) - - # test construction with np dtypes - # GH 8757 - timedelta_kwargs = {'days': 'D', - 'seconds': 's', - 'microseconds': 'us', - 'milliseconds': 'ms', - 'minutes': 'm', - 'hours': 'h', - 'weeks': 'W'} - npdtypes = [np.int64, np.int32, np.int16, np.float64, np.float32, - np.float16] - for npdtype in npdtypes: - for pykwarg, npkwarg in timedelta_kwargs.items(): - expected = np.timedelta64(1, - npkwarg).astype('m8[ns]').view('i8') - self.assertEqual( - Timedelta(**{pykwarg: npdtype(1)}).value, expected) - - # rounding cases - self.assertEqual(Timedelta(82739999850000).value, 82739999850000) - self.assertTrue('0 days 22:58:59.999850' in str(Timedelta( - 82739999850000))) - self.assertEqual(Timedelta(123072001000000).value, 123072001000000) - self.assertTrue('1 days 10:11:12.001' in str(Timedelta( - 123072001000000))) - - # string conversion with/without leading zero - # GH 9570 - 
self.assertEqual(Timedelta('0:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('00:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1)) - self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1)) - - # more strings & abbrevs - # GH 8190 - self.assertEqual(Timedelta('1 h'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hour'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hr'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hours'), timedelta(hours=1)) - self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1)) - self.assertEqual(Timedelta('1 m'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1.5 m'), timedelta(seconds=90)) - self.assertEqual(Timedelta('1 minute'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 minutes'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 s'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 second'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 seconds'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 ms'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 milli'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 millisecond'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 us'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 micros'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 microsecond'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1.5 microsecond'), - Timedelta('00:00:00.000001500')) - self.assertEqual(Timedelta('1 ns'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nano'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nanosecond'), - Timedelta('00:00:00.000000001')) - - # combos - self.assertEqual(Timedelta('10 days 1 hour'), - timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h'), timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h 1m 1s'), timedelta( - days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=1, microseconds=3)) - self.assertEqual(Timedelta('-10 days 1 h 1.5m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=31, microseconds=3)) - - # currently invalid as it has a - on the hhmmdd part (only allowed on - # the days) - self.assertRaises(ValueError, - lambda: Timedelta('-10 days -1 h 1.5m 1s 3us')) - - # only leading neg signs are allowed - self.assertRaises(ValueError, - lambda: Timedelta('10 days -1 h 1.5m 1s 3us')) - - # no units specified - self.assertRaises(ValueError, lambda: Timedelta('3.1415')) - - # invalid construction - tm.assertRaisesRegexp(ValueError, "cannot construct a Timedelta", - lambda: Timedelta()) - tm.assertRaisesRegexp(ValueError, "unit abbreviation w/o a number", - lambda: Timedelta('foo')) - tm.assertRaisesRegexp(ValueError, - "cannot construct a Timedelta from the passed " - "arguments, allowed keywords are ", - lambda: Timedelta(day=10)) - - # roundtripping both for string and value - for v in ['1s', '-1s', '1us', '-1us', '1 day', '-1 day', - '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns', - '1ns', '-23:59:59.999999999']: - - td = Timedelta(v) - self.assertEqual(Timedelta(td.value), td) - - # str does not normally display nanos - if 
not td.nanoseconds: - self.assertEqual(Timedelta(str(td)), td) - self.assertEqual(Timedelta(td._repr_base(format='all')), td) - - # floats - expected = np.timedelta64( - 10, 's').astype('m8[ns]').view('i8') + np.timedelta64( - 500, 'ms').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10.5, unit='s').value, expected) - - # nat - self.assertEqual(Timedelta('').value, iNaT) - self.assertEqual(Timedelta('nat').value, iNaT) - self.assertEqual(Timedelta('NAT').value, iNaT) - self.assertEqual(Timedelta(None).value, iNaT) - self.assertEqual(Timedelta(np.nan).value, iNaT) - self.assertTrue(isnull(Timedelta('nat'))) - - # offset - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Second(2)), - Timedelta('0 days, 00:00:02')) - - # unicode - # GH 11995 - expected = Timedelta('1H') - result = pd.Timedelta(u'1H') - self.assertEqual(result, expected) - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta(u'0 days, 02:00:00')) - - self.assertRaises(ValueError, lambda: Timedelta(u'foo bar')) - - def test_round(self): - - t1 = Timedelta('1 days 02:34:56.789123456') - t2 = Timedelta('-1 days 02:34:56.789123456') - - for (freq, s1, s2) in [('N', t1, t2), - ('U', Timedelta('1 days 02:34:56.789123000'), - Timedelta('-1 days 02:34:56.789123000')), - ('L', Timedelta('1 days 02:34:56.789000000'), - Timedelta('-1 days 02:34:56.789000000')), - ('S', Timedelta('1 days 02:34:57'), - Timedelta('-1 days 02:34:57')), - ('2S', Timedelta('1 days 02:34:56'), - Timedelta('-1 days 02:34:56')), - ('5S', Timedelta('1 days 02:34:55'), - Timedelta('-1 days 02:34:55')), - ('T', Timedelta('1 days 02:35:00'), - Timedelta('-1 days 02:35:00')), - ('12T', Timedelta('1 days 02:36:00'), - Timedelta('-1 days 02:36:00')), - ('H', Timedelta('1 days 03:00:00'), - Timedelta('-1 days 03:00:00')), - ('d', Timedelta('1 days'), - Timedelta('-1 days'))]: - r1 = t1.round(freq) - self.assertEqual(r1, s1) - r2 = t2.round(freq) - self.assertEqual(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') - t2 = -1 * t1 - t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') - t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') - - # note that negative times round DOWN! 
so don't give whole numbers - for (freq, s1, s2) in [('N', t1, t2), - ('U', t1, t2), - ('L', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('S', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('12T', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('H', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('d', t1c, - pd.TimedeltaIndex([-1, -1, -1], unit='D') - )]: - - r1 = t1.round(freq) - tm.assert_index_equal(r1, s1) - r2 = t2.round(freq) - tm.assert_index_equal(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - def test_repr(self): - - self.assertEqual(repr(Timedelta(10, unit='d')), - "Timedelta('10 days 00:00:00')") - self.assertEqual(repr(Timedelta(10, unit='s')), - "Timedelta('0 days 00:00:10')") - self.assertEqual(repr(Timedelta(10, unit='ms')), - "Timedelta('0 days 00:00:00.010000')") - self.assertEqual(repr(Timedelta(-10, unit='ms')), - "Timedelta('-1 days +23:59:59.990000')") - - def test_identity(self): - - td = Timedelta(10, unit='d') - self.assertTrue(isinstance(td, Timedelta)) - self.assertTrue(isinstance(td, timedelta)) - - def test_conversion(self): - - for td in [Timedelta(10, unit='d'), - Timedelta('1 days, 10:11:12.012345')]: - pydt = td.to_pytimedelta() - self.assertTrue(td == Timedelta(pydt)) - self.assertEqual(td, pydt) - self.assertTrue(isinstance(pydt, timedelta) and not isinstance( - pydt, Timedelta)) - - self.assertEqual(td, np.timedelta64(td.value, 'ns')) - td64 = td.to_timedelta64() - self.assertEqual(td64, np.timedelta64(td.value, 'ns')) - self.assertEqual(td, td64) - self.assertTrue(isinstance(td64, np.timedelta64)) - - # this is NOT equal and cannot be roundtriped (because of the nanos) - td = Timedelta('1 days, 10:11:12.012345678') - self.assertTrue(td != td.to_pytimedelta()) - - def test_ops(self): - - td = Timedelta(10, unit='d') - self.assertEqual(-td, Timedelta(-10, unit='d')) - self.assertEqual(+td, Timedelta(10, unit='d')) - self.assertEqual(td - td, Timedelta(0, unit='ns')) - self.assertTrue((td - pd.NaT) is pd.NaT) - self.assertEqual(td + td, Timedelta(20, unit='d')) - self.assertTrue((td + pd.NaT) is pd.NaT) - self.assertEqual(td * 2, Timedelta(20, unit='d')) - self.assertTrue((td * pd.NaT) is pd.NaT) - self.assertEqual(td / 2, Timedelta(5, unit='d')) - self.assertEqual(abs(td), td) - self.assertEqual(abs(-td), td) - self.assertEqual(td / td, 1) - self.assertTrue((td / pd.NaT) is np.nan) - - # invert - self.assertEqual(-td, Timedelta('-10d')) - self.assertEqual(td * -1, Timedelta('-10d')) - self.assertEqual(-1 * td, Timedelta('-10d')) - self.assertEqual(abs(-td), Timedelta('10d')) - - # invalid - self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) - - # invalid multiply with another timedelta - self.assertRaises(TypeError, lambda: td * td) - - # can't operate with integers - self.assertRaises(TypeError, lambda: td + 2) - self.assertRaises(TypeError, lambda: td - 2) - - def test_ops_offsets(self): - td = Timedelta(10, unit='d') - self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) - self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) - self.assertEqual(240, td / pd.offsets.Hour(1)) - self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) - 
self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) - self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) - - def test_freq_conversion(self): - - td = Timedelta('1 days 2 hours 3 ns') - result = td / np.timedelta64(1, 'D') - self.assertEqual(result, td.value / float(86400 * 1e9)) - result = td / np.timedelta64(1, 's') - self.assertEqual(result, td.value / float(1e9)) - result = td / np.timedelta64(1, 'ns') - self.assertEqual(result, td.value) - - def test_ops_ndarray(self): - td = Timedelta('1 day') - - # timedelta, timedelta - other = pd.to_timedelta(['1 day']).values - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - self.assertRaises(TypeError, lambda: td + np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) + td) - - expected = pd.to_timedelta(['0 days']).values - self.assert_numpy_array_equal(td - other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(-other + td, expected) - self.assertRaises(TypeError, lambda: td - np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) - td) - - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td * np.array([2]), expected) - self.assert_numpy_array_equal(np.array([2]) * td, expected) - self.assertRaises(TypeError, lambda: td * other) - self.assertRaises(TypeError, lambda: other * td) - - self.assert_numpy_array_equal(td / other, - np.array([1], dtype=np.float64)) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other / td, - np.array([1], dtype=np.float64)) - - # timedelta, datetime - other = pd.to_datetime(['2000-01-01']).values - expected = pd.to_datetime(['2000-01-02']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - - expected = pd.to_datetime(['1999-12-31']).values - self.assert_numpy_array_equal(-td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other - td, expected) - - def test_ops_series(self): - # regression test for GH8813 - td = Timedelta('1 day') - other = pd.Series([1, 2]) - expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) - tm.assert_series_equal(expected, td * other) - tm.assert_series_equal(expected, other * td) - - def test_ops_series_object(self): - # GH 13043 - s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), - pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), - pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], - name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - # object series & object series - s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), - pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s2.dtype, object) - exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], - name='xxx') - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], - name='xxx', dtype=object) - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], - name='xxx') - tm.assert_series_equal(s + 
pd.Timedelta('00:30:00'), exp) - tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) - - def test_compare_timedelta_series(self): - # regresssion test for GH5963 - s = pd.Series([timedelta(days=1), timedelta(days=2)]) - actual = s > timedelta(days=1) - expected = pd.Series([False, True]) - tm.assert_series_equal(actual, expected) - - def test_compare_timedelta_ndarray(self): - # GH11835 - periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] - arr = np.array(periods) - result = arr[0] > arr - expected = np.array([False, False]) - self.assert_numpy_array_equal(result, expected) - - def test_ops_notimplemented(self): - class Other: - pass - - other = Other() - - td = Timedelta('1 day') - self.assertTrue(td.__add__(other) is NotImplemented) - self.assertTrue(td.__sub__(other) is NotImplemented) - self.assertTrue(td.__truediv__(other) is NotImplemented) - self.assertTrue(td.__mul__(other) is NotImplemented) - self.assertTrue(td.__floordiv__(td) is NotImplemented) - - def test_ops_error_str(self): - # GH 13624 - td = Timedelta('1 day') - - for l, r in [(td, 'a'), ('a', td)]: - - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - self.assertFalse(l == r) - self.assertTrue(l != r) - - def test_fields(self): - def check(value): - # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) - - # compat to datetime.timedelta - rng = to_timedelta('1 days, 10:11:12') - self.assertEqual(rng.days, 1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 0) - self.assertEqual(rng.nanoseconds, 0) - - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # GH 10050 - check(rng.days) - check(rng.seconds) - check(rng.microseconds) - check(rng.nanoseconds) - - td = Timedelta('-1 days, 10:11:12') - self.assertEqual(abs(td), Timedelta('13:48:48')) - self.assertTrue(str(td) == "-1 days +10:11:12") - self.assertEqual(-td, Timedelta('0 days 13:48:48')) - self.assertEqual(-Timedelta('-1 days, 10:11:12').value, 49728000000000) - self.assertEqual(Timedelta('-1 days, 10:11:12').value, -49728000000000) - - rng = to_timedelta('-1 days, 10:11:12.100123456') - self.assertEqual(rng.days, -1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 100 * 1000 + 123) - self.assertEqual(rng.nanoseconds, 456) - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # components - tup = pd.to_timedelta(-1, 'us').components - self.assertEqual(tup.days, -1) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - # GH 10050 - check(tup.days) - check(tup.hours) - check(tup.minutes) - check(tup.seconds) - check(tup.milliseconds) - check(tup.microseconds) - check(tup.nanoseconds) - - tup = Timedelta('-1 days 1 us').components - self.assertEqual(tup.days, -2) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - def test_timedelta_range(self): - - expected = to_timedelta(np.arange(5), 
unit='D') - result = timedelta_range('0 days', periods=5, freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(11), unit='D') - result = timedelta_range('0 days', '10 days', freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day() - result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', - freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2) - result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(50), unit='T') * 30 - result = timedelta_range('0 days', freq='30T', periods=50) - tm.assert_index_equal(result, expected) - - # GH 11776 - arr = np.arange(10).reshape(2, 5) - df = pd.DataFrame(np.arange(10).reshape(2, 5)) - for arg in (arr, df): - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_timedelta(arg) - for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_timedelta(arg, errors=errors) - - # issue10583 - df = pd.DataFrame(np.random.normal(size=(10, 4))) - df.index = pd.timedelta_range(start='0s', periods=10, freq='s') - expected = df.loc[pd.Timedelta('0s'):, :] - result = df.loc['0s':, :] - assert_frame_equal(expected, result) - - def test_numeric_conversions(self): - self.assertEqual(ct(0), np.timedelta64(0, 'ns')) - self.assertEqual(ct(10), np.timedelta64(10, 'ns')) - self.assertEqual(ct(10, unit='ns'), np.timedelta64( - 10, 'ns').astype('m8[ns]')) - - self.assertEqual(ct(10, unit='us'), np.timedelta64( - 10, 'us').astype('m8[ns]')) - self.assertEqual(ct(10, unit='ms'), np.timedelta64( - 10, 'ms').astype('m8[ns]')) - self.assertEqual(ct(10, unit='s'), np.timedelta64( - 10, 's').astype('m8[ns]')) - self.assertEqual(ct(10, unit='d'), np.timedelta64( - 10, 'D').astype('m8[ns]')) - - def test_timedelta_conversions(self): - self.assertEqual(ct(timedelta(seconds=1)), - np.timedelta64(1, 's').astype('m8[ns]')) - self.assertEqual(ct(timedelta(microseconds=1)), - np.timedelta64(1, 'us').astype('m8[ns]')) - self.assertEqual(ct(timedelta(days=1)), - np.timedelta64(1, 'D').astype('m8[ns]')) - - def test_short_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - self.assertEqual(ct('10'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('10ns'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('100'), np.timedelta64(100, 'ns')) - self.assertEqual(ct('100ns'), np.timedelta64(100, 'ns')) - - self.assertEqual(ct('1000'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000ns'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000NS'), np.timedelta64(1000, 'ns')) - - self.assertEqual(ct('10us'), np.timedelta64(10000, 'ns')) - self.assertEqual(ct('100us'), np.timedelta64(100000, 'ns')) - self.assertEqual(ct('1000us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000Us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000uS'), np.timedelta64(1000000, 'ns')) - - self.assertEqual(ct('1ms'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('10ms'), np.timedelta64(10000000, 'ns')) - self.assertEqual(ct('100ms'), np.timedelta64(100000000, 'ns')) - self.assertEqual(ct('1000ms'), np.timedelta64(1000000000, 'ns')) - - self.assertEqual(ct('-1s'), -np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('1s'), np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('10s'), np.timedelta64(10000000000, 'ns')) - self.assertEqual(ct('100s'), 
np.timedelta64(100000000000, 'ns')) - self.assertEqual(ct('1000s'), np.timedelta64(1000000000000, 'ns')) - - self.assertEqual(ct('1d'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('-1d'), -conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('1D'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('10D'), conv(np.timedelta64(10, 'D'))) - self.assertEqual(ct('100D'), conv(np.timedelta64(100, 'D'))) - self.assertEqual(ct('1000D'), conv(np.timedelta64(1000, 'D'))) - self.assertEqual(ct('10000D'), conv(np.timedelta64(10000, 'D'))) - - # space - self.assertEqual(ct(' 10000D '), conv(np.timedelta64(10000, 'D'))) - self.assertEqual(ct(' - 10000D '), -conv(np.timedelta64(10000, 'D'))) - - # invalid - self.assertRaises(ValueError, ct, '1foo') - self.assertRaises(ValueError, ct, 'foo') - - def test_full_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - d1 = np.timedelta64(1, 'D') - - self.assertEqual(ct('1days'), conv(d1)) - self.assertEqual(ct('1days,'), conv(d1)) - self.assertEqual(ct('- 1days,'), -conv(d1)) - - self.assertEqual(ct('00:00:01'), conv(np.timedelta64(1, 's'))) - self.assertEqual(ct('06:00:01'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.0'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.01'), conv( - np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - self.assertEqual(ct('- 1days, 00:00:01'), - conv(-d1 + np.timedelta64(1, 's'))) - self.assertEqual(ct('1days, 06:00:01'), conv( - d1 + np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('1days, 06:00:01.01'), conv( - d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - # invalid - self.assertRaises(ValueError, ct, '- 1days, 00') - - def test_nat_converters(self): - self.assertEqual(to_timedelta( - 'nat', box=False).astype('int64'), tslib.iNaT) - self.assertEqual(to_timedelta( - 'nan', box=False).astype('int64'), tslib.iNaT) - - def test_to_timedelta(self): - def conv(v): - return v.astype('m8[ns]') - - d1 = np.timedelta64(1, 'D') - - self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False), - conv(d1 + np.timedelta64(6 * 3600 + - 5 * 60 + 1, 's') + - np.timedelta64(30, 'us'))) - self.assertEqual(to_timedelta('15.5us', box=False), - conv(np.timedelta64(15500, 'ns'))) - - # empty string - result = to_timedelta('', box=False) - self.assertEqual(result.astype('int64'), tslib.iNaT) - - result = to_timedelta(['', '']) - self.assertTrue(isnull(result).all()) - - # pass thru - result = to_timedelta(np.array([np.timedelta64(1, 's')])) - expected = pd.Index(np.array([np.timedelta64(1, 's')])) - tm.assert_index_equal(result, expected) - - # ints - result = np.timedelta64(0, 'ns') - expected = to_timedelta(0, box=False) - self.assertEqual(result, expected) - - # Series - expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) - result = to_timedelta(Series(['1d', '1days 00:00:01'])) - tm.assert_series_equal(result, expected) - - # with units - result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64( - 10, 's').astype('m8[ns]')]) - expected = to_timedelta([0, 10], unit='s') - tm.assert_index_equal(result, expected) - - # single element conversion - v = timedelta(seconds=1) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) - - v = np.timedelta64(timedelta(seconds=1)) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) - - # arrays of various dtypes - arr = 
np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='s') - expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='m') - expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='h') - expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='timedelta64[s]') - result = to_timedelta(arr) - expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='timedelta64[D]') - result = to_timedelta(arr) - expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) - tm.assert_index_equal(result, expected) - - # Test with lists as input when box=false - expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') - result = to_timedelta(range(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - result = to_timedelta(np.arange(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - result = to_timedelta([0, 1, 2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - # Tests with fractional seconds as input: - expected = np.array( - [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') - result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - def testit(unit, transform): - - # array - result = to_timedelta(np.arange(5), unit=unit) - expected = TimedeltaIndex([np.timedelta64(i, transform(unit)) - for i in np.arange(5).tolist()]) - tm.assert_index_equal(result, expected) - - # scalar - result = to_timedelta(2, unit=unit) - expected = Timedelta(np.timedelta64(2, transform(unit)).astype( - 'timedelta64[ns]')) - self.assertEqual(result, expected) - - # validate all units - # GH 6855 - for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']: - testit(unit, lambda x: x.upper()) - for unit in ['days', 'day', 'Day', 'Days']: - testit(unit, lambda x: 'D') - for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US', - 'NS']: - testit(unit, lambda x: x.lower()) - - # offsets - - # m - testit('T', lambda x: 'm') - - # ms - testit('L', lambda x: 'ms') - - def test_to_timedelta_invalid(self): - - # bad value for errors parameter - msg = "errors must be one of" - tm.assertRaisesRegexp(ValueError, msg, to_timedelta, - ['foo'], errors='never') - - # these will error - self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) - self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo')) - - # time not supported ATM - self.assertRaises(ValueError, lambda: to_timedelta(time(second=1))) - self.assertTrue(to_timedelta( - time(second=1), errors='coerce') is pd.NaT) - - self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar'])) - tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), - to_timedelta(['foo', 'bar'], errors='coerce')) - - tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']), - to_timedelta(['1 day', 'bar', '1 min'], - errors='coerce')) - - # gh-13613: these should not error because errors='ignore' - invalid_data = 'apple' - self.assertEqual(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - invalid_data = ['apple', '1 days'] - tm.assert_numpy_array_equal( - np.array(invalid_data, dtype=object), - to_timedelta(invalid_data, 
errors='ignore')) - - invalid_data = pd.Index(['apple', '1 days']) - tm.assert_index_equal(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - invalid_data = Series(['apple', '1 days']) - tm.assert_series_equal(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - def test_to_timedelta_via_apply(self): - # GH 5458 - expected = Series([np.timedelta64(1, 's')]) - result = Series(['00:00:01']).apply(to_timedelta) - tm.assert_series_equal(result, expected) - - result = Series([to_timedelta('00:00:01')]) - tm.assert_series_equal(result, expected) - - def test_timedelta_ops(self): - # GH4984 - # make sure ops return Timedelta - s = Series([Timestamp('20130101') + timedelta(seconds=i * i) - for i in range(10)]) - td = s.diff() - - result = td.mean() - expected = to_timedelta(timedelta(seconds=9)) - self.assertEqual(result, expected) - - result = td.to_frame().mean() - self.assertEqual(result[0], expected) - - result = td.quantile(.1) - expected = Timedelta(np.timedelta64(2600, 'ms')) - self.assertEqual(result, expected) - - result = td.median() - expected = to_timedelta('00:00:09') - self.assertEqual(result, expected) - - result = td.to_frame().median() - self.assertEqual(result[0], expected) - - # GH 6462 - # consistency in returned values for sum - result = td.sum() - expected = to_timedelta('00:01:21') - self.assertEqual(result, expected) - - result = td.to_frame().sum() - self.assertEqual(result[0], expected) - - # std - result = td.std() - expected = to_timedelta(Series(td.dropna().values).std()) - self.assertEqual(result, expected) - - result = td.to_frame().std() - self.assertEqual(result[0], expected) - - # invalid ops - for op in ['skew', 'kurt', 'sem', 'prod']: - self.assertRaises(TypeError, getattr(td, op)) - - # GH 10040 - # make sure NaT is properly handled by median() - s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')]) - self.assertEqual(s.diff().median(), timedelta(days=4)) - - s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), - Timestamp('2015-02-15')]) - self.assertEqual(s.diff().median(), timedelta(days=6)) - - def test_overflow(self): - # GH 9442 - s = Series(pd.date_range('20130101', periods=100000, freq='H')) - s[0] += pd.Timedelta('1s 1ms') - - # mean - result = (s - s.min()).mean() - expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s) - ).sum()) - - # the computation is converted to float so might be some loss of - # precision - self.assertTrue(np.allclose(result.value / 1000, expected.value / - 1000)) - - # sum - self.assertRaises(ValueError, lambda: (s - s.min()).sum()) - s1 = s[0:10000] - self.assertRaises(ValueError, lambda: (s1 - s1.min()).sum()) - s2 = s[0:1000] - result = (s2 - s2.min()).sum() - - def test_overflow_on_construction(self): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - value = pd.Timedelta('1day').value * 20169940 - self.assertRaises(OverflowError, pd.Timedelta, value) - - def test_timedelta_ops_scalar(self): - # GH 6808 - base = pd.to_datetime('20130101 09:01:12.123456') - expected_add = pd.to_datetime('20130101 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), - np.timedelta64(10, 's'), - np.timedelta64(10000000000, 'ns'), - pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - base = pd.to_datetime('20130102 09:01:12.123456') - expected_add = 
pd.to_datetime('20130103 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta('1 day, 00:00:10'), - pd.to_timedelta('1 days, 00:00:10'), - timedelta(days=1, seconds=10), - np.timedelta64(1, 'D') + np.timedelta64(10, 's'), - pd.offsets.Day() + pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - def test_to_timedelta_on_missing_values(self): - # GH5438 - timedelta_NaT = np.timedelta64('NaT') - - actual = pd.to_timedelta(Series(['00:00:01', np.nan])) - expected = Series([np.timedelta64(1000000000, 'ns'), - timedelta_NaT], dtype=' idx1 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - self.assert_numpy_array_equal(result, expected) - - def test_ops_error_str(self): - # GH 13624 - tdi = TimedeltaIndex(['1 day', '2 days']) - - for l, r in [(tdi, 'a'), ('a', tdi)]: - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - with tm.assertRaises(TypeError): - l == r - - with tm.assertRaises(TypeError): - l != r - - def test_map(self): - - rng = timedelta_range('1 day', periods=10) - - f = lambda x: x.days - result = rng.map(f) - exp = Int64Index([f(x) for x in rng]) - tm.assert_index_equal(result, exp) - - def test_misc_coverage(self): - - rng = timedelta_range('1 day', periods=5) - result = rng.groupby(rng.days) - tm.assertIsInstance(list(result.values())[0][0], Timedelta) - - idx = TimedeltaIndex(['3d', '1d', '2d']) - self.assertFalse(idx.equals(list(idx))) - - non_td = Index(list('abc')) - self.assertFalse(idx.equals(list(non_td))) - - def test_union(self): - - i1 = timedelta_range('1day', periods=5) - i2 = timedelta_range('3day', periods=5) - result = i1.union(i2) - expected = timedelta_range('1day', periods=7) - self.assert_index_equal(result, expected) - - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') - i1.union(i2) # Works - i2.union(i1) # Fails with "AttributeError: can't set attribute" - - def test_union_coverage(self): - - idx = TimedeltaIndex(['3d', '1d', '2d']) - ordered = TimedeltaIndex(idx.sort_values(), freq='infer') - result = ordered.union(idx) - self.assert_index_equal(result, ordered) - - result = ordered[:0].union(ordered) - self.assert_index_equal(result, ordered) - self.assertEqual(result.freq, ordered.freq) - - def test_union_bug_1730(self): - - rng_a = timedelta_range('1 day', periods=4, freq='3H') - rng_b = timedelta_range('1 day', periods=4, freq='4H') - - result = rng_a.union(rng_b) - exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) - self.assert_index_equal(result, exp) - - def test_union_bug_1745(self): - - left = TimedeltaIndex(['1 day 15:19:49.695000']) - right = TimedeltaIndex(['2 day 13:04:21.322000', - '1 day 15:27:24.873000', - '1 day 15:31:05.350000']) - - result = left.union(right) - exp = TimedeltaIndex(sorted(set(list(left)) | 
set(list(right)))) - self.assert_index_equal(result, exp) - - def test_union_bug_4564(self): - - left = timedelta_range("1 day", "30d") - right = left + pd.offsets.Minute(15) - - result = left.union(right) - exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) - self.assert_index_equal(result, exp) - - def test_intersection_bug_1708(self): - index_1 = timedelta_range('1 day', periods=4, freq='h') - index_2 = index_1 + pd.offsets.Hour(5) - - result = index_1 & index_2 - self.assertEqual(len(result), 0) - - index_1 = timedelta_range('1 day', periods=4, freq='h') - index_2 = index_1 + pd.offsets.Hour(1) - - result = index_1 & index_2 - expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') - tm.assert_index_equal(result, expected) - - def test_get_duplicates(self): - idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', - '4day']) - - result = idx.get_duplicates() - ex = TimedeltaIndex(['2 day', '3day']) - self.assert_index_equal(result, ex) - - def test_argmin_argmax(self): - idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01', - '1 day 00:00:02']) - self.assertEqual(idx.argmin(), 1) - self.assertEqual(idx.argmax(), 0) - - def test_sort_values(self): - - idx = TimedeltaIndex(['4d', '1d', '2d']) - - ordered = idx.sort_values() - self.assertTrue(ordered.is_monotonic) - - ordered = idx.sort_values(ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - - ordered, dexer = idx.sort_values(return_indexer=True) - self.assertTrue(ordered.is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([1, 2, 0]), - check_dtype=False) - - ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([0, 2, 1]), - check_dtype=False) - - def test_insert(self): - - idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') - - result = idx.insert(2, timedelta(days=5)) - exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') - self.assert_index_equal(result, exp) - - # insertion of non-datetime should coerce to object index - result = idx.insert(1, 'inserted') - expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), - Timedelta('2day')], name='idx') - self.assertNotIsInstance(result, TimedeltaIndex) - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - - idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') - - # preserve freq - expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', - '1day 00:00:03'], - name='idx', freq='s') - expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:04'], - name='idx', freq='s') - - # reset freq to None - expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', - '1day 00:00:02', '1day 00:00:03'], - name='idx', freq=None) - expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:05'], - name='idx', freq=None) - - cases = [(0, Timedelta('1day'), expected_0), - (-3, Timedelta('1day'), expected_0), - (3, Timedelta('1day 00:00:04'), expected_3), - (1, Timedelta('1day 00:00:01'), expected_1_nofreq), - (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] - - for n, d, expected in cases: - result = idx.insert(n, d) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - def test_delete(self): - idx = timedelta_range(start='1 Days', periods=5, freq='D', 
name='idx') - - # prserve freq - expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', - name='idx') - expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', - name='idx') - - # reset freq to None - expected_1 = TimedeltaIndex( - ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') - - cases = {0: expected_0, - -5: expected_0, - -1: expected_4, - 4: expected_4, - 1: expected_1} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - with tm.assertRaises((IndexError, ValueError)): - # either depeidnig on numpy version - result = idx.delete(5) - - def test_delete_slice(self): - idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') - - # prserve freq - expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', - name='idx') - expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', - name='idx') - - # reset freq to None - expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', - '7 d', '8 d', '9 d', '10d'], - freq=None, name='idx') - - cases = {(0, 1, 2): expected_0_2, - (7, 8, 9): expected_7_9, - (3, 4, 5): expected_3_5} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - result = idx.delete(slice(n[0], n[-1] + 1)) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - def test_take(self): - - tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] - idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') - expected = TimedeltaIndex(tds, freq=None, name='idx') - - taken1 = idx.take([2, 4, 10]) - taken2 = idx[[2, 4, 10]] - - for taken in [taken1, taken2]: - self.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, TimedeltaIndex) - self.assertIsNone(taken.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'], - name='xxx') - result = idx.take(np.array([1, 0, -1])) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'], - name='xxx') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_isin(self): - - index = tm.makeTimedeltaIndex(4) - result = index.isin(index) - self.assertTrue(result.all()) - - result = index.isin(list(index)) - self.assertTrue(result.all()) - - assert_almost_equal(index.isin([index[2], 5]), - np.array([False, False, True, False])) - - def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe(10, 10, - 
data_gen_f=lambda *args, **kwargs: randn(), - r_idx_type='i', c_idx_type='td') - str(df) - - cols = df.columns.join(df.index, how='outer') - joined = cols.join(df.columns) - self.assertEqual(cols.dtype, np.dtype('O')) - self.assertEqual(cols.dtype, joined.dtype) - tm.assert_index_equal(cols, joined) - - def test_slice_keeps_name(self): - - # GH4226 - dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') - self.assertEqual(dr[1:].name, dr.name) - - def test_join_self(self): - - index = timedelta_range('1 day', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - tm.assert_index_equal(index, joined) - - def test_factorize(self): - idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', - '3 day']) - - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) - exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) - - arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) - - # freq must be preserved - idx3 = timedelta_range('1 day', periods=4, freq='s') - exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) - arr, idx = idx3.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, idx3) - - -class TestSlicing(tm.TestCase): - - def test_partial_slice(self): - rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['5 day':'6 day'] - expected = s.iloc[86:134] - assert_series_equal(result, expected) - - result = s['5 day':] - expected = s.iloc[86:] - assert_series_equal(result, expected) - - result = s[:'6 day'] - expected = s.iloc[:134] - assert_series_equal(result, expected) - - result = s['6 days, 23:11:12'] - self.assertEqual(result, s.iloc[133]) - - self.assertRaises(KeyError, s.__getitem__, '50 days') - - def test_partial_slice_high_reso(self): - - # higher reso - rng = timedelta_range('1 day 10:11:12', freq='us', periods=2000) - s = Series(np.arange(len(rng)), index=rng) - - result = s['1 day 10:11:12':] - expected = s.iloc[0:] - assert_series_equal(result, expected) - - result = s['1 day 10:11:12.001':] - expected = s.iloc[1000:] - assert_series_equal(result, expected) - - result = s['1 days, 10:11:12.001001'] - self.assertEqual(result, s.iloc[1001]) - - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Timedelta(hours=7)::-1], SLC[7::-1]) - assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1]) - - assert_slices_equivalent(SLC[:Timedelta(hours=7):-1], SLC[:6:-1]) - assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1]) - - assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1]) - assert_slices_equivalent(SLC[Timedelta(hours=15):Timedelta(hours=7):- - 1], SLC[15:6:-1]) - assert_slices_equivalent(SLC['15 hours':Timedelta(hours=7):-1], - SLC[15:6:-1]) - assert_slices_equivalent(SLC[Timedelta(hours=15):'7 hours':-1], - SLC[15:6:-1]) - - assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), timedelta_range('0', 
periods=20, freq='H')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_tdi_ops_attributes(self): - rng = timedelta_range('2 days', periods=5, freq='2D', name='x') - - result = rng + 1 - exp = timedelta_range('4 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng - 2 - exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng * 2 - exp = timedelta_range('4 days', periods=5, freq='4D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4D') - - result = rng / 2 - exp = timedelta_range('1 days', periods=5, freq='D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'D') - - result = -rng - exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2D') - - rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') - - result = abs(rng) - exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', - '2 days'], name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_add_overflow(self): - # see gh-14068 - msg = "too (big|large) to convert" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(106580, 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta(106580, 'D') - - _NaT = int(pd.NaT) + 1 - msg = "Overflow in int64 addition" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([106580], 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta([106580], 'D') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([_NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(['5 days', _NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - (to_timedelta([_NaT, '5 days', '1 hours']) - - to_timedelta(['7 seconds', _NaT, '4 hours'])) - - # These should not overflow! 
- exp = TimedeltaIndex([pd.NaT]) - result = to_timedelta([pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(['4 days', pd.NaT]) - result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) - result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + - to_timedelta(['7 seconds', pd.NaT, '4 hours'])) - tm.assert_index_equal(result, exp) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 38cd8079faf93..771fb2f50c410 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -1,23 +1,20 @@ # pylint: disable-msg=E1101,W0612 -from datetime import datetime, timedelta, tzinfo, date -import numpy as np import pytz +import numpy as np from distutils.version import LooseVersion -from pandas.types.dtypes import DatetimeTZDtype -from pandas import (Index, Series, DataFrame, isnull, Timestamp) - -from pandas import DatetimeIndex, to_datetime, NaT -from pandas import tslib - -import pandas.tseries.offsets as offsets -from pandas.tseries.index import bdate_range, date_range -import pandas.tseries.tools as tools +from datetime import datetime, timedelta, tzinfo, date from pytz import NonExistentTimeError import pandas.util.testing as tm +import pandas.tseries.tools as tools +import pandas.tseries.offsets as offsets +from pandas.compat import lrange, zip +from pandas.tseries.index import bdate_range, date_range +from pandas.types.dtypes import DatetimeTZDtype +from pandas import (Index, Series, DataFrame, isnull, Timestamp, tslib, NaT, + DatetimeIndex, to_datetime) from pandas.util.testing import (assert_frame_equal, assert_series_equal, set_timezone) -from pandas.compat import lrange, zip try: import pytz # noqa @@ -1679,3 +1676,52 @@ def test_nat(self): idx = idx.tz_convert('US/Eastern') expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + + +class TestTslib(tm.TestCase): + + def test_tslib_tz_convert(self): + def compare_utc_to_local(tz_didx, utc_didx): + f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz) + result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) + result_single = np.vectorize(f)(tz_didx.asi8) + self.assert_numpy_array_equal(result, result_single) + + def compare_local_to_utc(tz_didx, utc_didx): + f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC') + result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') + result_single = np.vectorize(f)(utc_didx.asi8) + self.assert_numpy_array_equal(result, result_single) + + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'Europe/Moscow']: + # US: 2014-03-09 - 2014-11-11 + # MOSCOW: 2014-10-26 / 2014-12-31 + tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) + utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') + compare_utc_to_local(tz_didx, utc_didx) + # local tz to UTC can be differ in hourly (or higher) freqs because + # of DST + compare_local_to_utc(tz_didx, utc_didx) + + tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz) + utc_didx = date_range('2000-01-01', '2020-01-01', freq='D') + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz) + utc_didx = date_range('2000-01-01', '2100-01-01', freq='A') + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + # Check 
empty array + result = tslib.tz_convert(np.array([], dtype=np.int64), + tslib.maybe_get_tz('US/Eastern'), + tslib.maybe_get_tz('Asia/Tokyo')) + self.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) + + # Check all-NaT array + result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), + tslib.maybe_get_tz('US/Eastern'), + tslib.maybe_get_tz('Asia/Tokyo')) + self.assert_numpy_array_equal(result, np.array( + [tslib.iNaT], dtype=np.int64)) diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py deleted file mode 100644 index a141d445e6035..0000000000000 --- a/pandas/tseries/tests/test_tslib.py +++ /dev/null @@ -1,694 +0,0 @@ -import datetime -import numpy as np -from distutils.version import LooseVersion - -import pandas as pd -import pandas.util.testing as tm -from pandas import tslib, lib, compat -from pandas.tseries import offsets, tools -from pandas.tseries.frequencies import get_freq -from pandas.tseries.index import date_range, DatetimeIndex -from pandas.util.testing import _skip_if_has_locale -from pandas._period import period_ordinal, period_asfreq -from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.api import Timestamp, to_datetime, Index, Series - - -class TestTsUtil(tm.TestCase): - - def test_try_parse_dates(self): - from dateutil.parser import parse - arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) - - result = lib.try_parse_dates(arr, dayfirst=True) - expected = [parse(d, dayfirst=True) for d in arr] - self.assertTrue(np.array_equal(result, expected)) - - def test_min_valid(self): - # Ensure that Timestamp.min is a valid Timestamp - Timestamp(Timestamp.min) - - def test_max_valid(self): - # Ensure that Timestamp.max is a valid Timestamp - Timestamp(Timestamp.max) - - def test_to_datetime_bijective(self): - # Ensure that converting to datetime and back only loses precision - # by going from nanoseconds to microseconds. 
- exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.max.to_pydatetime()).value / 1000, - Timestamp.max.value / 1000) - - exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.min.to_pydatetime()).value / 1000, - Timestamp.min.value / 1000) - - -class TestDatetimeParsingWrappers(tm.TestCase): - - def test_does_not_convert_mixed_integer(self): - bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') - - for bad_date_string in bad_date_strings: - self.assertFalse(tslib._does_string_look_like_datetime( - bad_date_string)) - - good_date_strings = ('2012-01-01', - '01/01/2012', - 'Mon Sep 16, 2013', - '01012012', - '0101', - '1-1', ) - - for good_date_string in good_date_strings: - self.assertTrue(tslib._does_string_look_like_datetime( - good_date_string)) - - def test_parsers(self): - - # https://github.com/dateutil/dateutil/issues/217 - import dateutil - yearfirst = dateutil.__version__ >= LooseVersion('2.5.0') - - cases = {'2011-01-01': datetime.datetime(2011, 1, 1), - '2Q2005': datetime.datetime(2005, 4, 1), - '2Q05': datetime.datetime(2005, 4, 1), - '2005Q1': datetime.datetime(2005, 1, 1), - '05Q1': datetime.datetime(2005, 1, 1), - '2011Q3': datetime.datetime(2011, 7, 1), - '11Q3': datetime.datetime(2011, 7, 1), - '3Q2011': datetime.datetime(2011, 7, 1), - '3Q11': datetime.datetime(2011, 7, 1), - - # quarterly without space - '2000Q4': datetime.datetime(2000, 10, 1), - '00Q4': datetime.datetime(2000, 10, 1), - '4Q2000': datetime.datetime(2000, 10, 1), - '4Q00': datetime.datetime(2000, 10, 1), - '2000q4': datetime.datetime(2000, 10, 1), - '2000-Q4': datetime.datetime(2000, 10, 1), - '00-Q4': datetime.datetime(2000, 10, 1), - '4Q-2000': datetime.datetime(2000, 10, 1), - '4Q-00': datetime.datetime(2000, 10, 1), - '00q4': datetime.datetime(2000, 10, 1), - '2005': datetime.datetime(2005, 1, 1), - '2005-11': datetime.datetime(2005, 11, 1), - '2005 11': datetime.datetime(2005, 11, 1), - '11-2005': datetime.datetime(2005, 11, 1), - '11 2005': datetime.datetime(2005, 11, 1), - '200511': datetime.datetime(2020, 5, 11), - '20051109': datetime.datetime(2005, 11, 9), - '20051109 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '20051109 08H': datetime.datetime(2005, 11, 9, 8, 0), - '2005-11-09 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '2005-11-09 08H': datetime.datetime(2005, 11, 9, 8, 0), - '2005/11/09 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '2005/11/09 08H': datetime.datetime(2005, 11, 9, 8, 0), - "Thu Sep 25 10:36:28 2003": datetime.datetime(2003, 9, 25, 10, - 36, 28), - "Thu Sep 25 2003": datetime.datetime(2003, 9, 25), - "Sep 25 2003": datetime.datetime(2003, 9, 25), - "January 1 2014": datetime.datetime(2014, 1, 1), - - # GH 10537 - '2014-06': datetime.datetime(2014, 6, 1), - '06-2014': datetime.datetime(2014, 6, 1), - '2014-6': datetime.datetime(2014, 6, 1), - '6-2014': datetime.datetime(2014, 6, 1), - - '20010101 12': datetime.datetime(2001, 1, 1, 12), - '20010101 1234': datetime.datetime(2001, 1, 1, 12, 34), - '20010101 123456': datetime.datetime(2001, 1, 1, 12, 34, 56), - } - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, - yearfirst=yearfirst) - result2 = to_datetime(date_str, yearfirst=yearfirst) - result3 = to_datetime([date_str], yearfirst=yearfirst) - 
# result5 is used below - result4 = to_datetime(np.array([date_str], dtype=object), - yearfirst=yearfirst) - result6 = DatetimeIndex([date_str], yearfirst=yearfirst) - # result7 is used below - result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) - result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) - - for res in [result1, result2]: - self.assertEqual(res, expected) - for res in [result3, result4, result6, result8, result9]: - exp = DatetimeIndex([pd.Timestamp(expected)]) - tm.assert_index_equal(res, exp) - - # these really need to have yearfist, but we don't support - if not yearfirst: - result5 = Timestamp(date_str) - self.assertEqual(result5, expected) - result7 = date_range(date_str, freq='S', periods=1, - yearfirst=yearfirst) - self.assertEqual(result7, expected) - - # NaT - result1, _, _ = tools.parse_time_string('NaT') - result2 = to_datetime('NaT') - result3 = Timestamp('NaT') - result4 = DatetimeIndex(['NaT'])[0] - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - - def test_parsers_quarter_invalid(self): - - cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] - for case in cases: - self.assertRaises(ValueError, tools.parse_time_string, case) - - def test_parsers_dayfirst_yearfirst(self): - tm._skip_if_no_dateutil() - - # OK - # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 - # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 - # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 - - # OK - # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - - # bug fix in 2.5.2 - # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 - # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 - - # OK - # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - - # revert of bug in 2.5.2 - # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 - # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - - import dateutil - is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') - - # str : dayfirst, yearfirst, expected - cases = {'10-11-12': [(False, False, - datetime.datetime(2012, 10, 11)), - (True, False, - datetime.datetime(2012, 11, 10)), - (False, True, - datetime.datetime(2010, 11, 12)), - (True, True, - datetime.datetime(2010, 12, 11))], - '20/12/21': [(False, False, - datetime.datetime(2021, 12, 
20)), - (True, False, - datetime.datetime(2021, 12, 20)), - (False, True, - datetime.datetime(2020, 12, 21)), - (True, True, - datetime.datetime(2020, 12, 21))]} - - from dateutil.parser import parse - for date_str, values in compat.iteritems(cases): - for dayfirst, yearfirst, expected in values: - - # odd comparisons across version - # let's just skip - if dayfirst and yearfirst and is_lt_253: - continue - - # compare with dateutil result - dateutil_result = parse(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - self.assertEqual(dateutil_result, expected) - - result1, _, _ = tools.parse_time_string(date_str, - dayfirst=dayfirst, - yearfirst=yearfirst) - - # we don't support dayfirst/yearfirst here: - if not dayfirst and not yearfirst: - result2 = Timestamp(date_str) - self.assertEqual(result2, expected) - - result3 = to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - - result4 = DatetimeIndex([date_str], dayfirst=dayfirst, - yearfirst=yearfirst)[0] - - self.assertEqual(result1, expected) - self.assertEqual(result3, expected) - self.assertEqual(result4, expected) - - def test_parsers_timestring(self): - tm._skip_if_no_dateutil() - from dateutil.parser import parse - - # must be the same as dateutil result - cases = {'10:15': (parse('10:15'), datetime.datetime(1, 1, 1, 10, 15)), - '9:05': (parse('9:05'), datetime.datetime(1, 1, 1, 9, 5))} - - for date_str, (exp_now, exp_def) in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str) - result2 = to_datetime(date_str) - result3 = to_datetime([date_str]) - result4 = Timestamp(date_str) - result5 = DatetimeIndex([date_str])[0] - # parse time string return time string based on default date - # others are not, and can't be changed because it is used in - # time series plot - self.assertEqual(result1, exp_def) - self.assertEqual(result2, exp_now) - self.assertEqual(result3, exp_now) - self.assertEqual(result4, exp_now) - self.assertEqual(result5, exp_now) - - def test_parsers_time(self): - # GH11818 - _skip_if_has_locale() - strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500", - "2:15:00pm", "021500pm", datetime.time(14, 15)] - expected = datetime.time(14, 15) - - for time_string in strings: - self.assertEqual(tools.to_time(time_string), expected) - - new_string = "14.15" - self.assertRaises(ValueError, tools.to_time, new_string) - self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected) - - arg = ["14:15", "20:20"] - expected_arr = [datetime.time(14, 15), datetime.time(20, 20)] - self.assertEqual(tools.to_time(arg), expected_arr) - self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr) - self.assertEqual(tools.to_time(arg, infer_time_format=True), - expected_arr) - self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"), - [None, None]) - - res = tools.to_time(arg, format="%I:%M%p", errors="ignore") - self.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) - - with tm.assertRaises(ValueError): - tools.to_time(arg, format="%I:%M%p", errors="raise") - - self.assert_series_equal(tools.to_time(Series(arg, name="test")), - Series(expected_arr, name="test")) - - res = tools.to_time(np.array(arg)) - self.assertIsInstance(res, list) - self.assert_equal(res, expected_arr) - - def test_parsers_monthfreq(self): - cases = {'201101': datetime.datetime(2011, 1, 1, 0, 0), - '200005': datetime.datetime(2000, 5, 1, 0, 0)} - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, freq='M') - 
self.assertEqual(result1, expected) - - def test_parsers_quarterly_with_freq(self): - msg = ('Incorrect quarterly string is given, quarter ' - 'must be between 1 and 4: 2013Q5') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q5') - - # GH 5418 - msg = ('Unable to retrieve month information from given freq: ' - 'INVLD-L-DEC-SAT') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') - - cases = {('2013Q2', None): datetime.datetime(2013, 4, 1), - ('2013Q2', 'A-APR'): datetime.datetime(2012, 8, 1), - ('2013-Q2', 'A-DEC'): datetime.datetime(2013, 4, 1)} - - for (date_str, freq), exp in compat.iteritems(cases): - result, _, _ = tools.parse_time_string(date_str, freq=freq) - self.assertEqual(result, exp) - - def test_parsers_timezone_minute_offsets_roundtrip(self): - # GH11708 - base = to_datetime("2013-01-01 00:00:00") - dt_strings = [ - ('2013-01-01 05:45+0545', - "Asia/Katmandu", - "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"), - ('2013-01-01 05:30+0530', - "Asia/Kolkata", - "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')") - ] - - for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string) - self.assertEqual(base, dt_time) - converted_time = dt_time.tz_localize('UTC').tz_convert(tz) - self.assertEqual(dt_string_repr, repr(converted_time)) - - def test_parsers_iso8601(self): - # GH 12060 - # test only the iso parser - flexibility to different - # separators and leadings 0s - # Timestamp construction falls back to dateutil - cases = {'2011-01-02': datetime.datetime(2011, 1, 2), - '2011-1-2': datetime.datetime(2011, 1, 2), - '2011-01': datetime.datetime(2011, 1, 1), - '2011-1': datetime.datetime(2011, 1, 1), - '2011 01 02': datetime.datetime(2011, 1, 2), - '2011.01.02': datetime.datetime(2011, 1, 2), - '2011/01/02': datetime.datetime(2011, 1, 2), - '2011\\01\\02': datetime.datetime(2011, 1, 2), - '2013-01-01 05:30:00': datetime.datetime(2013, 1, 1, 5, 30), - '2013-1-1 5:30:00': datetime.datetime(2013, 1, 1, 5, 30)} - for date_str, exp in compat.iteritems(cases): - actual = tslib._test_parse_iso8601(date_str) - self.assertEqual(actual, exp) - - # seperators must all match - YYYYMM not valid - invalid_cases = ['2011-01/02', '2011^11^11', - '201401', '201111', '200101', - # mixed separated and unseparated - '2005-0101', '200501-01', - '20010101 12:3456', '20010101 1234:56', - # HHMMSS must have two digits in each component - # if unseparated - '20010101 1', '20010101 123', '20010101 12345', - '20010101 12345Z', - # wrong separator for HHMMSS - '2001-01-01 12-34-56'] - for date_str in invalid_cases: - with tm.assertRaises(ValueError): - tslib._test_parse_iso8601(date_str) - # If no ValueError raised, let me know which case failed. 
- raise Exception(date_str) - - -class TestArrayToDatetime(tm.TestCase): - - def test_parsing_valid_dates(self): - arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - '2013-01-02T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-09-16T00:00:00.000000000-0000', - '2013-09-17T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_number_looking_strings_not_into_datetime(self): - # #4601 - # These strings don't look like datetimes so they shouldn't be - # attempted to be converted - arr = np.array(['-352.737091', '183.575577'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - def test_coercing_dates_outside_of_datetime64_ns_bounds(self): - invalid_dates = [ - datetime.date(1000, 1, 1), - datetime.datetime(1000, 1, 1), - '1000-01-01', - 'Jan 1, 1000', - np.datetime64('1000-01-01'), - ] - - for invalid_date in invalid_dates: - self.assertRaises(ValueError, - tslib.array_to_datetime, - np.array( - [invalid_date], dtype='object'), - errors='raise', ) - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([invalid_date], dtype='object'), - errors='coerce'), - np.array([tslib.iNaT], dtype='M8[ns]') - ) - - arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - tslib.iNaT, - '2000-01-01T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_coerce_of_invalid_datetimes(self): - arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) - - # Without coercing, the presence of any invalid dates prevents - # any values from being converted - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - # With coercing, the invalid dates becomes iNaT - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - tslib.iNaT, - tslib.iNaT - ], - dtype='M8[ns]' - ) - ) - - def test_parsing_timezone_offsets(self): - # All of these datetime strings with offsets are equivalent - # to the same datetime after the timezone offset is added - dt_strings = [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00' - ] - - expected_output = tslib.array_to_datetime(np.array( - ['01-01-2013 00:00:00'], dtype=object)) - - for dt_string in dt_strings: - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([dt_string], dtype=object) - ), - expected_output - ) - - -class TestTslib(tm.TestCase): - - def test_intraday_conversion_factors(self): - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('H'), False), 24) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('T'), False), 1440) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('S'), False), 86400) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('L'), False), 86400000) - self.assertEqual(period_asfreq(1, get_freq( - 
'D'), get_freq('U'), False), 86400000000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('N'), False), 86400000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('T'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('S'), False), 3600) - self.assertEqual(period_asfreq(1, get_freq('H'), - get_freq('L'), False), 3600000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('U'), False), 3600000000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('N'), False), 3600000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('S'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('L'), False), 60000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('U'), False), 60000000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('N'), False), 60000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('S'), get_freq('L'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('S'), - get_freq('U'), False), 1000000) - self.assertEqual(period_asfreq(1, get_freq( - 'S'), get_freq('N'), False), 1000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('L'), get_freq('U'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('L'), - get_freq('N'), False), 1000000) - - self.assertEqual(period_asfreq( - 1, get_freq('U'), get_freq('N'), False), 1000) - - def test_period_ordinal_start_values(self): - # information for 1.1.1970 - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('A'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('M'))) - self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('D'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('B'))) - - def test_period_ordinal_week(self): - self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, - get_freq('W'))) - - self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('W'))) - - def test_period_ordinal_business_day(self): - # Thursday - self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, - get_freq('B'))) - # Friday - self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, - get_freq('B'))) - # Saturday - self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, - get_freq('B'))) - # Sunday - self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('B'))) - # Monday - self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('B'))) - # Tuesday - self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, - get_freq('B'))) - - def test_tslib_tz_convert(self): - def compare_utc_to_local(tz_didx, utc_didx): - f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz) - result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) - result_single = np.vectorize(f)(tz_didx.asi8) - self.assert_numpy_array_equal(result, result_single) - - def compare_local_to_utc(tz_didx, utc_didx): - f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC') - result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') - result_single = np.vectorize(f)(utc_didx.asi8) - self.assert_numpy_array_equal(result, result_single) - - for tz in ['UTC', 'Asia/Tokyo', 
'US/Eastern', 'Europe/Moscow']: - # US: 2014-03-09 - 2014-11-11 - # MOSCOW: 2014-10-26 / 2014-12-31 - tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) - utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') - compare_utc_to_local(tz_didx, utc_didx) - # local tz to UTC can be differ in hourly (or higher) freqs because - # of DST - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz) - utc_didx = date_range('2000-01-01', '2020-01-01', freq='D') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz) - utc_didx = date_range('2000-01-01', '2100-01-01', freq='A') - compare_utc_to_local(tz_didx, utc_didx) - compare_local_to_utc(tz_didx, utc_didx) - - # Check empty array - result = tslib.tz_convert(np.array([], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) - self.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) - - # Check all-NaT array - result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) - self.assert_numpy_array_equal(result, np.array( - [tslib.iNaT], dtype=np.int64)) - - def test_shift_months(self): - s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( - '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( - '2000-02-29'), Timestamp('2000-12-31')]) - for years in [-1, 0, 1]: - for months in [-2, 0, 2]: - actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + - months)) - expected = DatetimeIndex([x + offsets.DateOffset( - years=years, months=months) for x in s]) - tm.assert_index_equal(actual, expected) - - def test_round(self): - stamp = Timestamp('2000-01-05 05:09:15.13') - - def _check_round(freq, expected): - result = stamp.round(freq=freq) - self.assertEqual(result, expected) - - for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), - ('H', Timestamp('2000-01-05 05:00:00')), - ('S', Timestamp('2000-01-05 05:09:15'))]: - _check_round(freq, expected) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - stamp.round('foo') diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py deleted file mode 100644 index 3feffe924c291..0000000000000 --- a/pandas/tseries/tests/test_util.py +++ /dev/null @@ -1,126 +0,0 @@ -from pandas.compat import range - -import numpy as np - -from pandas import Series, date_range -import pandas.util.testing as tm - -from datetime import datetime, date - -from pandas.tseries.tools import normalize_date -from pandas.tseries.util import pivot_annual, isleapyear - - -class TestPivotAnnual(tm.TestCase): - """ - New pandas of scikits.timeseries pivot_annual - """ - - def test_daily(self): - rng = date_range('1/1/2000', '12/31/2004', freq='D') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'D') - - doy = ts.index.dayofyear - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 - - for i in range(1, 367): - subset = ts[doy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - # check leap days - leaps = ts[(ts.index.month == 2) & 
(ts.index.day == 29)] - day = leaps.index.dayofyear[0] - leaps.index = leaps.index.year - leaps.name = 60 - tm.assert_series_equal(annual[day].dropna(), leaps) - - def test_hourly(self): - rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), - freq='H') - data_hourly = np.random.randint(100, 350, rng_hourly.size) - ts_hourly = Series(data_hourly, index=rng_hourly) - - grouped = ts_hourly.groupby(ts_hourly.index.year) - hoy = grouped.apply(lambda x: x.reset_index(drop=True)) - hoy = hoy.index.droplevel(0).values - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 - hoy += 1 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts_hourly) - - ts_hourly = ts_hourly.astype(float) - for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: - subset = ts_hourly[hoy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - leaps = ts_hourly[(ts_hourly.index.month == 2) & ( - ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] - hour = leaps.index.dayofyear[0] * 24 - 23 - leaps.index = leaps.index.year - leaps.name = 1417 - tm.assert_series_equal(annual[hour].dropna(), leaps) - - def test_weekly(self): - pass - - def test_monthly(self): - rng = date_range('1/1/2000', '12/31/2004', freq='M') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'M') - - month = ts.index.month - for i in range(1, 13): - subset = ts[month == i] - subset.index = [x.year for x in subset.index] - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - def test_period_monthly(self): - pass - - def test_period_daily(self): - pass - - def test_period_weekly(self): - pass - - def test_isleapyear_deprecate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2000)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertFalse(isleapyear(2001)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2004)) - - -def test_normalize_date(): - value = date(2012, 9, 7) - - result = normalize_date(value) - assert (result == datetime(2012, 9, 7)) - - value = datetime(2012, 9, 7, 12) - - result = normalize_date(value) - assert (result == datetime(2012, 9, 7)) From 6552a237837894f6f244b5fa022cae90343508cd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Feb 2017 17:58:45 -0500 Subject: [PATCH 022/353] TST: more tseries/tests reorg --- .../indexes/datetimes/test_date_range.py | 444 ++---------------- pandas/tests/indexes/datetimes/test_ops.py | 195 +++++++- pandas/tests/indexes/datetimes/test_setops.py | 224 +++++++++ pandas/tests/indexes/datetimes/test_tools.py | 108 +---- pandas/tools/tests/test_pivot.py | 106 ++++- setup.py | 2 + 6 files changed, 557 insertions(+), 522 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 8dab10269f76d..80664ce246bf8 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1,22 +1,30 @@ +""" +test date_range, bdate_range, cdate_range +construction from the convenience 
range functions +""" + import numpy as np from datetime import datetime, timedelta, time import pandas as pd import pandas.util.testing as tm from pandas import compat -from pandas.core import common as com -from pandas.util.testing import assertRaisesRegexp from pandas.tseries.index import bdate_range, cdate_range -from pandas import date_range, offsets, DatetimeIndex, Timestamp, Index -from pandas.tseries.offsets import (generate_range, CDay, BDay, Minute, - BMonthEnd, DateOffset, MonthEnd) +from pandas import date_range, offsets, DatetimeIndex, Timestamp +from pandas.tseries.offsets import (generate_range, CDay, BDay, + DateOffset, MonthEnd) from pandas.tests.series.common import TestData START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) -class TestTimeSeries(TestData, tm.TestCase): +def eq_gen_range(kwargs, expected): + rng = generate_range(**kwargs) + assert (np.array_equal(list(rng), expected)) + + +class TestDateRanges(TestData, tm.TestCase): def test_date_range_gen_error(self): rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min') @@ -137,11 +145,6 @@ def test_catch_infinite_loop(self): datetime(2011, 11, 12), freq=offset) -def eq_gen_range(kwargs, expected): - rng = generate_range(**kwargs) - assert (np.array_equal(list(rng), expected)) - - class TestGenRangeGeneration(tm.TestCase): def test_generate(self): @@ -191,7 +194,8 @@ def test_precision_finer_than_offset(self): self.assert_index_equal(result2, expected2) -class TestDateRange(tm.TestCase): +class TestBusinessDateRange(tm.TestCase): + def setUp(self): self.rng = bdate_range(START, END) @@ -206,28 +210,31 @@ def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware) - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive) + self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", + naive.join, aware) + self.assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", + aware.join, naive) def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=BDay()) DatetimeIndex._cached_range(START, periods=20, offset=BDay()) DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range, - START, END) + self.assertRaisesRegexp(TypeError, "offset", + DatetimeIndex._cached_range, + START, END) - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) + self.assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, START, + offset=BDay()) - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) + self.assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, end=END, + offset=BDay()) - assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) + self.assertRaisesRegexp(TypeError, "start or end", + DatetimeIndex._cached_range, periods=20, + offset=BDay()) def test_cached_range_bug(self): rng = date_range('2010-09-01 05:00:00', periods=50, @@ -236,192 +243,16 @@ def test_cached_range_bug(self): self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) def test_timezone_comparaison_bug(self): + # smoke test start = Timestamp('20130220 10:00', tz='US/Eastern') - try: - date_range(start, periods=2, tz='US/Eastern') - except AssertionError: - self.fail() + result = date_range(start, 
periods=2, tz='US/Eastern') + self.assertEqual(len(result), 2) def test_timezone_comparaison_assert(self): start = Timestamp('20130220 10:00', tz='US/Eastern') self.assertRaises(AssertionError, date_range, start, periods=2, tz='Europe/Berlin') - def test_comparison(self): - d = self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, BDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=BDay()) - self.assertEqual(shifted[0], rng[0] + BDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - tm.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_union_not_cacheable(self): - rng = 
date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_union = rng1.union(rng2) - self.assert_index_equal(the_union, rng) - - rng1 = rng[10:] - rng2 = rng[15:35] - the_union = rng1.union(rng2) - expected = rng[10:] - self.assert_index_equal(the_union, expected) - - def test_intersection(self): - rng = date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_int = rng1.intersection(rng2) - expected = rng[10:25] - self.assert_index_equal(the_int, expected) - tm.assertIsInstance(the_int, DatetimeIndex) - self.assertEqual(the_int.offset, rng.offset) - - the_int = rng1.intersection(rng2.view(DatetimeIndex)) - self.assert_index_equal(the_int, expected) - - # non-overlapping - the_int = rng[:10].intersection(rng[10:]) - expected = DatetimeIndex([]) - self.assert_index_equal(the_int, expected) - - def test_intersection_bug(self): - # GH #771 - a = bdate_range('11/30/2011', '12/31/2011') - b = bdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - def test_misc(self): end = datetime(2009, 5, 13) dr = bdate_range(end=end, periods=20) @@ -443,26 +274,6 @@ def test_date_parse_failure(self): self.assertRaises(ValueError, bdate_range, badly_formed_date, badly_formed_date) - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_identical(self): - t1 = self.rng.copy() - t2 = self.rng.copy() - self.assertTrue(t1.identical(t2)) - - # name - t1 = t1.rename('foo') - self.assertTrue(t1.equals(t2)) - self.assertFalse(t1.identical(t2)) - t2 = t2.rename('foo') - self.assertTrue(t1.identical(t2)) - - # freq - t2v = Index(t2.values) - self.assertTrue(t1.equals(t2v)) - self.assertFalse(t1.identical(t2v)) - def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range('12/5/2011', '12/5/2011') @@ -560,43 +371,6 @@ def test_range_tz_dateutil(self): self.assertTrue(dr[0] == start) self.assertTrue(dr[2] == end) - def test_month_range_union_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_month_range_union_tz_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - def test_range_closed(self): begin = datetime(2011, 1, 1) end = datetime(2014, 1, 1) @@ -735,151 +509,6 @@ def test_cached_range(self): self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, 
freq=CDay()) - def test_comparison(self): - d = self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, CDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - with tm.assert_produces_warning(com.PerformanceWarning): - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=CDay()) - self.assertEqual(shifted[0], rng[0] + CDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - self.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_intersection_bug(self): - # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - 
self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - def test_misc(self): end = datetime(2009, 5, 13) dr = cdate_range(end=end, periods=20) @@ -901,9 +530,6 @@ def test_date_parse_failure(self): self.assertRaises(ValueError, cdate_range, badly_formed_date, badly_formed_date) - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - def test_daterange_bug_456(self): # GH #456 rng1 = cdate_range('12/5/2011', '12/5/2011') diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c7cdcd9318a0e..63bf07ec041d3 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -6,13 +6,17 @@ import pandas.tslib as tslib import pandas.util.testing as tm from pandas.core.common import PerformanceWarning +from pandas.tseries.index import cdate_range from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, - datetime, Float64Index, offsets) - + datetime, Float64Index, offsets, bdate_range) +from pandas.tseries.offsets import BMonthEnd, CDay, BDay from pandas.tests.test_base import Ops +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + class TestDatetimeIndexOps(Ops): tz = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/Asia/Singapore', 'dateutil/US/Pacific'] @@ -1071,9 +1075,6 @@ def test_datetime64_with_DateOffset(self): assert_func(klass([x - op for x in s]), s - op) assert_func(klass([op + x for x in s]), op + s) - -class TestTslib(tm.TestCase): - def test_shift_months(self): s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( @@ -1085,3 +1086,187 @@ def test_shift_months(self): expected = DatetimeIndex([x + offsets.DateOffset( years=years, months=months) for x in s]) tm.assert_index_equal(actual, expected) + + +class TestBusinessDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = bdate_range(START, END) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, BDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 
64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=BDay()) + self.assertEqual(shifted[0], rng[0] + BDay()) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) + + def test_identical(self): + t1 = self.rng.copy() + t2 = self.rng.copy() + self.assertTrue(t1.identical(t2)) + + # name + t1 = t1.rename('foo') + self.assertTrue(t1.equals(t2)) + self.assertFalse(t1.identical(t2)) + t2 = t2.rename('foo') + self.assertTrue(t1.identical(t2)) + + # freq + t2v = Index(t2.values) + self.assertTrue(t1.equals(t2v)) + self.assertFalse(t1.identical(t2v)) + + +class TestCustomDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = cdate_range(START, END) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, CDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 
64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + with tm.assert_produces_warning(PerformanceWarning): + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=CDay()) + self.assertEqual(shifted[0], rng[0] + CDay()) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 7777de869bb20..7da660a956e23 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -4,8 +4,12 @@ import pandas as pd import pandas.util.testing as tm +from pandas.tseries.index import cdate_range from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, Int64Index, Index) +from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) class TestDatetimeIndex(tm.TestCase): @@ -185,3 +189,223 @@ def test_datetimeindex_union_join_empty(self): result = dti.join(empty) tm.assertIsInstance(result, DatetimeIndex) + + +class TestBusinessDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = bdate_range(START, END) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + tm.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = 
left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_union_not_cacheable(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_union = rng1.union(rng2) + self.assert_index_equal(the_union, rng) + + rng1 = rng[10:] + rng2 = rng[15:35] + the_union = rng1.union(rng2) + expected = rng[10:] + self.assert_index_equal(the_union, expected) + + def test_intersection(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_int = rng1.intersection(rng2) + expected = rng[10:25] + self.assert_index_equal(the_int, expected) + tm.assertIsInstance(the_int, DatetimeIndex) + self.assertEqual(the_int.offset, rng.offset) + + the_int = rng1.intersection(rng2.view(DatetimeIndex)) + self.assert_index_equal(the_int, expected) + + # non-overlapping + the_int = rng[:10].intersection(rng[10:]) + expected = DatetimeIndex([]) + self.assert_index_equal(the_int, expected) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range('11/30/2011', '12/31/2011') + b = bdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) + + def test_month_range_union_tz_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + def test_month_range_union_tz_dateutil(self): + tm._skip_if_windows_python_3() + tm._skip_if_no_dateutil() + from pandas.tslib import _dateutil_gettz as timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + +class TestCustomDatetimeIndex(tm.TestCase): + + def setUp(self): + self.rng = cdate_range(START, END) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + self.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = 
self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_intersection_bug(self): + # GH #771 + a = cdate_range('11/30/2011', '12/31/2011') + b = cdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bf1f82b90d5d6..af749963146c6 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -12,7 +12,6 @@ from pandas import tslib from pandas.tseries import tools from pandas.tseries.tools import normalize_date -from pandas.tseries.util import pivot_annual, isleapyear from pandas.compat import lmap from pandas.compat.numpy import np_array_datetime64_compat from pandas.types.common import is_datetime64_ns_dtype @@ -1382,7 +1381,7 @@ def test_parsers_iso8601(self): raise Exception(date_str) -class TestTsUtil(tm.TestCase): +class TestArrayToDatetime(tm.TestCase): def test_try_parse_dates(self): from dateutil.parser import parse @@ -1392,8 +1391,6 @@ def test_try_parse_dates(self): expected = [parse(d, dayfirst=True) for d in arr] self.assertTrue(np.array_equal(result, expected)) - -class TestArrayToDatetime(tm.TestCase): def test_parsing_valid_dates(self): arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) self.assert_numpy_array_equal( @@ -1508,109 +1505,6 @@ def test_coerce_of_invalid_datetimes(self): ) -class TestPivotAnnual(tm.TestCase): - """ - New pandas of scikits.timeseries pivot_annual - """ - - def test_daily(self): - rng = date_range('1/1/2000', '12/31/2004', freq='D') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'D') - - doy = ts.index.dayofyear - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 - - for i in range(1, 367): - subset = ts[doy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - # check leap days - leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] - day = leaps.index.dayofyear[0] - leaps.index = leaps.index.year - leaps.name = 60 - tm.assert_series_equal(annual[day].dropna(), leaps) - - def test_hourly(self): - rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), - freq='H') - data_hourly = np.random.randint(100, 350, rng_hourly.size) - ts_hourly = Series(data_hourly, index=rng_hourly) - - grouped = ts_hourly.groupby(ts_hourly.index.year) - hoy = grouped.apply(lambda x: x.reset_index(drop=True)) - hoy = hoy.index.droplevel(0).values - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 - hoy += 1 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts_hourly) - - ts_hourly = 
ts_hourly.astype(float) - for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: - subset = ts_hourly[hoy == i] - subset.index = [x.year for x in subset.index] - - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - leaps = ts_hourly[(ts_hourly.index.month == 2) & ( - ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] - hour = leaps.index.dayofyear[0] * 24 - 23 - leaps.index = leaps.index.year - leaps.name = 1417 - tm.assert_series_equal(annual[hour].dropna(), leaps) - - def test_weekly(self): - pass - - def test_monthly(self): - rng = date_range('1/1/2000', '12/31/2004', freq='M') - ts = Series(np.random.randn(len(rng)), index=rng) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - annual = pivot_annual(ts, 'M') - - month = ts.index.month - for i in range(1, 13): - subset = ts[month == i] - subset.index = [x.year for x in subset.index] - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - self.assertEqual(result.name, i) - - def test_period_monthly(self): - pass - - def test_period_daily(self): - pass - - def test_period_weekly(self): - pass - - def test_isleapyear_deprecate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2000)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertFalse(isleapyear(2001)) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(isleapyear(2004)) - - def test_normalize_date(): value = date(2012, 9, 7) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 40b46c5413c8f..7f2bb7e724362 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -3,11 +3,12 @@ import numpy as np import pandas as pd -from pandas import DataFrame, Series, Index, MultiIndex, Grouper +from pandas import DataFrame, Series, Index, MultiIndex, Grouper, date_range from pandas.tools.merge import concat from pandas.tools.pivot import pivot_table, crosstab from pandas.compat import range, product import pandas.util.testing as tm +from pandas.tseries.util import pivot_annual, isleapyear class TestPivotTable(tm.TestCase): @@ -1319,3 +1320,106 @@ def test_crosstab_with_numpy_size(self): index=expected_index, columns=expected_column) tm.assert_frame_equal(result, expected) + + +class TestPivotAnnual(tm.TestCase): + """ + New pandas of scikits.timeseries pivot_annual + """ + + def test_daily(self): + rng = date_range('1/1/2000', '12/31/2004', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'D') + + doy = ts.index.dayofyear + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + # check leap days + leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] + day = leaps.index.dayofyear[0] + leaps.index = leaps.index.year + leaps.name = 60 + tm.assert_series_equal(annual[day].dropna(), leaps) + + def test_hourly(self): + rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), + freq='H') + data_hourly = np.random.randint(100, 350, 
rng_hourly.size) + ts_hourly = Series(data_hourly, index=rng_hourly) + + grouped = ts_hourly.groupby(ts_hourly.index.year) + hoy = grouped.apply(lambda x: x.reset_index(drop=True)) + hoy = hoy.index.droplevel(0).values + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 + hoy += 1 + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts_hourly) + + ts_hourly = ts_hourly.astype(float) + for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: + subset = ts_hourly[hoy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + leaps = ts_hourly[(ts_hourly.index.month == 2) & ( + ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] + hour = leaps.index.dayofyear[0] * 24 - 23 + leaps.index = leaps.index.year + leaps.name = 1417 + tm.assert_series_equal(annual[hour].dropna(), leaps) + + def test_weekly(self): + pass + + def test_monthly(self): + rng = date_range('1/1/2000', '12/31/2004', freq='M') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'M') + + month = ts.index.month + for i in range(1, 13): + subset = ts[month == i] + subset.index = [x.year for x in subset.index] + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + def test_period_monthly(self): + pass + + def test_period_daily(self): + pass + + def test_period_weekly(self): + pass + + def test_isleapyear_deprecate(self): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2000)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertFalse(isleapyear(2001)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2004)) diff --git a/setup.py b/setup.py index 4d6bb76fd6b7c..3c2617da18eae 100755 --- a/setup.py +++ b/setup.py @@ -642,6 +642,8 @@ def pxd(name): 'pandas.tests.frame', 'pandas.tests.indexes', 'pandas.tests.indexes.datetimes', + 'pandas.tests.indexes.timedeltas', + 'pandas.tests.indexes.period', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', From 153da508a536a3ef203d9cd315c67b5fd3022a51 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Feb 2017 20:24:09 -0500 Subject: [PATCH 023/353] TST/CLN: reorg groupby tests (#15336) --- pandas/tests/groupby/common.py | 52 + pandas/tests/groupby/test_aggregate.py | 336 ++++- pandas/tests/groupby/test_categorical.py | 291 ++-- pandas/tests/groupby/test_groupby.py | 1670 +--------------------- pandas/tests/groupby/test_misc.py | 101 ++ pandas/tests/groupby/test_timegrouper.py | 609 ++++++++ pandas/tests/groupby/test_transform.py | 494 +++++++ 7 files changed, 1731 insertions(+), 1822 deletions(-) create mode 100644 pandas/tests/groupby/common.py create mode 100644 pandas/tests/groupby/test_misc.py create mode 100644 pandas/tests/groupby/test_timegrouper.py create mode 100644 pandas/tests/groupby/test_transform.py diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py new file mode 100644 index 0000000000000..8a70777d08682 --- /dev/null +++ b/pandas/tests/groupby/common.py @@ -0,0 +1,52 @@ +""" Base setup """ + +import numpy as np +from pandas.util import testing as tm 
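# Illustrative sketch (hypothetical, not a line of this patch): the reorganized
# groupby test modules pick up these shared fixtures by mixing MixIn into
# tm.TestCase, e.g. `class TestGroupBy(MixIn, tm.TestCase)` further down; a
# minimal consumer would look roughly like the sketch below (class and method
# names are invented for illustration only):
#
#     class TestGroupByFixtures(MixIn, tm.TestCase):
#         def test_df_fixture_groupby(self):
#             # self.df is the 8-row A/B/C/D frame built in MixIn.setUp
#             sizes = self.df.groupby('A').size()
#             self.assertEqual(sizes.sum(), len(self.df))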
+from pandas import DataFrame, MultiIndex + + +class MixIn(object): + + def setUp(self): + self.ts = tm.makeTimeSeries() + + self.seriesd = tm.getSeriesData() + self.tsd = tm.getTimeSeriesData() + self.frame = DataFrame(self.seriesd) + self.tsframe = DataFrame(self.tsd) + + self.df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + self.df_mixed_floats = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.array( + np.random.randn(8), dtype='float32')}) + + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + self.mframe = DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + self.three_group = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) + + +def assert_fp_equal(a, b): + assert (np.abs(a - b) < 1e-12).all() diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index 00ddd293f6014..a1fc97eb8d780 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -1,28 +1,25 @@ # -*- coding: utf-8 -*- -from __future__ import print_function -from datetime import datetime - - -from pandas import date_range -from pandas.core.index import MultiIndex -from pandas.core.api import DataFrame -from pandas.core.series import Series - -from pandas.util.testing import (assert_frame_equal, assert_series_equal - ) - -from pandas.core.groupby import (SpecificationError) -from pandas.compat import (lmap, OrderedDict) -from pandas.formats.printing import pprint_thing +""" +we test .agg behavior / note that .apply is tested +generally in test_groupby.py +""" -from pandas import compat +from __future__ import print_function +from datetime import datetime +from functools import partial -import pandas.core.common as com import numpy as np +from numpy import nan +import pandas as pd +from pandas import (date_range, MultiIndex, DataFrame, + Series, Index, bdate_range) +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.groupby import SpecificationError, DataError +from pandas.compat import OrderedDict +from pandas.formats.printing import pprint_thing import pandas.util.testing as tm -import pandas as pd class TestGroupByAggregate(tm.TestCase): @@ -452,35 +449,292 @@ def bad(x): expected = data.groupby(['A', 'B']).agg(lambda x: 'foo') assert_frame_equal(result, expected) + def test_cythonized_aggers(self): + data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], + 'B': ['A', 'B'] * 6, + 'C': np.random.randn(12)} + df = DataFrame(data) + df.loc[2:10:2, 'C'] = nan + + def _testit(name): + + op = lambda x: getattr(x, name)() + + # single column + grouped = df.drop(['B'], axis=1).groupby('A') + exp = {} + for cat, group in grouped: + exp[cat] = op(group['C']) + exp = DataFrame({'C': exp}) + exp.index.name = 'A' + result = op(grouped) + 
assert_frame_equal(result, exp) + + # multiple columns + grouped = df.groupby(['A', 'B']) + expd = {} + for (cat1, cat2), group in grouped: + expd.setdefault(cat1, {})[cat2] = op(group['C']) + exp = DataFrame(expd).T.stack(dropna=False) + exp.index.names = ['A', 'B'] + exp.name = 'C' + + result = op(grouped)['C'] + if not tm._incompat_bottleneck_version(name): + assert_series_equal(result, exp) + + _testit('count') + _testit('sum') + _testit('std') + _testit('var') + _testit('sem') + _testit('mean') + _testit('median') + _testit('prod') + _testit('min') + _testit('max') + + def test_cython_agg_boolean(self): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': np.random.randint(0, 2, 50).astype('bool')}) + result = frame.groupby('a')['b'].mean() + expected = frame.groupby('a')['b'].agg(np.mean) -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() + assert_series_equal(result, expected) + def test_cython_agg_nothing_to_agg(self): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25}) + self.assertRaises(DataError, frame.groupby('a')['b'].mean) + + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25}) + self.assertRaises(DataError, frame[['b']].groupby(frame['a']).mean) + + def test_cython_agg_nothing_to_agg_with_dates(self): + frame = DataFrame({'a': np.random.randint(0, 5, 50), + 'b': ['foo', 'bar'] * 25, + 'dates': pd.date_range('now', periods=50, + freq='T')}) + with tm.assertRaisesRegexp(DataError, "No numeric types to aggregate"): + frame.groupby('b').dates.mean() + + def test_cython_agg_frame_columns(self): + # #2113 + df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) + + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + + def test_cython_fail_agg(self): + dr = bdate_range('1/1/2000', periods=50) + ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr) + + grouped = ts.groupby(lambda x: x.month) + summed = grouped.sum() + expected = grouped.agg(np.sum) + assert_series_equal(summed, expected) + + def test_agg_consistency(self): + # agg with ([]) and () not consistent + # GH 6715 + + def P1(a): + try: + return np.percentile(a.dropna(), q=1) + except: + return np.nan + + import datetime as dt + df = DataFrame({'col1': [1, 2, 3, 4], + 'col2': [10, 25, 26, 31], + 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), + dt.date(2013, 2, 11), dt.date(2013, 2, 11)]}) + + g = df.groupby('date') + + expected = g.agg([P1]) + expected.columns = expected.columns.levels[0] + + result = g.agg(P1) + assert_frame_equal(result, expected) -def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = lmap(tuple, df[keys].values) - tups = com._asarray_tuplesafe(tups) - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) + def test_wrap_agg_out(self): + grouped = self.three_group.groupby(['A', 'B']) + def func(ser): + if ser.dtype == np.object: + raise TypeError + else: + return ser.sum() -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index + result = grouped.aggregate(func) + exp_grouped = self.three_group.loc[:, self.three_group.columns != 'C'] + expected = exp_grouped.groupby(['A', 'B']).aggregate(func) + assert_frame_equal(result, expected) + + def test_agg_multiple_functions_maintain_order(self): + # GH #610 + funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] + result = 
self.df.groupby('A')['C'].agg(funcs) + exp_cols = Index(['mean', 'max', 'min']) + + self.assert_index_equal(result.columns, exp_cols) + + def test_multiple_functions_tuples_and_non_tuples(self): + # #1359 - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) + funcs = [('foo', 'mean'), 'std'] + ex_funcs = [('foo', 'mean'), ('std', 'std')] - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) + result = self.df.groupby('A')['C'].agg(funcs) + expected = self.df.groupby('A')['C'].agg(ex_funcs) + assert_frame_equal(result, expected) + + result = self.df.groupby('A').agg(funcs) + expected = self.df.groupby('A').agg(ex_funcs) + assert_frame_equal(result, expected) + + def test_agg_multiple_functions_too_many_lambdas(self): + grouped = self.df.groupby('A') + funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] + + self.assertRaises(SpecificationError, grouped.agg, funcs) + + def test_more_flexible_frame_multi_function(self): + from pandas import concat + + grouped = self.df.groupby('A') - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) + exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) + exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) + expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) + + d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) + result = grouped.aggregate(d) + + assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + expected = grouped.aggregate(OrderedDict([['C', np.mean], + ['D', [np.mean, np.std]]])) + assert_frame_equal(result, expected) + + def foo(x): + return np.mean(x) + + def bar(x): + return np.std(x, ddof=1) + + d = OrderedDict([['C', np.mean], ['D', OrderedDict( + [['foo', np.mean], ['bar', np.std]])]]) + result = grouped.aggregate(d) + + d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) + expected = grouped.aggregate(d) + + assert_frame_equal(result, expected) + + def test_multi_function_flexible_mix(self): + # GH #1268 + grouped = self.df.groupby('A') + + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', 'sum']]) + result = grouped.aggregate(d) + d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', ['sum']]]) + result2 = grouped.aggregate(d2) + + d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', {'sum': 'sum'}]]) + expected = grouped.aggregate(d3) + + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + + def test_agg_callables(self): + # GH 7929 + df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64) + + class fn_class(object): + + def __call__(self, x): + return sum(x) + + equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(), + partial(sum), fn_class()] + + expected = df.groupby("foo").agg(sum) + for ecall in equiv_callables: + result = df.groupby('foo').agg(ecall) + assert_frame_equal(result, expected) + + def test__cython_agg_general(self): + ops = [('mean', np.mean), + ('median', 
np.median), + ('var', np.var), + ('add', np.sum), + ('prod', np.prod), + ('min', np.min), + ('max', np.max), + ('first', lambda x: x.iloc[0]), + ('last', lambda x: x.iloc[-1]), ] + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + for op, targop in ops: + result = df.groupby(labels)._cython_agg_general(op) + expected = df.groupby(labels).agg(targop) + try: + tm.assert_frame_equal(result, expected) + except BaseException as exc: + exc.args += ('operation: %s' % op, ) + raise + + def test_cython_agg_empty_buckets(self): + ops = [('mean', np.mean), + ('median', lambda x: np.median(x) if len(x) > 0 else np.nan), + ('var', lambda x: np.var(x, ddof=1)), + ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan), + ('prod', np.prod), + ('min', np.min), + ('max', np.max), ] + + df = pd.DataFrame([11, 12, 13]) + grps = range(0, 55, 5) + + for op, targop in ops: + result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op) + expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x)) + try: + tm.assert_frame_equal(result, expected) + except BaseException as exc: + exc.args += ('operation: %s' % op,) + raise + + def test_agg_over_numpy_arrays(self): + # GH 3788 + df = pd.DataFrame([[1, np.array([10, 20, 30])], + [1, np.array([40, 50, 60])], + [2, np.array([20, 30, 40])]], + columns=['category', 'arraydata']) + result = df.groupby('category').agg(sum) + + expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] + expected_index = pd.Index([1, 2], name='category') + expected_column = ['arraydata'] + expected = pd.DataFrame(expected_data, + index=expected_index, + columns=expected_column) + + assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 605b327208a03..8952b520f4f78 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1,67 +1,19 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from numpy import nan - -from pandas.core.index import Index, MultiIndex, CategoricalIndex -from pandas.core.api import DataFrame, Categorical - -from pandas.core.series import Series - -from pandas.util.testing import (assert_frame_equal, assert_series_equal - ) +from datetime import datetime -from pandas.compat import (lmap) - -from pandas import compat - -import pandas.core.common as com import numpy as np +from numpy import nan -import pandas.util.testing as tm import pandas as pd +from pandas import (Index, MultiIndex, CategoricalIndex, + DataFrame, Categorical, Series) +from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm +from .common import MixIn -class TestGroupByCategorical(tm.TestCase): - - def setUp(self): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 
'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) +class TestGroupByCategorical(MixIn, tm.TestCase): def test_level_groupby_get_group(self): # GH15155 @@ -210,8 +162,9 @@ def test_groupby_datetime_categorical(self): def test_groupby_categorical_index(self): + s = np.random.RandomState(12345) levels = ['foo', 'bar', 'baz', 'qux'] - codes = np.random.randint(0, 4, size=20) + codes = s.randint(0, 4, size=20) cats = Categorical.from_codes(codes, levels, ordered=True) df = DataFrame( np.repeat( @@ -264,70 +217,15 @@ def test_groupby_unstack_categorical(self): expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist')) tm.assert_series_equal(result, expected) - def test_groupby_categorical_unequal_len(self): + def test_groupby_bins_unequal_len(self): # GH3011 series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) - # The raises only happens with categorical, not with series of types - # category bins = pd.cut(series.dropna().values, 4) # len(bins) != len(series) here - self.assertRaises(ValueError, lambda: series.groupby(bins).mean()) - - def test_groupby_categorical_two_columns(self): - - # https://github.com/pandas-dev/pandas/issues/8138 - d = {'cat': - pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], - ordered=True), - 'ints': [1, 1, 2, 2], - 'val': [10, 20, 30, 40]} - test = pd.DataFrame(d) - - # Grouping on a single column - groups_single_key = test.groupby("cat") - res = groups_single_key.agg('mean') - - exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat", - ordered=True) - exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]}, - index=exp_index) - tm.assert_frame_equal(res, exp) - - # Grouping on two columns - groups_double_key = test.groupby(["cat", "ints"]) - res = groups_double_key.agg('mean') - exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan], - "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"], - ordered=True), - "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints" - ]) - tm.assert_frame_equal(res, exp) - - # GH 10132 - for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]: - c, i = key - result = groups_double_key.get_group(key) - expected = test[(test.cat == c) & (test.ints == i)] - assert_frame_equal(result, expected) - - d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]} - test = pd.DataFrame(d) - values = pd.cut(test['C1'], [1, 2, 3, 6]) - values.name = "cat" - groups_double_key = test.groupby([values, 'C2']) - - res = groups_double_key.agg('mean') - nan = np.nan - idx = MultiIndex.from_product( - [Categorical(["(1, 2]", "(2, 3]", "(3, 6]"], ordered=True), - [1, 2, 3, 4]], - names=["cat", "C2"]) - exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, - nan, nan, nan, nan, 4, 5], - "C3": [nan, nan, nan, nan, 10, 100, - nan, nan, nan, nan, 200, 34]}, index=idx) - tm.assert_frame_equal(res, exp) + def f(): + series.groupby(bins).mean() + self.assertRaises(ValueError, f) def test_groupby_multi_categorical_as_index(self): # GH13204 @@ -454,35 +352,148 @@ 
def test_groupby_categorical_no_compress(self): exp = np.array([1, 2, 4, np.nan]) self.assert_numpy_array_equal(result, exp) + def test_groupby_sort_categorical(self): + # dataframe groupby sort was being ignored # GH 8868 + df = DataFrame([['(7.5, 10]', 10, 10], + ['(7.5, 10]', 8, 20], + ['(2.5, 5]', 5, 30], + ['(5, 7.5]', 6, 40], + ['(2.5, 5]', 4, 50], + ['(0, 2.5]', 1, 60], + ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar']) + df['range'] = Categorical(df['range'], ordered=True) + index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', + '(7.5, 10]'], name='range', ordered=True) + result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], + columns=['foo', 'bar'], index=index) + + col = 'range' + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + # when categories is ordered, group is ordered by category's order + assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) + + df['range'] = Categorical(df['range'], ordered=False) + index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', + '(7.5, 10]'], name='range') + result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], + columns=['foo', 'bar'], index=index) + + index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', + '(0, 2.5]'], + categories=['(7.5, 10]', '(2.5, 5]', + '(5, 7.5]', '(0, 2.5]'], + name='range') + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + index=index, columns=['foo', 'bar']) + + col = 'range' + # this is an unordered categorical, but we allow this #### + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) + + def test_groupby_sort_categorical_datetimelike(self): + # GH10505 + + # use same data as test_groupby_sort_categorical, which category is + # corresponding to datetime.month + df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1), + datetime(2011, 2, 1), datetime(2011, 5, 1), + datetime(2011, 2, 1), datetime(2011, 1, 1), + datetime(2011, 5, 1)], + 'foo': [10, 8, 5, 6, 4, 1, 7], + 'bar': [10, 20, 30, 40, 50, 60, 70]}, + columns=['dt', 'foo', 'bar']) + + # ordered=True + df['dt'] = Categorical(df['dt'], ordered=True) + index = [datetime(2011, 1, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 7, 1)] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) + result_sort.index = CategoricalIndex(index, name='dt', ordered=True) + + index = [datetime(2011, 7, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 1, 1)] + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + columns=['foo', 'bar']) + result_nosort.index = CategoricalIndex(index, categories=index, + name='dt', ordered=True) + + col = 'dt' + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + # when categories is ordered, group is ordered by category's order + assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) + + # ordered = False + df['dt'] = Categorical(df['dt'], ordered=False) + index = [datetime(2011, 1, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 7, 1)] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) + result_sort.index = CategoricalIndex(index, name='dt') + + index = [datetime(2011, 7, 1), datetime(2011, 2, 1), + datetime(2011, 5, 1), datetime(2011, 1, 1)] + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + columns=['foo', 'bar']) + result_nosort.index = 
CategoricalIndex(index, categories=index, + name='dt') + + col = 'dt' + assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) + assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() - + def test_groupby_categorical_two_columns(self): -def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = lmap(tuple, df[keys].values) - tups = com._asarray_tuplesafe(tups) - expected = f(df.groupby(tups)[field]) - for k, v in compat.iteritems(expected): - assert (result[k] == v) + # https://github.com/pandas-dev/pandas/issues/8138 + d = {'cat': + pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], + ordered=True), + 'ints': [1, 1, 2, 2], + 'val': [10, 20, 30, 40]} + test = pd.DataFrame(d) + # Grouping on a single column + groups_single_key = test.groupby("cat") + res = groups_single_key.agg('mean') -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index + exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat", + ordered=True) + exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]}, + index=exp_index) + tm.assert_frame_equal(res, exp) - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) + # Grouping on two columns + groups_double_key = test.groupby(["cat", "ints"]) + res = groups_double_key.agg('mean') + exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan], + "cat": pd.Categorical(["a", "a", "b", "b", "c", "c"], + ordered=True), + "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints" + ]) + tm.assert_frame_equal(res, exp) - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) + # GH 10132 + for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]: + c, i = key + result = groups_double_key.get_group(key) + expected = test[(test.cat == c) & (test.ints == i)] + assert_frame_equal(result, expected) - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) + d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]} + test = pd.DataFrame(d) + values = pd.cut(test['C1'], [1, 2, 3, 6]) + values.name = "cat" + groups_double_key = test.groupby([values, 'C2']) - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) + res = groups_double_key.agg('mean') + nan = np.nan + idx = MultiIndex.from_product( + [Categorical(["(1, 2]", "(2, 3]", "(3, 6]"], ordered=True), + [1, 2, 3, 4]], + names=["cat", "C2"]) + exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, + nan, nan, nan, nan, 4, 5], + "C3": [nan, nan, nan, nan, 10, 100, + nan, nan, nan, nan, 200, 34]}, index=idx) + tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index df4707fcef3f0..458e869130190 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1,20 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import print_function -import nose from string import ascii_lowercase from datetime import datetime from numpy import nan -from pandas.types.common import _ensure_platform_int -from pandas import date_range, bdate_range, Timestamp, isnull -from pandas.core.index import Index, MultiIndex, CategoricalIndex -from pandas.core.api import 
Categorical, DataFrame +from pandas import (date_range, bdate_range, Timestamp, + isnull, Index, MultiIndex, DataFrame, Series) from pandas.core.common import UnsupportedFunctionCall -from pandas.core.groupby import (SpecificationError, DataError, _nargsort, - _lexsort_indexer) -from pandas.core.series import Series -from pandas.core.config import option_context from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, assert_index_equal, assertRaisesRegexp) @@ -24,57 +17,16 @@ from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict -from functools import partial import pandas.core.common as com import numpy as np import pandas.core.nanops as nanops - import pandas.util.testing as tm import pandas as pd +from .common import MixIn -class TestGroupBy(tm.TestCase): - - def setUp(self): - self.ts = tm.makeTimeSeries() - - self.seriesd = tm.getSeriesData() - self.tsd = tm.getTimeSeriesData() - self.frame = DataFrame(self.seriesd) - self.tsframe = DataFrame(self.tsd) - - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array( - np.random.randn(8), dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - - self.three_group = DataFrame( - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', - 'dull', 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) +class TestGroupBy(MixIn, tm.TestCase): def test_basic(self): def checkit(dtype): @@ -774,12 +726,12 @@ def max_value(group): def test_groupby_return_type(self): # GH2893, return a reduced type - df1 = DataFrame([{"val1": 1, - "val2": 20}, {"val1": 1, - "val2": 19}, {"val1": 2, - "val2": 27}, {"val1": 2, - "val2": 12} - ]) + df1 = DataFrame( + [{"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 2, "val2": 27}, + {"val1": 2, "val2": 12} + ]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() @@ -787,12 +739,12 @@ def func(dataf): result = df1.groupby("val1", squeeze=True).apply(func) tm.assertIsInstance(result, Series) - df2 = DataFrame([{"val1": 1, - "val2": 20}, {"val1": 1, - "val2": 19}, {"val1": 1, - "val2": 27}, {"val1": 1, - "val2": 12} - ]) + df2 = DataFrame( + [{"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 1, "val2": 27}, + {"val1": 1, "val2": 12} + ]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() @@ -902,6 +854,7 @@ def test_get_group(self): lambda: g.get_group(('foo', 'bar', 'baz'))) def test_get_group_empty_bins(self): + d = pd.DataFrame([3, 1, 7, 6]) bins = [0, 5, 10, 15] g = d.groupby(pd.cut(d[0], bins)) @@ -1043,266 +996,6 @@ def test_basic_regression(self): grouped = result.groupby(groupings) grouped.mean() - def 
test_transform(self): - data = Series(np.arange(9) // 3, index=np.arange(9)) - - index = np.arange(9) - np.random.shuffle(index) - data = data.reindex(index) - - grouped = data.groupby(lambda x: x // 3) - - transformed = grouped.transform(lambda x: x * x.sum()) - self.assertEqual(transformed[7], 12) - - # GH 8046 - # make sure that we preserve the input order - - df = DataFrame( - np.arange(6, dtype='int64').reshape( - 3, 2), columns=["a", "b"], index=[0, 2, 1]) - key = [0, 0, 1] - expected = df.sort_index().groupby(key).transform( - lambda x: x - x.mean()).groupby(key).mean() - result = df.groupby(key).transform(lambda x: x - x.mean()).groupby( - key).mean() - assert_frame_equal(result, expected) - - def demean(arr): - return arr - arr.mean() - - people = DataFrame(np.random.randn(5, 5), - columns=['a', 'b', 'c', 'd', 'e'], - index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis']) - key = ['one', 'two', 'one', 'two', 'one'] - result = people.groupby(key).transform(demean).groupby(key).mean() - expected = people.groupby(key).apply(demean).groupby(key).mean() - assert_frame_equal(result, expected) - - # GH 8430 - df = tm.makeTimeDataFrame() - g = df.groupby(pd.TimeGrouper('M')) - g.transform(lambda x: x - 1) - - # GH 9700 - df = DataFrame({'a': range(5, 10), 'b': range(5)}) - result = df.groupby('a').transform(max) - expected = DataFrame({'b': range(5)}) - tm.assert_frame_equal(result, expected) - - def test_transform_fast(self): - - df = DataFrame({'id': np.arange(100000) / 3, - 'val': np.random.randn(100000)}) - - grp = df.groupby('id')['val'] - - values = np.repeat(grp.mean().values, - _ensure_platform_int(grp.count().values)) - expected = pd.Series(values, index=df.index, name='val') - - result = grp.transform(np.mean) - assert_series_equal(result, expected) - - result = grp.transform('mean') - assert_series_equal(result, expected) - - # GH 12737 - df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5], - 'd': pd.date_range('2014-1-1', '2014-1-4'), - 'i': [1, 2, 3, 4]}, - columns=['grouping', 'f', 'i', 'd']) - result = df.groupby('grouping').transform('first') - - dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'), - pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')] - expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5], - 'd': dates, - 'i': [1, 2, 2, 4]}, - columns=['f', 'i', 'd']) - assert_frame_equal(result, expected) - - # selection - result = df.groupby('grouping')[['f', 'i']].transform('first') - expected = expected[['f', 'i']] - assert_frame_equal(result, expected) - - # dup columns - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a']) - result = df.groupby('g').transform('first') - expected = df.drop('g', axis=1) - assert_frame_equal(result, expected) - - def test_transform_broadcast(self): - grouped = self.ts.groupby(lambda x: x.month) - result = grouped.transform(np.mean) - - self.assert_index_equal(result.index, self.ts.index) - for _, gp in grouped: - assert_fp_equal(result.reindex(gp.index), gp.mean()) - - grouped = self.tsframe.groupby(lambda x: x.month) - result = grouped.transform(np.mean) - self.assert_index_equal(result.index, self.tsframe.index) - for _, gp in grouped: - agged = gp.mean() - res = result.reindex(gp.index) - for col in self.tsframe: - assert_fp_equal(res[col], agged[col]) - - # group columns - grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, - axis=1) - result = grouped.transform(np.mean) - self.assert_index_equal(result.index, self.tsframe.index) - self.assert_index_equal(result.columns, 
self.tsframe.columns) - for _, gp in grouped: - agged = gp.mean(1) - res = result.reindex(columns=gp.columns) - for idx in gp.index: - assert_fp_equal(res.xs(idx), agged[idx]) - - def test_transform_axis(self): - - # make sure that we are setting the axes - # correctly when on axis=0 or 1 - # in the presence of a non-monotonic indexer - # GH12713 - - base = self.tsframe.iloc[0:5] - r = len(base.index) - c = len(base.columns) - tso = DataFrame(np.random.randn(r, c), - index=base.index, - columns=base.columns, - dtype='float64') - # monotonic - ts = tso - grouped = ts.groupby(lambda x: x.weekday()) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) - - ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) - - # non-monotonic - ts = tso.iloc[[1, 0] + list(range(2, len(base)))] - grouped = ts.groupby(lambda x: x.weekday()) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) - - ts = ts.T - grouped = ts.groupby(lambda x: x.weekday(), axis=1) - result = ts - grouped.transform('mean') - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) - - def test_transform_dtype(self): - # GH 9807 - # Check transform dtype output is preserved - df = DataFrame([[1, 3], [2, 3]]) - result = df.groupby(1).transform('mean') - expected = DataFrame([[1.5], [1.5]]) - assert_frame_equal(result, expected) - - def test_transform_bug(self): - # GH 5712 - # transforming on a datetime column - df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) - result = df.groupby('A')['B'].transform( - lambda x: x.rank(ascending=False)) - expected = Series(np.arange(5, 0, step=-1), name='B') - assert_series_equal(result, expected) - - def test_transform_multiple(self): - grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) - - grouped.transform(lambda x: x * 2) - grouped.transform(np.mean) - - def test_dispatch_transform(self): - df = self.tsframe[::5].reindex(self.tsframe.index) - - grouped = df.groupby(lambda x: x.month) - - filled = grouped.fillna(method='pad') - fillit = lambda x: x.fillna(method='pad') - expected = df.groupby(lambda x: x.month).transform(fillit) - assert_frame_equal(filled, expected) - - def test_transform_select_columns(self): - f = lambda x: x.mean() - result = self.df.groupby('A')['C', 'D'].transform(f) - - selection = self.df[['C', 'D']] - expected = selection.groupby(self.df['A']).transform(f) - - assert_frame_equal(result, expected) - - def test_transform_exclude_nuisance(self): - - # this also tests orderings in transform between - # series/frame to make sure it's consistent - expected = {} - grouped = self.df.groupby('A') - expected['C'] = grouped['C'].transform(np.mean) - expected['D'] = grouped['D'].transform(np.mean) - expected = DataFrame(expected) - result = self.df.groupby('A').transform(np.mean) - - assert_frame_equal(result, expected) - - def test_transform_function_aliases(self): - result = self.df.groupby('A').transform('mean') - expected = self.df.groupby('A').transform(np.mean) - assert_frame_equal(result, expected) - - result = self.df.groupby('A')['C'].transform('mean') - expected = self.df.groupby('A')['C'].transform(np.mean) - assert_series_equal(result, expected) - - def test_series_fast_transform_date(self): - # GH 13191 
- df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3], - 'd': pd.date_range('2014-1-1', '2014-1-4')}) - result = df.groupby('grouping')['d'].transform('first') - dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'), - pd.Timestamp('2014-1-4')] - expected = pd.Series(dates, name='d') - assert_series_equal(result, expected) - - def test_transform_length(self): - # GH 9697 - df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]}) - expected = pd.Series([3.0] * 4) - - def nsum(x): - return np.nansum(x) - - results = [df.groupby('col1').transform(sum)['col2'], - df.groupby('col1')['col2'].transform(sum), - df.groupby('col1').transform(nsum)['col2'], - df.groupby('col1')['col2'].transform(nsum)] - for result in results: - assert_series_equal(result, expected, check_names=False) - - def test_transform_coercion(self): - - # 14457 - # when we are transforming be sure to not coerce - # via assignment - df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1])) - g = df.groupby('A') - - expected = g.transform(np.mean) - result = g.transform(lambda x: np.mean(x)) - assert_frame_equal(result, expected) - def test_with_na(self): index = Index(np.arange(10)) @@ -1330,58 +1023,6 @@ def f(x): assert_series_equal(agged, expected, check_dtype=False) self.assertTrue(issubclass(agged.dtype.type, np.dtype(dtype).type)) - def test_groupby_transform_with_int(self): - - # GH 3740, make sure that we might upcast on item-by-item transform - - # floats - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'), - C=Series( - [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - expected = DataFrame(dict(B=np.nan, C=Series( - [-1, 0, 1, -1, 0, 1], dtype='float64'))) - assert_frame_equal(result, expected) - - # int case - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, - C=[1, 2, 3, 1, 2, 3], D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) - assert_frame_equal(result, expected) - - # int that needs float conversion - s = Series([2, 3, 4, 10, 5, -1]) - df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo')) - with np.errstate(all='ignore'): - result = df.groupby('A').transform( - lambda x: (x - x.mean()) / x.std()) - - s1 = s.iloc[0:3] - s1 = (s1 - s1.mean()) / s1.std() - s2 = s.iloc[3:6] - s2 = (s2 - s2.mean()) / s2.std() - expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) - assert_frame_equal(result, expected) - - # int downcasting - result = df.groupby('A').transform(lambda x: x * 2 / 2) - expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) - assert_frame_equal(result, expected) - - def test_groupby_transform_with_nan_group(self): - # GH 9941 - df = pd.DataFrame({'a': range(10), - 'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) - result = df.groupby(df.b)['a'].transform(max) - expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.], - name='a') - assert_series_equal(result, expected) - def test_indices_concatenation_order(self): # GH 2808 @@ -1845,6 +1486,7 @@ def check_nunique(df, keys, as_index=True): def test_series_groupby_value_counts(self): from itertools import product + np.random.seed(1234) def rebuild_index(df): arr = list(map(df.index.get_level_values, range(df.index.nlevels))) @@ -2220,51 +1862,6 @@ def test_builtins_apply(self): # GH8155 assert_series_equal(getattr(result, fname)(), getattr(df, fname)()) - def 
test_cythonized_aggers(self): - data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], - 'B': ['A', 'B'] * 6, - 'C': np.random.randn(12)} - df = DataFrame(data) - df.loc[2:10:2, 'C'] = nan - - def _testit(name): - - op = lambda x: getattr(x, name)() - - # single column - grouped = df.drop(['B'], axis=1).groupby('A') - exp = {} - for cat, group in grouped: - exp[cat] = op(group['C']) - exp = DataFrame({'C': exp}) - exp.index.name = 'A' - result = op(grouped) - assert_frame_equal(result, exp) - - # multiple columns - grouped = df.groupby(['A', 'B']) - expd = {} - for (cat1, cat2), group in grouped: - expd.setdefault(cat1, {})[cat2] = op(group['C']) - exp = DataFrame(expd).T.stack(dropna=False) - exp.index.names = ['A', 'B'] - exp.name = 'C' - - result = op(grouped)['C'] - if not tm._incompat_bottleneck_version(name): - assert_series_equal(result, exp) - - _testit('count') - _testit('sum') - _testit('std') - _testit('var') - _testit('sem') - _testit('mean') - _testit('median') - _testit('prod') - _testit('min') - _testit('max') - def test_max_min_non_numeric(self): # #2700 aa = DataFrame({'nn': [11, 11, 22, 22], @@ -2399,31 +1996,6 @@ def test_arg_passthru(self): result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) - def test_cython_agg_boolean(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': np.random.randint(0, 2, 50).astype('bool')}) - result = frame.groupby('a')['b'].mean() - expected = frame.groupby('a')['b'].agg(np.mean) - - assert_series_equal(result, expected) - - def test_cython_agg_nothing_to_agg(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame.groupby('a')['b'].mean) - - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25}) - self.assertRaises(DataError, frame[['b']].groupby(frame['a']).mean) - - def test_cython_agg_nothing_to_agg_with_dates(self): - frame = DataFrame({'a': np.random.randint(0, 5, 50), - 'b': ['foo', 'bar'] * 25, - 'dates': pd.date_range('now', periods=50, - freq='T')}) - with tm.assertRaisesRegexp(DataError, "No numeric types to aggregate"): - frame.groupby('b').dates.mean() - def test_groupby_timedelta_cython_count(self): df = DataFrame({'g': list('ab' * 2), 'delt': np.arange(4).astype('timedelta64[ns]')}) @@ -2433,15 +2005,6 @@ def test_groupby_timedelta_cython_count(self): result = df.groupby('g').delt.count() tm.assert_series_equal(expected, result) - def test_cython_agg_frame_columns(self): - # #2113 - df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) - - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - df.groupby(level=0, axis='columns').mean() - def test_wrap_aggregated_output_multindex(self): df = self.mframe.T df['baz', 'two'] = 'peekaboo' @@ -2616,15 +2179,6 @@ def test_grouping_labels(self): exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) assert_almost_equal(grouped.grouper.labels[0], exp_labels) - def test_cython_fail_agg(self): - dr = bdate_range('1/1/2000', periods=50) - ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr) - - grouped = ts.groupby(lambda x: x.month) - summed = grouped.sum() - expected = grouped.agg(np.sum) - assert_series_equal(summed, expected) - def test_apply_series_to_frame(self): def f(piece): with np.errstate(invalid='ignore'): @@ -3051,30 +2605,6 @@ def test_grouping_ndarray(self): assert_frame_equal(result, expected, check_names=False ) # Note: no names when 
grouping by value - def test_agg_consistency(self): - # agg with ([]) and () not consistent - # GH 6715 - - def P1(a): - try: - return np.percentile(a.dropna(), q=1) - except: - return np.nan - - import datetime as dt - df = DataFrame({'col1': [1, 2, 3, 4], - 'col2': [10, 25, 26, 31], - 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), - dt.date(2013, 2, 11), dt.date(2013, 2, 11)]}) - - g = df.groupby('date') - - expected = g.agg([P1]) - expected.columns = expected.columns.levels[0] - - result = g.agg(P1) - assert_frame_equal(result, expected) - def test_apply_typecast_fail(self): df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile( @@ -3159,28 +2689,6 @@ def f(g): result = grouped.apply(f) self.assertTrue('value3' in result) - def test_transform_mixed_type(self): - index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] - ]) - df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], - 'c': np.tile(['a', 'b', 'c'], 2), - 'v': np.arange(1., 7.)}, index=index) - - def f(group): - group['g'] = group['d'] * 2 - return group[:1] - - grouped = df.groupby('c') - result = grouped.apply(f) - - self.assertEqual(result['d'].dtype, np.float64) - - # this is by definition a mutating operation! - with option_context('mode.chained_assignment', None): - for key, group in grouped: - res = f(group) - assert_frame_equal(res, result.loc[key]) - def test_groupby_wrong_multi_labels(self): from pandas import read_csv data = """index,foo,bar,baz,spam,data @@ -3768,20 +3276,6 @@ def test_no_nonsense_name(self): result = s.groupby(self.frame['A']).agg(np.sum) self.assertIsNone(result.name) - def test_wrap_agg_out(self): - grouped = self.three_group.groupby(['A', 'B']) - - def func(ser): - if ser.dtype == np.object: - raise TypeError - else: - return ser.sum() - - result = grouped.aggregate(func) - exp_grouped = self.three_group.loc[:, self.three_group.columns != 'C'] - expected = exp_grouped.groupby(['A', 'B']).aggregate(func) - assert_frame_equal(result, expected) - def test_multifunc_sum_bug(self): # GH #1065 x = DataFrame(np.arange(9).reshape(3, 3)) @@ -3839,110 +3333,6 @@ def test_getitem_numeric_column_names(self): assert_frame_equal(result2, expected) assert_frame_equal(result3, expected) - def test_agg_multiple_functions_maintain_order(self): - # GH #610 - funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)] - result = self.df.groupby('A')['C'].agg(funcs) - exp_cols = Index(['mean', 'max', 'min']) - - self.assert_index_equal(result.columns, exp_cols) - - def test_multiple_functions_tuples_and_non_tuples(self): - # #1359 - - funcs = [('foo', 'mean'), 'std'] - ex_funcs = [('foo', 'mean'), ('std', 'std')] - - result = self.df.groupby('A')['C'].agg(funcs) - expected = self.df.groupby('A')['C'].agg(ex_funcs) - assert_frame_equal(result, expected) - - result = self.df.groupby('A').agg(funcs) - expected = self.df.groupby('A').agg(ex_funcs) - assert_frame_equal(result, expected) - - def test_agg_multiple_functions_too_many_lambdas(self): - grouped = self.df.groupby('A') - funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] - - self.assertRaises(SpecificationError, grouped.agg, funcs) - - def test_more_flexible_frame_multi_function(self): - from pandas import concat - - grouped = self.df.groupby('A') - - exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]])) - exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]])) - - expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) - expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) - - d = 
OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]]) - result = grouped.aggregate(d) - - assert_frame_equal(result, expected) - - # be careful - result = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - expected = grouped.aggregate(OrderedDict([['C', np.mean], - ['D', [np.mean, np.std]]])) - assert_frame_equal(result, expected) - - def foo(x): - return np.mean(x) - - def bar(x): - return np.std(x, ddof=1) - - d = OrderedDict([['C', np.mean], ['D', OrderedDict( - [['foo', np.mean], ['bar', np.std]])]]) - result = grouped.aggregate(d) - - d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) - expected = grouped.aggregate(d) - - assert_frame_equal(result, expected) - - def test_multi_function_flexible_mix(self): - # GH #1268 - grouped = self.df.groupby('A') - - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', 'sum']]) - result = grouped.aggregate(d) - d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', ['sum']]]) - result2 = grouped.aggregate(d2) - - d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ - 'bar', 'std' - ]])], ['D', {'sum': 'sum'}]]) - expected = grouped.aggregate(d3) - - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - def test_agg_callables(self): - # GH 7929 - df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64) - - class fn_class(object): - - def __call__(self, x): - return sum(x) - - equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(), - partial(sum), fn_class()] - - expected = df.groupby("foo").agg(sum) - for ecall in equiv_callables: - result = df.groupby('foo').agg(ecall) - assert_frame_equal(result, expected) - def test_set_group_name(self): def f(group): assert group.name is not None @@ -3980,97 +3370,6 @@ def test_no_dummy_key_names(self): ]).sum() self.assertEqual(result.index.names, (None, None)) - def test_groupby_sort_categorical(self): - # dataframe groupby sort was being ignored # GH 8868 - df = DataFrame([['(7.5, 10]', 10, 10], - ['(7.5, 10]', 8, 20], - ['(2.5, 5]', 5, 30], - ['(5, 7.5]', 6, 40], - ['(2.5, 5]', 4, 50], - ['(0, 2.5]', 1, 60], - ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar']) - df['range'] = Categorical(df['range'], ordered=True) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', - '(7.5, 10]'], name='range', ordered=True) - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], - columns=['foo', 'bar'], index=index) - - col = 'range' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - # when categories is ordered, group is ordered by category's order - assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) - - df['range'] = Categorical(df['range'], ordered=False) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', - '(7.5, 10]'], name='range') - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], - columns=['foo', 'bar'], index=index) - - index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', - '(0, 2.5]'], - categories=['(7.5, 10]', '(2.5, 5]', - '(5, 7.5]', '(0, 2.5]'], - name='range') - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - index=index, columns=['foo', 'bar']) - - col = 'range' - # this is an unordered categorical, but we allow this #### - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) - - def test_groupby_sort_categorical_datetimelike(self): - # GH10505 - - 
# use same data as test_groupby_sort_categorical, which category is - # corresponding to datetime.month - df = DataFrame({'dt': [datetime(2011, 7, 1), datetime(2011, 7, 1), - datetime(2011, 2, 1), datetime(2011, 5, 1), - datetime(2011, 2, 1), datetime(2011, 1, 1), - datetime(2011, 5, 1)], - 'foo': [10, 8, 5, 6, 4, 1, 7], - 'bar': [10, 20, 30, 40, 50, 60, 70]}, - columns=['dt', 'foo', 'bar']) - - # ordered=True - df['dt'] = Categorical(df['dt'], ordered=True) - index = [datetime(2011, 1, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame( - [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = CategoricalIndex(index, name='dt', ordered=True) - - index = [datetime(2011, 7, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 1, 1)] - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - columns=['foo', 'bar']) - result_nosort.index = CategoricalIndex(index, categories=index, - name='dt', ordered=True) - - col = 'dt' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - # when categories is ordered, group is ordered by category's order - assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) - - # ordered = False - df['dt'] = Categorical(df['dt'], ordered=False) - index = [datetime(2011, 1, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame( - [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = CategoricalIndex(index, name='dt') - - index = [datetime(2011, 7, 1), datetime(2011, 2, 1), - datetime(2011, 5, 1), datetime(2011, 1, 1)] - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], - columns=['foo', 'bar']) - result_nosort.index = CategoricalIndex(index, categories=index, - name='dt') - - col = 'dt' - assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) - assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) - def test_groupby_sort_multiindex_series(self): # series multiindex groupby sort argument was not being passed through # _compress_group_index @@ -4088,169 +3387,6 @@ def test_groupby_sort_multiindex_series(self): result = mseries.groupby(level=['a', 'b'], sort=True).first() assert_series_equal(result, mseries_result.sort_index()) - def test_groupby_groups_datetimeindex(self): - # #1430 - from pandas.tseries.api import DatetimeIndex - periods = 1000 - ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) - df = DataFrame({'high': np.arange(periods), - 'low': np.arange(periods)}, index=ind) - grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) - - # it works! 
- groups = grouped.groups - tm.assertIsInstance(list(groups.keys())[0], datetime) - - # GH 11442 - index = pd.date_range('2015/01/01', periods=5, name='date') - df = pd.DataFrame({'A': [5, 6, 7, 8, 9], - 'B': [1, 2, 3, 4, 5]}, index=index) - result = df.groupby(level='date').groups - dates = ['2015-01-05', '2015-01-04', '2015-01-03', - '2015-01-02', '2015-01-01'] - expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date') - for date in dates} - tm.assert_dict_equal(result, expected) - - grouped = df.groupby(level='date') - for date in dates: - result = grouped.get_group(date) - data = [[df.loc[date, 'A'], df.loc[date, 'B']]] - expected_index = pd.DatetimeIndex([date], name='date') - expected = pd.DataFrame(data, - columns=list('AB'), - index=expected_index) - tm.assert_frame_equal(result, expected) - - def test_groupby_groups_datetimeindex_tz(self): - # GH 3950 - dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00', '2011-07-19 07:00:00', - '2011-07-19 08:00:00', '2011-07-19 09:00:00'] - df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], - 'datetime': dates, - 'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2] * 3}) - df['datetime'] = df['datetime'].apply( - lambda d: Timestamp(d, tz='US/Pacific')) - - exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', - '2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00', - '2011-07-19 09:00:00'], - tz='US/Pacific', name='datetime') - exp_idx2 = Index(['a', 'b'] * 3, name='label') - exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) - expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], - 'value2': [1, 2, 2, 1, 1, 2]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(['datetime', 'label']).sum() - assert_frame_equal(result, expected) - - # by level - didx = pd.DatetimeIndex(dates, tz='Asia/Tokyo') - df = DataFrame({'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2, 3, 1, 2, 3]}, - index=didx) - - exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00'], tz='Asia/Tokyo') - expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(level=0).sum() - assert_frame_equal(result, expected) - - def test_frame_datetime64_handling_groupby(self): - # it works! 
- df = DataFrame([(3, np.datetime64('2012-07-03')), - (3, np.datetime64('2012-07-04'))], - columns=['a', 'date']) - result = df.groupby('a').first() - self.assertEqual(result['date'][3], Timestamp('2012-07-03')) - - def test_groupby_multi_timezone(self): - - # combining multiple / different timezones yields UTC - - data = """0,2000-01-28 16:47:00,America/Chicago -1,2000-01-29 16:48:00,America/Chicago -2,2000-01-30 16:49:00,America/Los_Angeles -3,2000-01-31 16:50:00,America/Chicago -4,2000-01-01 16:50:00,America/New_York""" - - df = pd.read_csv(StringIO(data), header=None, - names=['value', 'date', 'tz']) - result = df.groupby('tz').date.apply( - lambda x: pd.to_datetime(x).dt.tz_localize(x.name)) - - expected = Series([Timestamp('2000-01-28 16:47:00-0600', - tz='America/Chicago'), - Timestamp('2000-01-29 16:48:00-0600', - tz='America/Chicago'), - Timestamp('2000-01-30 16:49:00-0800', - tz='America/Los_Angeles'), - Timestamp('2000-01-31 16:50:00-0600', - tz='America/Chicago'), - Timestamp('2000-01-01 16:50:00-0500', - tz='America/New_York')], - name='date', - dtype=object) - assert_series_equal(result, expected) - - tz = 'America/Chicago' - res_values = df.groupby('tz').date.get_group(tz) - result = pd.to_datetime(res_values).dt.tz_localize(tz) - exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00', - '2000-01-31 16:50:00'], - index=[0, 1, 3], name='date') - expected = pd.to_datetime(exp_values).dt.tz_localize(tz) - assert_series_equal(result, expected) - - def test_groupby_groups_periods(self): - dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00', '2011-07-19 07:00:00', - '2011-07-19 08:00:00', '2011-07-19 09:00:00'] - df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], - 'period': [pd.Period(d, freq='H') for d in dates], - 'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2] * 3}) - - exp_idx1 = pd.PeriodIndex(['2011-07-19 07:00:00', - '2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00', - '2011-07-19 09:00:00'], - freq='H', name='period') - exp_idx2 = Index(['a', 'b'] * 3, name='label') - exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) - expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], - 'value2': [1, 2, 2, 1, 1, 2]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(['period', 'label']).sum() - assert_frame_equal(result, expected) - - # by level - didx = pd.PeriodIndex(dates, freq='H') - df = DataFrame({'value1': np.arange(6, dtype='int64'), - 'value2': [1, 2, 3, 1, 2, 3]}, - index=didx) - - exp_idx = pd.PeriodIndex(['2011-07-19 07:00:00', - '2011-07-19 08:00:00', - '2011-07-19 09:00:00'], freq='H') - expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, - index=exp_idx, columns=['value1', 'value2']) - - result = df.groupby(level=0).sum() - assert_frame_equal(result, expected) - def test_groupby_reindex_inside_function(self): from pandas.tseries.api import DatetimeIndex @@ -4336,33 +3472,21 @@ def test_median_empty_bins(self): def test_groupby_non_arithmetic_agg_types(self): # GH9311, GH6620 - df = pd.DataFrame([{'a': 1, - 'b': 1}, {'a': 1, - 'b': 2}, {'a': 2, - 'b': 3}, {'a': 2, - 'b': 4}]) + df = pd.DataFrame( + [{'a': 1, 'b': 1}, + {'a': 1, 'b': 2}, + {'a': 2, 'b': 3}, + {'a': 2, 'b': 4}]) dtypes = ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'] - grp_exp = {'first': {'df': [{'a': 1, - 'b': 1}, {'a': 2, - 'b': 3}]}, - 'last': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 4}]}, - 'min': {'df': [{'a': 1, - 'b': 1}, {'a': 2, - 'b': 3}]}, - 
'max': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 4}]}, - 'nth': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 4}], + grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, + 'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, + 'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, + 'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, + 'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], 'args': [1]}, - 'count': {'df': [{'a': 1, - 'b': 2}, {'a': 2, - 'b': 2}], + 'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], 'out_type': 'int64'}} for dtype in dtypes: @@ -4414,37 +3538,6 @@ def test_groupby_non_arithmetic_agg_intlike_precision(self): res = getattr(grpd, method)(*data['args']) self.assertEqual(res.iloc[0].b, data['expected']) - def test_groupby_first_datetime64(self): - df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) - df[1] = df[1].view('M8[ns]') - - self.assertTrue(issubclass(df[1].dtype.type, np.datetime64)) - - result = df.groupby(level=0).first() - got_dt = result[1].dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) - - result = df[1].groupby(level=0).first() - got_dt = result.dtype - self.assertTrue(issubclass(got_dt.type, np.datetime64)) - - def test_groupby_max_datetime64(self): - # GH 5869 - # datetimelike dtype conversion from int - df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) - expected = df.groupby('A')['A'].apply(lambda x: x.max()) - result = df.groupby('A')['A'].max() - assert_series_equal(result, expected) - - def test_groupby_datetime64_32_bit(self): - # GH 6410 / numpy 4328 - # 32-bit under 1.9-dev indexing issue - - df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')] * 2}) - result = df.groupby("A")["B"].transform(min) - expected = Series([pd.Timestamp('2000-01-1')] * 2, name='B') - assert_series_equal(result, expected) - def test_groupby_multiindex_missing_pair(self): # GH9049 df = DataFrame({'group1': ['a', 'a', 'a', 'b'], @@ -4613,381 +3706,6 @@ def test_groupby_with_small_elem(self): res = grouped.get_group((pd.Timestamp('2014-08-31'), 'start')) tm.assert_frame_equal(res, df.iloc[[2], :]) - def test_groupby_with_timezone_selection(self): - # GH 11616 - # Test that column selection returns output in correct timezone. 
- np.random.seed(42) - df = pd.DataFrame({ - 'factor': np.random.randint(0, 3, size=60), - 'time': pd.date_range('01/01/2000 00:00', periods=60, - freq='s', tz='UTC') - }) - df1 = df.groupby('factor').max()['time'] - df2 = df.groupby('factor')['time'].max() - tm.assert_series_equal(df1, df2) - - def test_timezone_info(self): - # GH 11682 - # Timezone info lost when broadcasting scalar datetime to DataFrame - tm._skip_if_no_pytz() - import pytz - - df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) - df = pd.DataFrame({'a': [1, 2, 3]}) - df['b'] = datetime.now(pytz.utc) - self.assertEqual(df['b'][0].tzinfo, pytz.utc) - - def test_groupby_with_timegrouper(self): - # GH 4161 - # TimeGrouper requires a sorted index - # also verifies that the resultant index has the correct name - import datetime as DT - df_original = DataFrame({ - 'Buyer': 'Carl Carl Carl Carl Joe Carl'.split(), - 'Quantity': [18, 3, 5, 1, 9, 3], - 'Date': [ - DT.datetime(2013, 9, 1, 13, 0), - DT.datetime(2013, 9, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 3, 10, 0), - DT.datetime(2013, 12, 2, 12, 0), - DT.datetime(2013, 9, 2, 14, 0), - ] - }) - - # GH 6908 change target column's order - df_reordered = df_original.sort_values(by='Quantity') - - for df in [df_original, df_reordered]: - df = df.set_index(['Date']) - - expected = DataFrame( - {'Quantity': np.nan}, - index=date_range('20130901 13:00:00', - '20131205 13:00:00', freq='5D', - name='Date', closed='left')) - expected.iloc[[0, 6, 18], 0] = np.array( - [24., 6., 9.], dtype='float64') - - result1 = df.resample('5D') .sum() - assert_frame_equal(result1, expected) - - df_sorted = df.sort_index() - result2 = df_sorted.groupby(pd.TimeGrouper(freq='5D')).sum() - assert_frame_equal(result2, expected) - - result3 = df.groupby(pd.TimeGrouper(freq='5D')).sum() - assert_frame_equal(result3, expected) - - def test_groupby_with_timegrouper_methods(self): - # GH 3881 - # make sure API of timegrouper conforms - - import datetime as DT - df_original = pd.DataFrame({ - 'Branch': 'A A A A A B'.split(), - 'Buyer': 'Carl Mark Carl Joe Joe Carl'.split(), - 'Quantity': [1, 3, 5, 8, 9, 3], - 'Date': [ - DT.datetime(2013, 1, 1, 13, 0), - DT.datetime(2013, 1, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 12, 2, 12, 0), - DT.datetime(2013, 12, 2, 14, 0), - ] - }) - - df_sorted = df_original.sort_values(by='Quantity', ascending=False) - - for df in [df_original, df_sorted]: - df = df.set_index('Date', drop=False) - g = df.groupby(pd.TimeGrouper('6M')) - self.assertTrue(g.group_keys) - self.assertTrue(isinstance(g.grouper, pd.core.groupby.BinGrouper)) - groups = g.groups - self.assertTrue(isinstance(groups, dict)) - self.assertTrue(len(groups) == 3) - - def test_timegrouper_with_reg_groups(self): - - # GH 3794 - # allow combinateion of timegrouper/reg groups - - import datetime as DT - - df_original = DataFrame({ - 'Branch': 'A A A A A A A B'.split(), - 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), - 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], - 'Date': [ - DT.datetime(2013, 1, 1, 13, 0), - DT.datetime(2013, 1, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 12, 2, 12, 0), - DT.datetime(2013, 12, 2, 14, 0), - ] - }).set_index('Date') - - df_sorted = df_original.sort_values(by='Quantity', ascending=False) - - for df in [df_original, 
df_sorted]: - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10, 18, 3], - 'Date': [ - DT.datetime(2013, 12, 31, 0, 0), - DT.datetime(2013, 12, 31, 0, 0), - DT.datetime(2013, 12, 31, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - - result = df.groupby([pd.Grouper(freq='A'), 'Buyer']).sum() - assert_frame_equal(result, expected) - - expected = DataFrame({ - 'Buyer': 'Carl Mark Carl Joe'.split(), - 'Quantity': [1, 3, 9, 18], - 'Date': [ - DT.datetime(2013, 1, 1, 0, 0), - DT.datetime(2013, 1, 1, 0, 0), - DT.datetime(2013, 7, 1, 0, 0), - DT.datetime(2013, 7, 1, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - result = df.groupby([pd.Grouper(freq='6MS'), 'Buyer']).sum() - assert_frame_equal(result, expected) - - df_original = DataFrame({ - 'Branch': 'A A A A A A A B'.split(), - 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), - 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], - 'Date': [ - DT.datetime(2013, 10, 1, 13, 0), - DT.datetime(2013, 10, 1, 13, 5), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 10, 1, 20, 0), - DT.datetime(2013, 10, 2, 10, 0), - DT.datetime(2013, 10, 2, 12, 0), - DT.datetime(2013, 10, 2, 14, 0), - ] - }).set_index('Date') - - df_sorted = df_original.sort_values(by='Quantity', ascending=False) - for df in [df_original, df_sorted]: - - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark Carl Joe'.split(), - 'Quantity': [6, 8, 3, 4, 10], - 'Date': [ - DT.datetime(2013, 10, 1, 0, 0), - DT.datetime(2013, 10, 1, 0, 0), - DT.datetime(2013, 10, 1, 0, 0), - DT.datetime(2013, 10, 2, 0, 0), - DT.datetime(2013, 10, 2, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - - result = df.groupby([pd.Grouper(freq='1D'), 'Buyer']).sum() - assert_frame_equal(result, expected) - - result = df.groupby([pd.Grouper(freq='1M'), 'Buyer']).sum() - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10, 18, 3], - 'Date': [ - DT.datetime(2013, 10, 31, 0, 0), - DT.datetime(2013, 10, 31, 0, 0), - DT.datetime(2013, 10, 31, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - assert_frame_equal(result, expected) - - # passing the name - df = df.reset_index() - result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' - ]).sum() - assert_frame_equal(result, expected) - - with self.assertRaises(KeyError): - df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum() - - # passing the level - df = df.set_index('Date') - result = df.groupby([pd.Grouper(freq='1M', level='Date'), 'Buyer' - ]).sum() - assert_frame_equal(result, expected) - result = df.groupby([pd.Grouper(freq='1M', level=0), 'Buyer']).sum( - ) - assert_frame_equal(result, expected) - - with self.assertRaises(ValueError): - df.groupby([pd.Grouper(freq='1M', level='foo'), - 'Buyer']).sum() - - # multi names - df = df.copy() - df['Date'] = df.index + pd.offsets.MonthEnd(2) - result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' - ]).sum() - expected = DataFrame({ - 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10, 18, 3], - 'Date': [ - DT.datetime(2013, 11, 30, 0, 0), - DT.datetime(2013, 11, 30, 0, 0), - DT.datetime(2013, 11, 30, 0, 0), - ] - }).set_index(['Date', 'Buyer']) - assert_frame_equal(result, expected) - - # error as we have both a level and a name! 
- with self.assertRaises(ValueError): - df.groupby([pd.Grouper(freq='1M', key='Date', - level='Date'), 'Buyer']).sum() - - # single groupers - expected = DataFrame({'Quantity': [31], - 'Date': [DT.datetime(2013, 10, 31, 0, 0) - ]}).set_index('Date') - result = df.groupby(pd.Grouper(freq='1M')).sum() - assert_frame_equal(result, expected) - - result = df.groupby([pd.Grouper(freq='1M')]).sum() - assert_frame_equal(result, expected) - - expected = DataFrame({'Quantity': [31], - 'Date': [DT.datetime(2013, 11, 30, 0, 0) - ]}).set_index('Date') - result = df.groupby(pd.Grouper(freq='1M', key='Date')).sum() - assert_frame_equal(result, expected) - - result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum() - assert_frame_equal(result, expected) - - # GH 6764 multiple grouping with/without sort - df = DataFrame({ - 'date': pd.to_datetime([ - '20121002', '20121007', '20130130', '20130202', '20130305', - '20121002', '20121207', '20130130', '20130202', '20130305', - '20130202', '20130305' - ]), - 'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], - 'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301, - 359, 801], - 'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12] - }).set_index('date') - - for freq in ['D', 'M', 'A', 'Q-APR']: - expected = df.groupby('user_id')[ - 'whole_cost'].resample( - freq).sum().dropna().reorder_levels( - ['date', 'user_id']).sort_index().astype('int64') - expected.name = 'whole_cost' - - result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), - 'user_id'])['whole_cost'].sum() - assert_series_equal(result1, expected) - - result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ - 'whole_cost'].sum() - assert_series_equal(result2, expected) - - def test_timegrouper_get_group(self): - # GH 6914 - - df_original = DataFrame({ - 'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(), - 'Quantity': [18, 3, 5, 1, 9, 3], - 'Date': [datetime(2013, 9, 1, 13, 0), - datetime(2013, 9, 1, 13, 5), - datetime(2013, 10, 1, 20, 0), - datetime(2013, 10, 3, 10, 0), - datetime(2013, 12, 2, 12, 0), - datetime(2013, 9, 2, 14, 0), ] - }) - df_reordered = df_original.sort_values(by='Quantity') - - # single grouping - expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], - df_original.iloc[[4]]] - dt_list = ['2013-09-30', '2013-10-31', '2013-12-31'] - - for df in [df_original, df_reordered]: - grouped = df.groupby(pd.Grouper(freq='M', key='Date')) - for t, expected in zip(dt_list, expected_list): - dt = pd.Timestamp(t) - result = grouped.get_group(dt) - assert_frame_equal(result, expected) - - # multiple grouping - expected_list = [df_original.iloc[[1]], df_original.iloc[[3]], - df_original.iloc[[4]]] - g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'), - ('Joe', '2013-12-31')] - - for df in [df_original, df_reordered]: - grouped = df.groupby(['Buyer', pd.Grouper(freq='M', key='Date')]) - for (b, t), expected in zip(g_list, expected_list): - dt = pd.Timestamp(t) - result = grouped.get_group((b, dt)) - assert_frame_equal(result, expected) - - # with index - df_original = df_original.set_index('Date') - df_reordered = df_original.sort_values(by='Quantity') - - expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], - df_original.iloc[[4]]] - - for df in [df_original, df_reordered]: - grouped = df.groupby(pd.Grouper(freq='M')) - for t, expected in zip(dt_list, expected_list): - dt = pd.Timestamp(t) - result = grouped.get_group(dt) - assert_frame_equal(result, expected) - - def test_timegrouper_apply_return_type_series(self): - # Using `apply` 
with the `TimeGrouper` should give the - # same return type as an `apply` with a `Grouper`. - # Issue #11742 - df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], - 'value': [10, 13]}) - df_dt = df.copy() - df_dt['date'] = pd.to_datetime(df_dt['date']) - - def sumfunc_series(x): - return pd.Series([x['value'].sum()], ('sum',)) - - expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_series)) - assert_frame_equal(result.reset_index(drop=True), - expected.reset_index(drop=True)) - - def test_timegrouper_apply_return_type_value(self): - # Using `apply` with the `TimeGrouper` should give the - # same return type as an `apply` with a `Grouper`. - # Issue #11742 - df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], - 'value': [10, 13]}) - df_dt = df.copy() - df_dt['date'] = pd.to_datetime(df_dt['date']) - - def sumfunc_value(x): - return x.value.sum() - - expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_value)) - assert_series_equal(result.reset_index(drop=True), - expected.reset_index(drop=True)) - def test_cumcount(self): df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) g = df.groupby('A') @@ -5326,106 +4044,6 @@ def test_tab_completion(self): 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) self.assertEqual(results, expected) - def test_lexsort_indexer(self): - keys = [[nan] * 5 + list(range(100)) + [nan] * 5] - # orders=True, na_position='last' - result = _lexsort_indexer(keys, orders=True, na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=True, na_position='first' - result = _lexsort_indexer(keys, orders=True, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='last' - result = _lexsort_indexer(keys, orders=False, na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='first' - result = _lexsort_indexer(keys, orders=False, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - def test_nargsort(self): - # np.argsort(items) places NaNs last - items = [nan] * 5 + list(range(100)) + [nan] * 5 - # np.argsort(items2) may not place NaNs first - items2 = np.array(items, dtype='O') - - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - raise nose.SkipTest('requested sort not available for type') - - # mergesort is the most difficult to get right because we want it to be - # stable. 
- - # According to numpy/core/tests/test_multiarray, """The number of - # sorted items must be greater than ~50 to check the actual algorithm - # because quick and merge sort fall over to insertion sort for small - # arrays.""" - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - def test_datetime_count(self): - df = DataFrame({'a': [1, 2, 3] * 2, - 'dates': pd.date_range('now', periods=6, freq='T')}) - result = df.groupby('a').dates.count() - expected = Series([ - 2, 2, 2 - ], index=Index([1, 2, 3], name='a'), name='dates') - tm.assert_series_equal(result, expected) - def test_lower_int_prec_count(self): df = DataFrame({'a': np.array( [0, 1, 2, 100], np.int8), @@ -5462,179 +4080,6 @@ def __eq__(self, other): list('ab'), name='grp')) tm.assert_frame_equal(result, expected) - def test__cython_agg_general(self): - ops = [('mean', np.mean), - ('median', np.median), - ('var', np.var), - ('add', np.sum), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), - ('first', lambda x: x.iloc[0]), - ('last', lambda x: x.iloc[-1]), ] - df = DataFrame(np.random.randn(1000)) - labels = np.random.randint(0, 50, size=1000).astype(float) - - for op, targop in ops: - result = df.groupby(labels)._cython_agg_general(op) - expected = df.groupby(labels).agg(targop) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - 
exc.args += ('operation: %s' % op, ) - raise - - def test_cython_agg_empty_buckets(self): - ops = [('mean', np.mean), - ('median', lambda x: np.median(x) if len(x) > 0 else np.nan), - ('var', lambda x: np.var(x, ddof=1)), - ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), ] - - df = pd.DataFrame([11, 12, 13]) - grps = range(0, 55, 5) - - for op, targop in ops: - result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op) - expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x)) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op,) - raise - - def test_cython_group_transform_algos(self): - # GH 4095 - dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, - np.uint64, np.float32, np.float64] - - ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), - (pd.algos.group_cumsum, np.cumsum, dtypes)] - - is_datetimelike = False - for pd_op, np_op, dtypes in ops: - for dtype in dtypes: - data = np.array([[1], [2], [3], [4]], dtype=dtype) - ans = np.zeros_like(data) - labels = np.array([0, 0, 0, 0], dtype=np.int64) - pd_op(ans, data, labels, is_datetimelike) - self.assert_numpy_array_equal(np_op(data), ans[:, 0], - check_dtype=False) - - # with nans - labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) - - data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') - actual = np.zeros_like(data) - actual.fill(np.nan) - pd.algos.group_cumprod_float64(actual, data, labels, is_datetimelike) - expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') - self.assert_numpy_array_equal(actual[:, 0], expected) - - actual = np.zeros_like(data) - actual.fill(np.nan) - pd.algos.group_cumsum(actual, data, labels, is_datetimelike) - expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') - self.assert_numpy_array_equal(actual[:, 0], expected) - - # timedelta - is_datetimelike = True - data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] - actual = np.zeros_like(data, dtype='int64') - pd.algos.group_cumsum(actual, data.view('int64'), labels, - is_datetimelike) - expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( - 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), - np.timedelta64(5, 'ns')]) - self.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected) - - def test_cython_transform(self): - # GH 4095 - ops = [(('cumprod', - ()), lambda x: x.cumprod()), (('cumsum', ()), - lambda x: x.cumsum()), - (('shift', (-1, )), - lambda x: x.shift(-1)), (('shift', - (1, )), lambda x: x.shift())] - - s = Series(np.random.randn(1000)) - s_missing = s.copy() - s_missing.iloc[2:10] = np.nan - labels = np.random.randint(0, 50, size=1000).astype(float) - - # series - for (op, args), targop in ops: - for data in [s, s_missing]: - # print(data.head()) - expected = data.groupby(labels).transform(targop) - - tm.assert_series_equal(expected, - data.groupby(labels).transform(op, - *args)) - tm.assert_series_equal(expected, getattr( - data.groupby(labels), op)(*args)) - - strings = list('qwertyuiopasdfghjklz') - strings_missing = strings[:] - strings_missing[5] = np.nan - df = DataFrame({'float': s, - 'float_missing': s_missing, - 'int': [1, 1, 1, 1, 2] * 200, - 'datetime': pd.date_range('1990-1-1', periods=1000), - 'timedelta': pd.timedelta_range(1, freq='s', - periods=1000), - 'string': strings * 50, - 'string_missing': strings_missing * 50}) - df['cat'] = df['string'].astype('category') - - df2 = 
df.copy() - df2.index = pd.MultiIndex.from_product([range(100), range(10)]) - - # DataFrame - Single and MultiIndex, - # group by values, index level, columns - for df in [df, df2]: - for gb_target in [dict(by=labels), dict(level=0), dict(by='string') - ]: # dict(by='string_missing')]: - # dict(by=['int','string'])]: - - gb = df.groupby(**gb_target) - # whitelisted methods set the selection before applying - # bit a of hack to make sure the cythonized shift - # is equivalent to pre 0.17.1 behavior - if op == 'shift': - gb._set_group_selection() - - for (op, args), targop in ops: - if op != 'shift' and 'int' not in gb_target: - # numeric apply fastpath promotes dtype so have - # to apply seperately and concat - i = gb[['int']].apply(targop) - f = gb[['float', 'float_missing']].apply(targop) - expected = pd.concat([f, i], axis=1) - else: - expected = gb.apply(targop) - - expected = expected.sort_index(axis=1) - tm.assert_frame_equal(expected, - gb.transform(op, *args).sort_index( - axis=1)) - tm.assert_frame_equal(expected, getattr(gb, op)(*args)) - # individual columns - for c in df: - if c not in ['float', 'int', 'float_missing' - ] and op != 'shift': - self.assertRaises(DataError, gb[c].transform, op) - self.assertRaises(DataError, getattr(gb[c], op)) - else: - expected = gb[c].apply(targop) - expected.name = c - tm.assert_series_equal(expected, - gb[c].transform(op, *args)) - tm.assert_series_equal(expected, - getattr(gb[c], op)(*args)) - def test_groupby_cumprod(self): # GH 4095 df = pd.DataFrame({'key': ['b'] * 10, 'value': 2}) @@ -5784,27 +4229,6 @@ def test_func(x): tm.assert_frame_equal(result1, expected1) tm.assert_frame_equal(result2, expected2) - def test_first_last_max_min_on_time_data(self): - # GH 10295 - # Verify that NaT is not in the result of max, min, first and last on - # Dataframe with datetime or timedelta values. - from datetime import timedelta as td - df_test = DataFrame( - {'dt': [nan, '2015-07-24 10:10', '2015-07-25 11:11', - '2015-07-23 12:12', nan], - 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) - df_test.dt = pd.to_datetime(df_test.dt) - df_test['group'] = 'A' - df_ref = df_test[df_test.dt.notnull()] - - grouped_test = df_test.groupby('group') - grouped_ref = df_ref.groupby('group') - - assert_frame_equal(grouped_ref.max(), grouped_test.max()) - assert_frame_equal(grouped_ref.min(), grouped_test.min()) - assert_frame_equal(grouped_ref.first(), grouped_test.first()) - assert_frame_equal(grouped_ref.last(), grouped_test.last()) - def test_groupby_preserves_sort(self): # Test to ensure that groupby always preserves sort order of original # object. 
Issue #8588 and #9651 @@ -5854,21 +4278,6 @@ def test_nunique_with_empty_series(self): expected = pd.Series(name='name', dtype='int64') tm.assert_series_equal(result, expected) - def test_transform_with_non_scalar_group(self): - # GH 10165 - cols = pd.MultiIndex.from_tuples([ - ('syn', 'A'), ('mis', 'A'), ('non', 'A'), - ('syn', 'C'), ('mis', 'C'), ('non', 'C'), - ('syn', 'T'), ('mis', 'T'), ('non', 'T'), - ('syn', 'G'), ('mis', 'G'), ('non', 'G')]) - df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), - columns=cols, - index=['A', 'C', 'G', 'T']) - self.assertRaisesRegexp(ValueError, 'transform must return a scalar ' - 'value for each group.*', df.groupby - (axis=1, level=1).transform, - lambda z: z.div(z.sum(axis=1), axis=0)) - def test_numpy_compat(self): # see gh-12811 df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]}) @@ -5927,23 +4336,6 @@ def test_pivot_table_values_key_error(self): df.reset_index().pivot_table(index='year', columns='month', values='badname', aggfunc='count') - def test_agg_over_numpy_arrays(self): - # GH 3788 - df = pd.DataFrame([[1, np.array([10, 20, 30])], - [1, np.array([40, 50, 60])], - [2, np.array([20, 30, 40])]], - columns=['category', 'arraydata']) - result = df.groupby('category').agg(sum) - - expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] - expected_index = pd.Index([1, 2], name='category') - expected_column = ['arraydata'] - expected = pd.DataFrame(expected_data, - index=expected_index, - columns=expected_column) - - assert_frame_equal(result, expected) - def test_cummin_cummax(self): # GH 15048 num_types = [np.int32, np.int64, np.float32, np.float64] @@ -6024,10 +4416,6 @@ def test_cummin_cummax(self): tm.assert_frame_equal(expected, result) -def assert_fp_equal(a, b): - assert (np.abs(a - b) < 1e-12).all() - - def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py new file mode 100644 index 0000000000000..c9d8ad4231cfb --- /dev/null +++ b/pandas/tests/groupby/test_misc.py @@ -0,0 +1,101 @@ +""" misc non-groupby routines, as they are defined in core/groupby.py """ + +import nose +import numpy as np +from numpy import nan +from pandas.util import testing as tm +from pandas.core.groupby import _nargsort, _lexsort_indexer + + +class TestSorting(tm.TestCase): + + def test_lexsort_indexer(self): + keys = [[nan] * 5 + list(range(100)) + [nan] * 5] + # orders=True, na_position='last' + result = _lexsort_indexer(keys, orders=True, na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=True, na_position='first' + result = _lexsort_indexer(keys, orders=True, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='last' + result = _lexsort_indexer(keys, orders=False, na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='first' + result = _lexsort_indexer(keys, orders=False, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + def test_nargsort(self): + # np.argsort(items) places NaNs 
last + items = [nan] * 5 + list(range(100)) + [nan] * 5 + # np.argsort(items2) may not place NaNs first + items2 = np.array(items, dtype='O') + + try: + # GH 2785; due to a regression in NumPy1.6.2 + np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) + np.argsort(items2, kind='mergesort') + except TypeError: + raise nose.SkipTest('requested sort not available for type') + + # mergesort is the most difficult to get right because we want it to be + # stable. + + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + # mergesort, ascending=True, na_position='last' + result = _nargsort(items, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = _nargsort(items, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = _nargsort(items, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = _nargsort(items, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='last' + result = _nargsort(items2, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = _nargsort(items2, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = _nargsort(items2, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = _nargsort(items2, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py new file mode 100644 index 0000000000000..3142b74b56778 --- /dev/null +++ b/pandas/tests/groupby/test_timegrouper.py @@ -0,0 +1,609 @@ +""" test with the TimeGrouper / grouping with datetimes """ + +from datetime import datetime +import numpy as np +from numpy import nan + +import pandas as pd +from pandas import DataFrame, date_range, Index, Series, MultiIndex, Timestamp +from pandas.compat import StringIO +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal, 
assert_series_equal + + +class TestGroupBy(tm.TestCase): + + def test_groupby_with_timegrouper(self): + # GH 4161 + # TimeGrouper requires a sorted index + # also verifies that the resultant index has the correct name + df_original = DataFrame({ + 'Buyer': 'Carl Carl Carl Carl Joe Carl'.split(), + 'Quantity': [18, 3, 5, 1, 9, 3], + 'Date': [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ] + }) + + # GH 6908 change target column's order + df_reordered = df_original.sort_values(by='Quantity') + + for df in [df_original, df_reordered]: + df = df.set_index(['Date']) + + expected = DataFrame( + {'Quantity': np.nan}, + index=date_range('20130901 13:00:00', + '20131205 13:00:00', freq='5D', + name='Date', closed='left')) + expected.iloc[[0, 6, 18], 0] = np.array( + [24., 6., 9.], dtype='float64') + + result1 = df.resample('5D') .sum() + assert_frame_equal(result1, expected) + + df_sorted = df.sort_index() + result2 = df_sorted.groupby(pd.TimeGrouper(freq='5D')).sum() + assert_frame_equal(result2, expected) + + result3 = df.groupby(pd.TimeGrouper(freq='5D')).sum() + assert_frame_equal(result3, expected) + + def test_groupby_with_timegrouper_methods(self): + # GH 3881 + # make sure API of timegrouper conforms + + df_original = pd.DataFrame({ + 'Branch': 'A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Joe Joe Carl'.split(), + 'Quantity': [1, 3, 5, 8, 9, 3], + 'Date': [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ] + }) + + df_sorted = df_original.sort_values(by='Quantity', ascending=False) + + for df in [df_original, df_sorted]: + df = df.set_index('Date', drop=False) + g = df.groupby(pd.TimeGrouper('6M')) + self.assertTrue(g.group_keys) + self.assertTrue(isinstance(g.grouper, pd.core.groupby.BinGrouper)) + groups = g.groups + self.assertTrue(isinstance(groups, dict)) + self.assertTrue(len(groups) == 3) + + def test_timegrouper_with_reg_groups(self): + + # GH 3794 + # allow combinateion of timegrouper/reg groups + + df_original = DataFrame({ + 'Branch': 'A A A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), + 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], + 'Date': [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ] + }).set_index('Date') + + df_sorted = df_original.sort_values(by='Quantity', ascending=False) + + for df in [df_original, df_sorted]: + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark'.split(), + 'Quantity': [10, 18, 3], + 'Date': [ + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + + result = df.groupby([pd.Grouper(freq='A'), 'Buyer']).sum() + assert_frame_equal(result, expected) + + expected = DataFrame({ + 'Buyer': 'Carl Mark Carl Joe'.split(), + 'Quantity': [1, 3, 9, 18], + 'Date': [ + datetime(2013, 1, 1, 0, 0), + datetime(2013, 1, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + result = df.groupby([pd.Grouper(freq='6MS'), 'Buyer']).sum() + assert_frame_equal(result, expected) + + df_original = DataFrame({ + 'Branch': 'A 
A A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), + 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], + 'Date': [ + datetime(2013, 10, 1, 13, 0), + datetime(2013, 10, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 2, 12, 0), + datetime(2013, 10, 2, 14, 0), + ] + }).set_index('Date') + + df_sorted = df_original.sort_values(by='Quantity', ascending=False) + for df in [df_original, df_sorted]: + + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark Carl Joe'.split(), + 'Quantity': [6, 8, 3, 4, 10], + 'Date': [ + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 2, 0, 0), + datetime(2013, 10, 2, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + + result = df.groupby([pd.Grouper(freq='1D'), 'Buyer']).sum() + assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq='1M'), 'Buyer']).sum() + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark'.split(), + 'Quantity': [10, 18, 3], + 'Date': [ + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + assert_frame_equal(result, expected) + + # passing the name + df = df.reset_index() + result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' + ]).sum() + assert_frame_equal(result, expected) + + with self.assertRaises(KeyError): + df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum() + + # passing the level + df = df.set_index('Date') + result = df.groupby([pd.Grouper(freq='1M', level='Date'), 'Buyer' + ]).sum() + assert_frame_equal(result, expected) + result = df.groupby([pd.Grouper(freq='1M', level=0), 'Buyer']).sum( + ) + assert_frame_equal(result, expected) + + with self.assertRaises(ValueError): + df.groupby([pd.Grouper(freq='1M', level='foo'), + 'Buyer']).sum() + + # multi names + df = df.copy() + df['Date'] = df.index + pd.offsets.MonthEnd(2) + result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' + ]).sum() + expected = DataFrame({ + 'Buyer': 'Carl Joe Mark'.split(), + 'Quantity': [10, 18, 3], + 'Date': [ + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + assert_frame_equal(result, expected) + + # error as we have both a level and a name! 
+ with self.assertRaises(ValueError): + df.groupby([pd.Grouper(freq='1M', key='Date', + level='Date'), 'Buyer']).sum() + + # single groupers + expected = DataFrame({'Quantity': [31], + 'Date': [datetime(2013, 10, 31, 0, 0) + ]}).set_index('Date') + result = df.groupby(pd.Grouper(freq='1M')).sum() + assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq='1M')]).sum() + assert_frame_equal(result, expected) + + expected = DataFrame({'Quantity': [31], + 'Date': [datetime(2013, 11, 30, 0, 0) + ]}).set_index('Date') + result = df.groupby(pd.Grouper(freq='1M', key='Date')).sum() + assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum() + assert_frame_equal(result, expected) + + # GH 6764 multiple grouping with/without sort + df = DataFrame({ + 'date': pd.to_datetime([ + '20121002', '20121007', '20130130', '20130202', '20130305', + '20121002', '20121207', '20130130', '20130202', '20130305', + '20130202', '20130305' + ]), + 'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + 'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301, + 359, 801], + 'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12] + }).set_index('date') + + for freq in ['D', 'M', 'A', 'Q-APR']: + expected = df.groupby('user_id')[ + 'whole_cost'].resample( + freq).sum().dropna().reorder_levels( + ['date', 'user_id']).sort_index().astype('int64') + expected.name = 'whole_cost' + + result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), + 'user_id'])['whole_cost'].sum() + assert_series_equal(result1, expected) + + result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ + 'whole_cost'].sum() + assert_series_equal(result2, expected) + + def test_timegrouper_get_group(self): + # GH 6914 + + df_original = DataFrame({ + 'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(), + 'Quantity': [18, 3, 5, 1, 9, 3], + 'Date': [datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), ] + }) + df_reordered = df_original.sort_values(by='Quantity') + + # single grouping + expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], + df_original.iloc[[4]]] + dt_list = ['2013-09-30', '2013-10-31', '2013-12-31'] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq='M', key='Date')) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + assert_frame_equal(result, expected) + + # multiple grouping + expected_list = [df_original.iloc[[1]], df_original.iloc[[3]], + df_original.iloc[[4]]] + g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'), + ('Joe', '2013-12-31')] + + for df in [df_original, df_reordered]: + grouped = df.groupby(['Buyer', pd.Grouper(freq='M', key='Date')]) + for (b, t), expected in zip(g_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group((b, dt)) + assert_frame_equal(result, expected) + + # with index + df_original = df_original.set_index('Date') + df_reordered = df_original.sort_values(by='Quantity') + + expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], + df_original.iloc[[4]]] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq='M')) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + assert_frame_equal(result, expected) + + def test_timegrouper_apply_return_type_series(self): + # Using `apply` with the 
`TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], + 'value': [10, 13]}) + df_dt = df.copy() + df_dt['date'] = pd.to_datetime(df_dt['date']) + + def sumfunc_series(x): + return pd.Series([x['value'].sum()], ('sum',)) + + expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_series)) + assert_frame_equal(result.reset_index(drop=True), + expected.reset_index(drop=True)) + + def test_timegrouper_apply_return_type_value(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'], + 'value': [10, 13]}) + df_dt = df.copy() + df_dt['date'] = pd.to_datetime(df_dt['date']) + + def sumfunc_value(x): + return x.value.sum() + + expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_value)) + assert_series_equal(result.reset_index(drop=True), + expected.reset_index(drop=True)) + + def test_groupby_groups_datetimeindex(self): + # #1430 + from pandas.tseries.api import DatetimeIndex + periods = 1000 + ind = DatetimeIndex(start='2012/1/1', freq='5min', periods=periods) + df = DataFrame({'high': np.arange(periods), + 'low': np.arange(periods)}, index=ind) + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + + # it works! + groups = grouped.groups + tm.assertIsInstance(list(groups.keys())[0], datetime) + + # GH 11442 + index = pd.date_range('2015/01/01', periods=5, name='date') + df = pd.DataFrame({'A': [5, 6, 7, 8, 9], + 'B': [1, 2, 3, 4, 5]}, index=index) + result = df.groupby(level='date').groups + dates = ['2015-01-05', '2015-01-04', '2015-01-03', + '2015-01-02', '2015-01-01'] + expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date') + for date in dates} + tm.assert_dict_equal(result, expected) + + grouped = df.groupby(level='date') + for date in dates: + result = grouped.get_group(date) + data = [[df.loc[date, 'A'], df.loc[date, 'B']]] + expected_index = pd.DatetimeIndex([date], name='date') + expected = pd.DataFrame(data, + columns=list('AB'), + index=expected_index) + tm.assert_frame_equal(result, expected) + + def test_groupby_groups_datetimeindex_tz(self): + # GH 3950 + dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00', '2011-07-19 07:00:00', + '2011-07-19 08:00:00', '2011-07-19 09:00:00'] + df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], + 'datetime': dates, + 'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2] * 3}) + df['datetime'] = df['datetime'].apply( + lambda d: Timestamp(d, tz='US/Pacific')) + + exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', + '2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00', + '2011-07-19 09:00:00'], + tz='US/Pacific', name='datetime') + exp_idx2 = Index(['a', 'b'] * 3, name='label') + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], + 'value2': [1, 2, 2, 1, 1, 2]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(['datetime', 'label']).sum() + assert_frame_equal(result, expected) + + # by level + didx = pd.DatetimeIndex(dates, tz='Asia/Tokyo') + df = DataFrame({'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2, 3, 1, 2, 3]}, + index=didx) 
+ + exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00'], tz='Asia/Tokyo') + expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(level=0).sum() + assert_frame_equal(result, expected) + + def test_frame_datetime64_handling_groupby(self): + # it works! + df = DataFrame([(3, np.datetime64('2012-07-03')), + (3, np.datetime64('2012-07-04'))], + columns=['a', 'date']) + result = df.groupby('a').first() + self.assertEqual(result['date'][3], Timestamp('2012-07-03')) + + def test_groupby_multi_timezone(self): + + # combining multiple / different timezones yields UTC + + data = """0,2000-01-28 16:47:00,America/Chicago +1,2000-01-29 16:48:00,America/Chicago +2,2000-01-30 16:49:00,America/Los_Angeles +3,2000-01-31 16:50:00,America/Chicago +4,2000-01-01 16:50:00,America/New_York""" + + df = pd.read_csv(StringIO(data), header=None, + names=['value', 'date', 'tz']) + result = df.groupby('tz').date.apply( + lambda x: pd.to_datetime(x).dt.tz_localize(x.name)) + + expected = Series([Timestamp('2000-01-28 16:47:00-0600', + tz='America/Chicago'), + Timestamp('2000-01-29 16:48:00-0600', + tz='America/Chicago'), + Timestamp('2000-01-30 16:49:00-0800', + tz='America/Los_Angeles'), + Timestamp('2000-01-31 16:50:00-0600', + tz='America/Chicago'), + Timestamp('2000-01-01 16:50:00-0500', + tz='America/New_York')], + name='date', + dtype=object) + assert_series_equal(result, expected) + + tz = 'America/Chicago' + res_values = df.groupby('tz').date.get_group(tz) + result = pd.to_datetime(res_values).dt.tz_localize(tz) + exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00', + '2000-01-31 16:50:00'], + index=[0, 1, 3], name='date') + expected = pd.to_datetime(exp_values).dt.tz_localize(tz) + assert_series_equal(result, expected) + + def test_groupby_groups_periods(self): + dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00', '2011-07-19 07:00:00', + '2011-07-19 08:00:00', '2011-07-19 09:00:00'] + df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], + 'period': [pd.Period(d, freq='H') for d in dates], + 'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2] * 3}) + + exp_idx1 = pd.PeriodIndex(['2011-07-19 07:00:00', + '2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00', + '2011-07-19 09:00:00'], + freq='H', name='period') + exp_idx2 = Index(['a', 'b'] * 3, name='label') + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], + 'value2': [1, 2, 2, 1, 1, 2]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(['period', 'label']).sum() + assert_frame_equal(result, expected) + + # by level + didx = pd.PeriodIndex(dates, freq='H') + df = DataFrame({'value1': np.arange(6, dtype='int64'), + 'value2': [1, 2, 3, 1, 2, 3]}, + index=didx) + + exp_idx = pd.PeriodIndex(['2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00'], freq='H') + expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, + index=exp_idx, columns=['value1', 'value2']) + + result = df.groupby(level=0).sum() + assert_frame_equal(result, expected) + + def test_groupby_first_datetime64(self): + df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) + df[1] = df[1].view('M8[ns]') + + self.assertTrue(issubclass(df[1].dtype.type, np.datetime64)) + + result = df.groupby(level=0).first() + got_dt = result[1].dtype + 
self.assertTrue(issubclass(got_dt.type, np.datetime64)) + + result = df[1].groupby(level=0).first() + got_dt = result.dtype + self.assertTrue(issubclass(got_dt.type, np.datetime64)) + + def test_groupby_max_datetime64(self): + # GH 5869 + # datetimelike dtype conversion from int + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + expected = df.groupby('A')['A'].apply(lambda x: x.max()) + result = df.groupby('A')['A'].max() + assert_series_equal(result, expected) + + def test_groupby_datetime64_32_bit(self): + # GH 6410 / numpy 4328 + # 32-bit under 1.9-dev indexing issue + + df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')] * 2}) + result = df.groupby("A")["B"].transform(min) + expected = Series([pd.Timestamp('2000-01-1')] * 2, name='B') + assert_series_equal(result, expected) + + def test_groupby_with_timezone_selection(self): + # GH 11616 + # Test that column selection returns output in correct timezone. + np.random.seed(42) + df = pd.DataFrame({ + 'factor': np.random.randint(0, 3, size=60), + 'time': pd.date_range('01/01/2000 00:00', periods=60, + freq='s', tz='UTC') + }) + df1 = df.groupby('factor').max()['time'] + df2 = df.groupby('factor')['time'].max() + tm.assert_series_equal(df1, df2) + + def test_timezone_info(self): + # GH 11682 + # Timezone info lost when broadcasting scalar datetime to DataFrame + tm._skip_if_no_pytz() + import pytz + + df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) + self.assertEqual(df['b'][0].tzinfo, pytz.utc) + df = pd.DataFrame({'a': [1, 2, 3]}) + df['b'] = datetime.now(pytz.utc) + self.assertEqual(df['b'][0].tzinfo, pytz.utc) + + def test_datetime_count(self): + df = DataFrame({'a': [1, 2, 3] * 2, + 'dates': pd.date_range('now', periods=6, freq='T')}) + result = df.groupby('a').dates.count() + expected = Series([ + 2, 2, 2 + ], index=Index([1, 2, 3], name='a'), name='dates') + tm.assert_series_equal(result, expected) + + def test_first_last_max_min_on_time_data(self): + # GH 10295 + # Verify that NaT is not in the result of max, min, first and last on + # Dataframe with datetime or timedelta values. 
+ from datetime import timedelta as td + df_test = DataFrame( + {'dt': [nan, '2015-07-24 10:10', '2015-07-25 11:11', + '2015-07-23 12:12', nan], + 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) + df_test.dt = pd.to_datetime(df_test.dt) + df_test['group'] = 'A' + df_ref = df_test[df_test.dt.notnull()] + + grouped_test = df_test.groupby('group') + grouped_ref = df_ref.groupby('group') + + assert_frame_equal(grouped_ref.max(), grouped_test.max()) + assert_frame_equal(grouped_ref.min(), grouped_test.min()) + assert_frame_equal(grouped_ref.first(), grouped_test.first()) + assert_frame_equal(grouped_ref.last(), grouped_test.last()) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py new file mode 100644 index 0000000000000..cf5e9eb26ff13 --- /dev/null +++ b/pandas/tests/groupby/test_transform.py @@ -0,0 +1,494 @@ +""" test with the .transform """ + +import numpy as np +import pandas as pd +from pandas.util import testing as tm +from pandas import Series, DataFrame, Timestamp, MultiIndex, concat +from pandas.types.common import _ensure_platform_int +from .common import MixIn, assert_fp_equal + +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.groupby import DataError +from pandas.core.config import option_context + + +class TestGroupBy(MixIn, tm.TestCase): + + def test_transform(self): + data = Series(np.arange(9) // 3, index=np.arange(9)) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + + grouped = data.groupby(lambda x: x // 3) + + transformed = grouped.transform(lambda x: x * x.sum()) + self.assertEqual(transformed[7], 12) + + # GH 8046 + # make sure that we preserve the input order + + df = DataFrame( + np.arange(6, dtype='int64').reshape( + 3, 2), columns=["a", "b"], index=[0, 2, 1]) + key = [0, 0, 1] + expected = df.sort_index().groupby(key).transform( + lambda x: x - x.mean()).groupby(key).mean() + result = df.groupby(key).transform(lambda x: x - x.mean()).groupby( + key).mean() + assert_frame_equal(result, expected) + + def demean(arr): + return arr - arr.mean() + + people = DataFrame(np.random.randn(5, 5), + columns=['a', 'b', 'c', 'd', 'e'], + index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis']) + key = ['one', 'two', 'one', 'two', 'one'] + result = people.groupby(key).transform(demean).groupby(key).mean() + expected = people.groupby(key).apply(demean).groupby(key).mean() + assert_frame_equal(result, expected) + + # GH 8430 + df = tm.makeTimeDataFrame() + g = df.groupby(pd.TimeGrouper('M')) + g.transform(lambda x: x - 1) + + # GH 9700 + df = DataFrame({'a': range(5, 10), 'b': range(5)}) + result = df.groupby('a').transform(max) + expected = DataFrame({'b': range(5)}) + tm.assert_frame_equal(result, expected) + + def test_transform_fast(self): + + df = DataFrame({'id': np.arange(100000) / 3, + 'val': np.random.randn(100000)}) + + grp = df.groupby('id')['val'] + + values = np.repeat(grp.mean().values, + _ensure_platform_int(grp.count().values)) + expected = pd.Series(values, index=df.index, name='val') + + result = grp.transform(np.mean) + assert_series_equal(result, expected) + + result = grp.transform('mean') + assert_series_equal(result, expected) + + # GH 12737 + df = pd.DataFrame({'grouping': [0, 1, 1, 3], 'f': [1.1, 2.1, 3.1, 4.5], + 'd': pd.date_range('2014-1-1', '2014-1-4'), + 'i': [1, 2, 3, 4]}, + columns=['grouping', 'f', 'i', 'd']) + result = df.groupby('grouping').transform('first') + + dates = [pd.Timestamp('2014-1-1'), pd.Timestamp('2014-1-2'), + 
pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-4')] + expected = pd.DataFrame({'f': [1.1, 2.1, 2.1, 4.5], + 'd': dates, + 'i': [1, 2, 2, 4]}, + columns=['f', 'i', 'd']) + assert_frame_equal(result, expected) + + # selection + result = df.groupby('grouping')[['f', 'i']].transform('first') + expected = expected[['f', 'i']] + assert_frame_equal(result, expected) + + # dup columns + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['g', 'a', 'a']) + result = df.groupby('g').transform('first') + expected = df.drop('g', axis=1) + assert_frame_equal(result, expected) + + def test_transform_broadcast(self): + grouped = self.ts.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + + self.assert_index_equal(result.index, self.ts.index) + for _, gp in grouped: + assert_fp_equal(result.reindex(gp.index), gp.mean()) + + grouped = self.tsframe.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + self.assert_index_equal(result.index, self.tsframe.index) + for _, gp in grouped: + agged = gp.mean() + res = result.reindex(gp.index) + for col in self.tsframe: + assert_fp_equal(res[col], agged[col]) + + # group columns + grouped = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, + axis=1) + result = grouped.transform(np.mean) + self.assert_index_equal(result.index, self.tsframe.index) + self.assert_index_equal(result.columns, self.tsframe.columns) + for _, gp in grouped: + agged = gp.mean(1) + res = result.reindex(columns=gp.columns) + for idx in gp.index: + assert_fp_equal(res.xs(idx), agged[idx]) + + def test_transform_axis(self): + + # make sure that we are setting the axes + # correctly when on axis=0 or 1 + # in the presence of a non-monotonic indexer + # GH12713 + + base = self.tsframe.iloc[0:5] + r = len(base.index) + c = len(base.columns) + tso = DataFrame(np.random.randn(r, c), + index=base.index, + columns=base.columns, + dtype='float64') + # monotonic + ts = tso + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: x - x.mean()) + assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + assert_frame_equal(result, expected) + + # non-monotonic + ts = tso.iloc[[1, 0] + list(range(2, len(base)))] + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: x - x.mean()) + assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform('mean') + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + assert_frame_equal(result, expected) + + def test_transform_dtype(self): + # GH 9807 + # Check transform dtype output is preserved + df = DataFrame([[1, 3], [2, 3]]) + result = df.groupby(1).transform('mean') + expected = DataFrame([[1.5], [1.5]]) + assert_frame_equal(result, expected) + + def test_transform_bug(self): + # GH 5712 + # transforming on a datetime column + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + result = df.groupby('A')['B'].transform( + lambda x: x.rank(ascending=False)) + expected = Series(np.arange(5, 0, step=-1), name='B') + assert_series_equal(result, expected) + + def test_transform_multiple(self): + grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) + + grouped.transform(lambda x: x * 2) + grouped.transform(np.mean) + + def test_dispatch_transform(self): + df 
= self.tsframe[::5].reindex(self.tsframe.index) + + grouped = df.groupby(lambda x: x.month) + + filled = grouped.fillna(method='pad') + fillit = lambda x: x.fillna(method='pad') + expected = df.groupby(lambda x: x.month).transform(fillit) + assert_frame_equal(filled, expected) + + def test_transform_select_columns(self): + f = lambda x: x.mean() + result = self.df.groupby('A')['C', 'D'].transform(f) + + selection = self.df[['C', 'D']] + expected = selection.groupby(self.df['A']).transform(f) + + assert_frame_equal(result, expected) + + def test_transform_exclude_nuisance(self): + + # this also tests orderings in transform between + # series/frame to make sure it's consistent + expected = {} + grouped = self.df.groupby('A') + expected['C'] = grouped['C'].transform(np.mean) + expected['D'] = grouped['D'].transform(np.mean) + expected = DataFrame(expected) + result = self.df.groupby('A').transform(np.mean) + + assert_frame_equal(result, expected) + + def test_transform_function_aliases(self): + result = self.df.groupby('A').transform('mean') + expected = self.df.groupby('A').transform(np.mean) + assert_frame_equal(result, expected) + + result = self.df.groupby('A')['C'].transform('mean') + expected = self.df.groupby('A')['C'].transform(np.mean) + assert_series_equal(result, expected) + + def test_series_fast_transform_date(self): + # GH 13191 + df = pd.DataFrame({'grouping': [np.nan, 1, 1, 3], + 'd': pd.date_range('2014-1-1', '2014-1-4')}) + result = df.groupby('grouping')['d'].transform('first') + dates = [pd.NaT, pd.Timestamp('2014-1-2'), pd.Timestamp('2014-1-2'), + pd.Timestamp('2014-1-4')] + expected = pd.Series(dates, name='d') + assert_series_equal(result, expected) + + def test_transform_length(self): + # GH 9697 + df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]}) + expected = pd.Series([3.0] * 4) + + def nsum(x): + return np.nansum(x) + + results = [df.groupby('col1').transform(sum)['col2'], + df.groupby('col1')['col2'].transform(sum), + df.groupby('col1').transform(nsum)['col2'], + df.groupby('col1')['col2'].transform(nsum)] + for result in results: + assert_series_equal(result, expected, check_names=False) + + def test_transform_coercion(self): + + # 14457 + # when we are transforming be sure to not coerce + # via assignment + df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1])) + g = df.groupby('A') + + expected = g.transform(np.mean) + result = g.transform(lambda x: np.mean(x)) + assert_frame_equal(result, expected) + + def test_groupby_transform_with_int(self): + + # GH 3740, make sure that we might upcast on item-by-item transform + + # floats + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'), + C=Series( + [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo')) + with np.errstate(all='ignore'): + result = df.groupby('A').transform( + lambda x: (x - x.mean()) / x.std()) + expected = DataFrame(dict(B=np.nan, C=Series( + [-1, 0, 1, -1, 0, 1], dtype='float64'))) + assert_frame_equal(result, expected) + + # int case + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, + C=[1, 2, 3, 1, 2, 3], D='foo')) + with np.errstate(all='ignore'): + result = df.groupby('A').transform( + lambda x: (x - x.mean()) / x.std()) + expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) + assert_frame_equal(result, expected) + + # int that needs float conversion + s = Series([2, 3, 4, 10, 5, -1]) + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo')) + with np.errstate(all='ignore'): + result = df.groupby('A').transform( + lambda x: (x - x.mean()) / x.std()) + + s1 
= s.iloc[0:3] + s1 = (s1 - s1.mean()) / s1.std() + s2 = s.iloc[3:6] + s2 = (s2 - s2.mean()) / s2.std() + expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) + assert_frame_equal(result, expected) + + # int downcasting + result = df.groupby('A').transform(lambda x: x * 2 / 2) + expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) + assert_frame_equal(result, expected) + + def test_groupby_transform_with_nan_group(self): + # GH 9941 + df = pd.DataFrame({'a': range(10), + 'b': [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) + result = df.groupby(df.b)['a'].transform(max) + expected = pd.Series([1., 1., 2., 3., np.nan, 6., 6., 9., 9., 9.], + name='a') + assert_series_equal(result, expected) + + def test_transform_mixed_type(self): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] + ]) + df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], + 'c': np.tile(['a', 'b', 'c'], 2), + 'v': np.arange(1., 7.)}, index=index) + + def f(group): + group['g'] = group['d'] * 2 + return group[:1] + + grouped = df.groupby('c') + result = grouped.apply(f) + + self.assertEqual(result['d'].dtype, np.float64) + + # this is by definition a mutating operation! + with option_context('mode.chained_assignment', None): + for key, group in grouped: + res = f(group) + assert_frame_equal(res, result.loc[key]) + + def test_cython_group_transform_algos(self): + # GH 4095 + dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, + np.uint64, np.float32, np.float64] + + ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), + (pd.algos.group_cumsum, np.cumsum, dtypes)] + + is_datetimelike = False + for pd_op, np_op, dtypes in ops: + for dtype in dtypes: + data = np.array([[1], [2], [3], [4]], dtype=dtype) + ans = np.zeros_like(data) + labels = np.array([0, 0, 0, 0], dtype=np.int64) + pd_op(ans, data, labels, is_datetimelike) + self.assert_numpy_array_equal(np_op(data), ans[:, 0], + check_dtype=False) + + # with nans + labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) + + data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') + actual = np.zeros_like(data) + actual.fill(np.nan) + pd.algos.group_cumprod_float64(actual, data, labels, is_datetimelike) + expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') + self.assert_numpy_array_equal(actual[:, 0], expected) + + actual = np.zeros_like(data) + actual.fill(np.nan) + pd.algos.group_cumsum(actual, data, labels, is_datetimelike) + expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') + self.assert_numpy_array_equal(actual[:, 0], expected) + + # timedelta + is_datetimelike = True + data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] + actual = np.zeros_like(data, dtype='int64') + pd.algos.group_cumsum(actual, data.view('int64'), labels, + is_datetimelike) + expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( + 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), + np.timedelta64(5, 'ns')]) + self.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected) + + def test_cython_transform(self): + # GH 4095 + ops = [(('cumprod', + ()), lambda x: x.cumprod()), (('cumsum', ()), + lambda x: x.cumsum()), + (('shift', (-1, )), + lambda x: x.shift(-1)), (('shift', + (1, )), lambda x: x.shift())] + + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + + # series + for (op, args), targop in ops: + for data in [s, s_missing]: + # print(data.head()) + expected = 
data.groupby(labels).transform(targop) + + tm.assert_series_equal(expected, + data.groupby(labels).transform(op, + *args)) + tm.assert_series_equal(expected, getattr( + data.groupby(labels), op)(*args)) + + strings = list('qwertyuiopasdfghjklz') + strings_missing = strings[:] + strings_missing[5] = np.nan + df = DataFrame({'float': s, + 'float_missing': s_missing, + 'int': [1, 1, 1, 1, 2] * 200, + 'datetime': pd.date_range('1990-1-1', periods=1000), + 'timedelta': pd.timedelta_range(1, freq='s', + periods=1000), + 'string': strings * 50, + 'string_missing': strings_missing * 50}) + df['cat'] = df['string'].astype('category') + + df2 = df.copy() + df2.index = pd.MultiIndex.from_product([range(100), range(10)]) + + # DataFrame - Single and MultiIndex, + # group by values, index level, columns + for df in [df, df2]: + for gb_target in [dict(by=labels), dict(level=0), dict(by='string') + ]: # dict(by='string_missing')]: + # dict(by=['int','string'])]: + + gb = df.groupby(**gb_target) + # whitelisted methods set the selection before applying + # bit a of hack to make sure the cythonized shift + # is equivalent to pre 0.17.1 behavior + if op == 'shift': + gb._set_group_selection() + + for (op, args), targop in ops: + if op != 'shift' and 'int' not in gb_target: + # numeric apply fastpath promotes dtype so have + # to apply seperately and concat + i = gb[['int']].apply(targop) + f = gb[['float', 'float_missing']].apply(targop) + expected = pd.concat([f, i], axis=1) + else: + expected = gb.apply(targop) + + expected = expected.sort_index(axis=1) + tm.assert_frame_equal(expected, + gb.transform(op, *args).sort_index( + axis=1)) + tm.assert_frame_equal(expected, getattr(gb, op)(*args)) + # individual columns + for c in df: + if c not in ['float', 'int', 'float_missing' + ] and op != 'shift': + self.assertRaises(DataError, gb[c].transform, op) + self.assertRaises(DataError, getattr(gb[c], op)) + else: + expected = gb[c].apply(targop) + expected.name = c + tm.assert_series_equal(expected, + gb[c].transform(op, *args)) + tm.assert_series_equal(expected, + getattr(gb[c], op)(*args)) + + def test_transform_with_non_scalar_group(self): + # GH 10165 + cols = pd.MultiIndex.from_tuples([ + ('syn', 'A'), ('mis', 'A'), ('non', 'A'), + ('syn', 'C'), ('mis', 'C'), ('non', 'C'), + ('syn', 'T'), ('mis', 'T'), ('non', 'T'), + ('syn', 'G'), ('mis', 'G'), ('non', 'G')]) + df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), + columns=cols, + index=['A', 'C', 'G', 'T']) + self.assertRaisesRegexp(ValueError, 'transform must return a scalar ' + 'value for each group.*', df.groupby + (axis=1, level=1).transform, + lambda z: z.div(z.sum(axis=1), axis=0)) From fe246cc27027c7d469a5e8f946415ec2c5664d1d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Feb 2017 08:04:01 -0500 Subject: [PATCH 024/353] TST: mark gbq streaming insert tests as slow --- ci/requirements-3.4_SLOW.pip | 3 +++ pandas/io/tests/test_gbq.py | 1 + 2 files changed, 4 insertions(+) create mode 100644 ci/requirements-3.4_SLOW.pip diff --git a/ci/requirements-3.4_SLOW.pip b/ci/requirements-3.4_SLOW.pip new file mode 100644 index 0000000000000..05c938abcbab6 --- /dev/null +++ b/ci/requirements-3.4_SLOW.pip @@ -0,0 +1,3 @@ +httplib2 +google-api-python-client +oauth2client diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 0507f0d89661c..457e2d218cb33 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -938,6 +938,7 @@ def test_upload_data_if_table_exists_replace(self): 
private_key=_get_private_key_path()) self.assertEqual(result['NUM_ROWS'][0], 5) + @tm.slow def test_google_upload_errors_should_raise_exception(self): destination_table = DESTINATION_TABLE + "5" From 87c2c2af6150d6e8fa8cfbc017fbfd52e7f8c5c7 Mon Sep 17 00:00:00 2001 From: Kernc Date: Wed, 8 Feb 2017 09:30:47 -0500 Subject: [PATCH 025/353] ENH: .squeeze has gained the axis parameter closes #15339 Author: Kernc Closes #15335 from kernc/squeeze_axis_param and squashes the following commits: 44d3c54 [Kernc] fixup! ENH: .squeeze accepts axis parameter cc018c9 [Kernc] ENH: .squeeze accepts axis parameter --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/compat/numpy/function.py | 7 ------- pandas/core/generic.py | 24 +++++++++++++++++++----- pandas/tests/test_generic.py | 18 ++++++++++++++---- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3f6c06e20b546..9afcf85c929a7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -149,6 +149,7 @@ Other enhancements - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) +- ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 72e89586d0280..eb9e9ecc359b2 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -214,13 +214,6 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method='kwargs') -# Currently, numpy (v1.11) has backwards compatibility checks -# in place so that this 'kwargs' parameter is technically -# unnecessary, but in the long-run, this will be needed. -SQUEEZE_DEFAULTS = dict(axis=None) -validate_squeeze = CompatValidator(SQUEEZE_DEFAULTS, fname='squeeze', - method='kwargs') - TAKE_DEFAULTS = OrderedDict() TAKE_DEFAULTS['out'] = None TAKE_DEFAULTS['mode'] = 'raise' diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bb2664a5b8d28..228dd2acd2124 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -532,13 +532,27 @@ def pop(self, item): return result - def squeeze(self, **kwargs): - """Squeeze length 1 dimensions.""" - nv.validate_squeeze(tuple(), kwargs) + def squeeze(self, axis=None): + """ + Squeeze length 1 dimensions. + Parameters + ---------- + axis : None, integer or string axis name, optional + The axis to squeeze if 1-sized. + + .. 
versionadded:: 0.20.0 + + Returns + ------- + scalar if 1-sized, else original object + """ + axis = (self._AXIS_NAMES if axis is None else + (self._get_axis_number(axis),)) try: - return self.iloc[tuple([0 if len(a) == 1 else slice(None) - for a in self.axes])] + return self.iloc[ + tuple([0 if i in axis and len(a) == 1 else slice(None) + for i, a in enumerate(self.axes)])] except: return self diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 916d7ae0b0ec4..bb341c26d454e 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1770,6 +1770,20 @@ def test_squeeze(self): [tm.assert_series_equal(empty_series, higher_dim.squeeze()) for higher_dim in [empty_series, empty_frame, empty_panel]] + # axis argument + df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] + tm.assert_equal(df.shape, (1, 1)) + tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) + tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0]) + tm.assert_equal(df.squeeze(), df.iloc[0, 0]) + tm.assertRaises(ValueError, df.squeeze, axis=2) + tm.assertRaises(ValueError, df.squeeze, axis='x') + + df = tm.makeTimeDataFrame(3) + tm.assert_frame_equal(df.squeeze(axis=0), df) + def test_numpy_squeeze(self): s = tm.makeFloatSeries() tm.assert_series_equal(np.squeeze(s), s) @@ -1777,10 +1791,6 @@ def test_numpy_squeeze(self): df = tm.makeTimeDataFrame().reindex(columns=['A']) tm.assert_series_equal(np.squeeze(df), df['A']) - msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, - np.squeeze, s, axis=0) - def test_transpose(self): msg = (r"transpose\(\) got multiple values for " r"keyword argument 'axes'") From bf1a5961a09a6f5237a681f9f1c9a698b1a13918 Mon Sep 17 00:00:00 2001 From: Jonathan de Bruin Date: Wed, 8 Feb 2017 15:58:25 +0100 Subject: [PATCH 026/353] Small documentation fix for MultiIndex.sortlevel (#15345) * doc fix for return values of MultiIndex.sortlevel * MultiIndex.sortlevel docs improved after feedback --- pandas/indexes/multi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index d2469cf1a3eed..9ab07d87fd13b 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1399,7 +1399,11 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): Returns ------- - sorted_index : MultiIndex + sorted_index : pd.MultiIndex + Resulting index + indexer : np.ndarray + Indices of output values in original index + """ from pandas.core.groupby import _indexer_from_factorized From 704cdbf830c110001062012b92302ed30d8ae127 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Feb 2017 12:42:21 -0500 Subject: [PATCH 027/353] CI: use pip install for statsmodels --- ci/requirements-2.7.pip | 1 + ci/requirements-2.7.run | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index d16b932c8be4f..d7266fe88fb32 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,3 +1,4 @@ +statsmodels blosc httplib2 google-api-python-client==1.2 diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index b5fc919297c76..62e31e4ae24e3 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -18,6 +18,5 @@ patsy pymysql=0.6.3 html5lib=1.0b2 beautiful-soup=4.2.1 -statsmodels jinja2=2.8 xarray=0.8.0 From 9ba10089f1c57d6fff569af39f0e6d37ee0210f5 Mon Sep 17 
00:00:00 2001 From: TrigonaMinima Date: Thu, 9 Feb 2017 03:35:43 +0530 Subject: [PATCH 028/353] TST: Period tests reorg xref #14854 --- pandas/tests/frame/test_period.py | 139 + pandas/tests/indexes/period/test_asfreq.py | 154 + .../tests/indexes/period/test_construction.py | 486 ++ .../indexes/period/test_ops.py} | 798 ++- .../indexes/period/test_partial_slicing.py | 139 + pandas/tests/indexes/period/test_period.py | 583 +- pandas/tests/indexes/period/test_setops.py | 157 + pandas/tests/indexes/period/test_tools.py | 449 ++ pandas/tests/scalar/test_period.py | 2074 +++++++ pandas/tests/series/test_period.py | 248 + pandas/tseries/tests/test_period.py | 5065 ----------------- 11 files changed, 5190 insertions(+), 5102 deletions(-) create mode 100644 pandas/tests/frame/test_period.py create mode 100644 pandas/tests/indexes/period/test_asfreq.py create mode 100644 pandas/tests/indexes/period/test_construction.py rename pandas/{tseries/tests/test_base.py => tests/indexes/period/test_ops.py} (58%) create mode 100644 pandas/tests/indexes/period/test_partial_slicing.py create mode 100644 pandas/tests/indexes/period/test_setops.py create mode 100644 pandas/tests/indexes/period/test_tools.py create mode 100644 pandas/tests/scalar/test_period.py create mode 100644 pandas/tests/series/test_period.py delete mode 100644 pandas/tseries/tests/test_period.py diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py new file mode 100644 index 0000000000000..84d10a2e78d28 --- /dev/null +++ b/pandas/tests/frame/test_period.py @@ -0,0 +1,139 @@ +import numpy as np +from numpy.random import randn +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import (PeriodIndex, period_range, DataFrame, date_range, + Index, to_datetime, DatetimeIndex) + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_as_frame_columns(self): + rng = period_range('1/1/2000', periods=5) + df = DataFrame(randn(10, 5), columns=rng) + + ts = df[rng[0]] + tm.assert_series_equal(ts, df.iloc[:, 0]) + + # GH # 1211 + repr(df) + + ts = df['1/1/2000'] + tm.assert_series_equal(ts, df.iloc[:, 0]) + + def test_frame_setitem(self): + rng = period_range('1/1/2000', periods=5, name='index') + df = DataFrame(randn(5, 3), index=rng) + + df['Index'] = rng + rs = Index(df['Index']) + tm.assert_index_equal(rs, rng, check_names=False) + self.assertEqual(rs.name, 'Index') + self.assertEqual(rng.name, 'index') + + rs = df.reset_index().set_index('index') + tm.assertIsInstance(rs.index, PeriodIndex) + tm.assert_index_equal(rs.index, rng) + + def test_frame_to_time_stamp(self): + K = 5 + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + df = DataFrame(randn(len(index), K), index=index) + df['mix'] = 'a' + + exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + result = df.to_timestamp('D', 'end') + tm.assert_index_equal(result.index, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') + result = df.to_timestamp('D', 'start') + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq='A-DEC'): + return date_range(to_datetime('1/1/2001') + delta, + to_datetime('12/31/2009') + delta, freq=freq) + + delta = timedelta(hours=23) + result = df.to_timestamp('H', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) 
+ + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp('T', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + result = df.to_timestamp('S', 'end') + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + # columns + df = df.T + + exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + result = df.to_timestamp('D', 'end', axis=1) + tm.assert_index_equal(result.columns, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') + result = df.to_timestamp('D', 'start', axis=1) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23) + result = df.to_timestamp('H', 'end', axis=1) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp('T', 'end', axis=1) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.columns, exp_index) + + result = df.to_timestamp('S', 'end', axis=1) + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.columns, exp_index) + + # invalid axis + tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) + + result1 = df.to_timestamp('5t', axis=1) + result2 = df.to_timestamp('t', axis=1) + expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS') + self.assertTrue(isinstance(result1.columns, DatetimeIndex)) + self.assertTrue(isinstance(result2.columns, DatetimeIndex)) + self.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) + self.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) + # PeriodIndex.to_timestamp always use 'infer' + self.assertEqual(result1.columns.freqstr, 'AS-JAN') + self.assertEqual(result2.columns.freqstr, 'AS-JAN') + + def test_frame_index_to_string(self): + index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') + frame = DataFrame(np.random.randn(3, 4), index=index) + + # it works! 
+ frame.to_string() + + def test_align_frame(self): + rng = period_range('1/1/2000', '1/1/2010', freq='A') + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected.values[1::2] = np.nan + tm.assert_frame_equal(result, expected) + + result = ts + _permute(ts[::2]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py new file mode 100644 index 0000000000000..96e3d0bbd8abc --- /dev/null +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -0,0 +1,154 @@ +import numpy as np + +import pandas as pd +from pandas.util import testing as tm +from pandas import PeriodIndex, Series, DataFrame + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_asfreq(self): + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') + pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') + pi3 = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2001') + pi4 = PeriodIndex(freq='D', start='1/1/2001', end='1/1/2001') + pi5 = PeriodIndex(freq='H', start='1/1/2001', end='1/1/2001 00:00') + pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') + pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') + + self.assertEqual(pi1.asfreq('Q', 'S'), pi2) + self.assertEqual(pi1.asfreq('Q', 's'), pi2) + self.assertEqual(pi1.asfreq('M', 'start'), pi3) + self.assertEqual(pi1.asfreq('D', 'StarT'), pi4) + self.assertEqual(pi1.asfreq('H', 'beGIN'), pi5) + self.assertEqual(pi1.asfreq('Min', 'S'), pi6) + self.assertEqual(pi1.asfreq('S', 'S'), pi7) + + self.assertEqual(pi2.asfreq('A', 'S'), pi1) + self.assertEqual(pi2.asfreq('M', 'S'), pi3) + self.assertEqual(pi2.asfreq('D', 'S'), pi4) + self.assertEqual(pi2.asfreq('H', 'S'), pi5) + self.assertEqual(pi2.asfreq('Min', 'S'), pi6) + self.assertEqual(pi2.asfreq('S', 'S'), pi7) + + self.assertEqual(pi3.asfreq('A', 'S'), pi1) + self.assertEqual(pi3.asfreq('Q', 'S'), pi2) + self.assertEqual(pi3.asfreq('D', 'S'), pi4) + self.assertEqual(pi3.asfreq('H', 'S'), pi5) + self.assertEqual(pi3.asfreq('Min', 'S'), pi6) + self.assertEqual(pi3.asfreq('S', 'S'), pi7) + + self.assertEqual(pi4.asfreq('A', 'S'), pi1) + self.assertEqual(pi4.asfreq('Q', 'S'), pi2) + self.assertEqual(pi4.asfreq('M', 'S'), pi3) + self.assertEqual(pi4.asfreq('H', 'S'), pi5) + self.assertEqual(pi4.asfreq('Min', 'S'), pi6) + self.assertEqual(pi4.asfreq('S', 'S'), pi7) + + self.assertEqual(pi5.asfreq('A', 'S'), pi1) + self.assertEqual(pi5.asfreq('Q', 'S'), pi2) + self.assertEqual(pi5.asfreq('M', 'S'), pi3) + self.assertEqual(pi5.asfreq('D', 'S'), pi4) + self.assertEqual(pi5.asfreq('Min', 'S'), pi6) + self.assertEqual(pi5.asfreq('S', 'S'), pi7) + + self.assertEqual(pi6.asfreq('A', 'S'), pi1) + self.assertEqual(pi6.asfreq('Q', 'S'), pi2) + self.assertEqual(pi6.asfreq('M', 'S'), pi3) + self.assertEqual(pi6.asfreq('D', 'S'), pi4) + self.assertEqual(pi6.asfreq('H', 'S'), pi5) + self.assertEqual(pi6.asfreq('S', 'S'), pi7) + + self.assertEqual(pi7.asfreq('A', 'S'), pi1) + self.assertEqual(pi7.asfreq('Q', 'S'), pi2) + self.assertEqual(pi7.asfreq('M', 'S'), pi3) + self.assertEqual(pi7.asfreq('D', 'S'), pi4) + self.assertEqual(pi7.asfreq('H', 'S'), pi5) + self.assertEqual(pi7.asfreq('Min', 'S'), pi6) + + self.assertRaises(ValueError, pi7.asfreq, 'T', 'foo') + result1 = pi1.asfreq('3M') + result2 = pi1.asfreq('M') + expected = PeriodIndex(freq='M', start='2001-12', end='2001-12') + self.assert_numpy_array_equal(result1.asi8, 
expected.asi8) + self.assertEqual(result1.freqstr, '3M') + self.assert_numpy_array_equal(result2.asi8, expected.asi8) + self.assertEqual(result2.freqstr, 'M') + + def test_asfreq_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M') + result = idx.asfreq(freq='Q') + expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') + tm.assert_index_equal(result, expected) + + def test_asfreq_mult_pi(self): + pi = PeriodIndex(['2001-01', '2001-02', 'NaT', '2001-03'], freq='2M') + + for freq in ['D', '3D']: + result = pi.asfreq(freq) + exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', + '2001-04-30'], freq=freq) + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + result = pi.asfreq(freq, how='S') + exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', + '2001-03-01'], freq=freq) + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + def test_asfreq_combined_pi(self): + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='H') + exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['S', 'E']): + result = pi.asfreq(freq, how=how) + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + for freq in ['1D1H', '1H1D']: + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', + 'NaT'], freq=freq) + result = pi.asfreq('H') + exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'], + freq='H') + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', + 'NaT'], freq=freq) + result = pi.asfreq('H', how='S') + exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], + freq='H') + self.assert_index_equal(result, exp) + self.assertEqual(result.freq, exp.freq) + + def test_asfreq_ts(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') + ts = Series(np.random.randn(len(index)), index=index) + df = DataFrame(np.random.randn(len(index), 3), index=index) + + result = ts.asfreq('D', how='end') + df_result = df.asfreq('D', how='end') + exp_index = index.asfreq('D', how='end') + self.assertEqual(len(result), len(ts)) + tm.assert_index_equal(result.index, exp_index) + tm.assert_index_equal(df_result.index, exp_index) + + result = ts.asfreq('D', how='start') + self.assertEqual(len(result), len(ts)) + tm.assert_index_equal(result.index, index.asfreq('D', how='start')) + + def test_astype_asfreq(self): + pi1 = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], freq='D') + exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + tm.assert_index_equal(pi1.asfreq('M'), exp) + tm.assert_index_equal(pi1.astype('period[M]'), exp) + + exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='3M') + tm.assert_index_equal(pi1.asfreq('3M'), exp) + tm.assert_index_equal(pi1.astype('period[3M]'), exp) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py new file mode 100644 index 0000000000000..c1299c6abeda3 --- /dev/null +++ b/pandas/tests/indexes/period/test_construction.py @@ -0,0 +1,486 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import lrange, PY3, text_type, lmap +from pandas import (Period, PeriodIndex, period_range, offsets, date_range, + Series, Index) + + +class TestPeriodIndex(tm.TestCase): + + def 
setUp(self): + pass + + def test_construction_base_constructor(self): + # GH 13664 + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='D')] + tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) + + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.Index(np.array(arr), dtype=object)) + + def test_constructor_use_start_freq(self): + # GH #1118 + p = Period('4/2/2012', freq='B') + index = PeriodIndex(start=p, periods=10) + expected = PeriodIndex(start='4/2/2012', periods=10, freq='B') + tm.assert_index_equal(index, expected) + + def test_constructor_field_arrays(self): + # GH #1264 + + years = np.arange(1990, 2010).repeat(4)[2:-2] + quarters = np.tile(np.arange(1, 5), 20)[2:-2] + + index = PeriodIndex(year=years, quarter=quarters, freq='Q-DEC') + expected = period_range('1990Q3', '2009Q2', freq='Q-DEC') + tm.assert_index_equal(index, expected) + + index2 = PeriodIndex(year=years, quarter=quarters, freq='2Q-DEC') + tm.assert_numpy_array_equal(index.asi8, index2.asi8) + + index = PeriodIndex(year=years, quarter=quarters) + tm.assert_index_equal(index, expected) + + years = [2007, 2007, 2007] + months = [1, 2] + self.assertRaises(ValueError, PeriodIndex, year=years, month=months, + freq='M') + self.assertRaises(ValueError, PeriodIndex, year=years, month=months, + freq='2M') + self.assertRaises(ValueError, PeriodIndex, year=years, month=months, + freq='M', start=Period('2007-01', freq='M')) + + years = [2007, 2007, 2007] + months = [1, 2, 3] + idx = PeriodIndex(year=years, month=months, freq='M') + exp = period_range('2007-01', periods=3, freq='M') + tm.assert_index_equal(idx, exp) + + def test_constructor_U(self): + # U was used as undefined period + self.assertRaises(ValueError, period_range, '2007-1-1', periods=500, + freq='X') + + def test_constructor_nano(self): + idx = period_range(start=Period(ordinal=1, freq='N'), + end=Period(ordinal=4, freq='N'), freq='N') + exp = PeriodIndex([Period(ordinal=1, freq='N'), + Period(ordinal=2, freq='N'), + Period(ordinal=3, freq='N'), + Period(ordinal=4, freq='N')], freq='N') + tm.assert_index_equal(idx, exp) + + def test_constructor_arrays_negative_year(self): + years = np.arange(1960, 2000, dtype=np.int64).repeat(4) + quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + self.assert_numpy_array_equal(pindex.year, years) + self.assert_numpy_array_equal(pindex.quarter, quarters) + + def test_constructor_invalid_quarters(self): + self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), + quarter=lrange(4), freq='Q-DEC') + + def test_constructor_corner(self): + self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') + + start = Period('2007', freq='A-JUN') + end = Period('2010', freq='A-DEC') + self.assertRaises(ValueError, PeriodIndex, start=start, end=end) + self.assertRaises(ValueError, PeriodIndex, start=start) + self.assertRaises(ValueError, PeriodIndex, end=end) + + result = period_range('2007-01', periods=10.5, freq='M') + exp = period_range('2007-01', periods=10, freq='M') + 
tm.assert_index_equal(result, exp) + + def test_constructor_fromarraylike(self): + idx = period_range('2007-01', periods=20, freq='M') + + # values is an array of Period, thus can retrieve freq + tm.assert_index_equal(PeriodIndex(idx.values), idx) + tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) + + self.assertRaises(ValueError, PeriodIndex, idx._values) + self.assertRaises(ValueError, PeriodIndex, list(idx._values)) + self.assertRaises(ValueError, PeriodIndex, + data=Period('2007', freq='A')) + + result = PeriodIndex(iter(idx)) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq='M') + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq=offsets.MonthEnd()) + tm.assert_index_equal(result, idx) + self.assertTrue(result.freq, 'M') + + result = PeriodIndex(idx, freq='2M') + tm.assert_index_equal(result, idx.asfreq('2M')) + self.assertTrue(result.freq, '2M') + + result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) + tm.assert_index_equal(result, idx.asfreq('2M')) + self.assertTrue(result.freq, '2M') + + result = PeriodIndex(idx, freq='D') + exp = idx.asfreq('D', 'e') + tm.assert_index_equal(result, exp) + + def test_constructor_datetime64arr(self): + vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) + vals = vals.view(np.dtype('M8[us]')) + + self.assertRaises(ValueError, PeriodIndex, vals, freq='D') + + def test_constructor_dtype(self): + # passing a dtype with a tz should localize + idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]') + exp = PeriodIndex(['2013-01', '2013-03'], freq='M') + tm.assert_index_equal(idx, exp) + self.assertEqual(idx.dtype, 'period[M]') + + idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]') + exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D') + tm.assert_index_equal(idx, exp) + self.assertEqual(idx.dtype, 'period[3D]') + + # if we already have a freq and its not the same, then asfreq + # (not changed) + idx = PeriodIndex(['2013-01-01', '2013-01-02'], freq='D') + + res = PeriodIndex(idx, dtype='period[M]') + exp = PeriodIndex(['2013-01', '2013-01'], freq='M') + tm.assert_index_equal(res, exp) + self.assertEqual(res.dtype, 'period[M]') + + res = PeriodIndex(idx, freq='M') + tm.assert_index_equal(res, exp) + self.assertEqual(res.dtype, 'period[M]') + + msg = 'specified freq and dtype are different' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(['2011-01'], freq='M', dtype='period[D]') + + def test_constructor_empty(self): + idx = pd.PeriodIndex([], freq='M') + tm.assertIsInstance(idx, PeriodIndex) + self.assertEqual(len(idx), 0) + self.assertEqual(idx.freq, 'M') + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + pd.PeriodIndex([]) + + def test_constructor_pi_nat(self): + idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='M')]) + exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='M')])) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'), + Period('2011-01', freq='M')]) + exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(np.array([pd.NaT, pd.NaT, + Period('2011-01', freq='M'), + Period('2011-01', freq='M')])) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([pd.NaT, pd.NaT, 
'2011-01', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex([pd.NaT, pd.NaT]) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(np.array([pd.NaT, pd.NaT])) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(['NaT', 'NaT']) + + with tm.assertRaisesRegexp(ValueError, 'freq not specified'): + PeriodIndex(np.array(['NaT', 'NaT'])) + + def test_constructor_incompat_freq(self): + msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='D')]) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, + Period('2011-01', freq='D')])) + + # first element is pd.NaT + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex([pd.NaT, Period('2011-01', freq='M'), + Period('2011-01', freq='D')]) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), + Period('2011-01', freq='D')])) + + def test_constructor_mixed(self): + idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')]) + exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')]) + exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M') + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT, + '2012-01-01']) + exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D') + tm.assert_index_equal(idx, exp) + + def test_constructor_simple_new(self): + idx = period_range('2007-01', name='p', periods=2, freq='M') + result = idx._simple_new(idx, 'p', freq=idx.freq) + tm.assert_index_equal(result, idx) + + result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq) + tm.assert_index_equal(result, idx) + + result = idx._simple_new([pd.Period('2007-01', freq='M'), + pd.Period('2007-02', freq='M')], + 'p', freq=idx.freq) + self.assert_index_equal(result, idx) + + result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'), + pd.Period('2007-02', freq='M')]), + 'p', freq=idx.freq) + self.assert_index_equal(result, idx) + + def test_constructor_simple_new_empty(self): + # GH13079 + idx = PeriodIndex([], freq='M', name='p') + result = idx._simple_new(idx, name='p', freq='M') + tm.assert_index_equal(result, idx) + + def test_constructor_simple_new_floats(self): + # GH13079 + for floats in [[1.1], np.array([1.1])]: + with self.assertRaises(TypeError): + pd.PeriodIndex._simple_new(floats, freq='M') + + def test_constructor_nat(self): + self.assertRaises(ValueError, period_range, start='NaT', + end='2011-01-01', freq='M') + self.assertRaises(ValueError, period_range, start='2011-01-01', + end='NaT', freq='M') + + def test_constructor_year_and_quarter(self): + year = pd.Series([2001, 2002, 2003]) + quarter = year - 2000 + idx = PeriodIndex(year=year, quarter=quarter) + strs = ['%dQ%d' % t for t in zip(quarter, year)] + lops = list(map(Period, strs)) + p = PeriodIndex(lops) + tm.assert_index_equal(p, idx) + + def test_constructor_freq_mult(self): + # GH #7811 + for func in [PeriodIndex, period_range]: + # must be the same, but for sure... 
+ pidx = func(start='2014-01', freq='2M', periods=4) + expected = PeriodIndex(['2014-01', '2014-03', + '2014-05', '2014-07'], freq='2M') + tm.assert_index_equal(pidx, expected) + + pidx = func(start='2014-01-02', end='2014-01-15', freq='3D') + expected = PeriodIndex(['2014-01-02', '2014-01-05', + '2014-01-08', '2014-01-11', + '2014-01-14'], freq='3D') + tm.assert_index_equal(pidx, expected) + + pidx = func(end='2014-01-01 17:00', freq='4H', periods=3) + expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00', + '2014-01-01 17:00'], freq='4H') + tm.assert_index_equal(pidx, expected) + + msg = ('Frequency must be positive, because it' + ' represents span: -1M') + with tm.assertRaisesRegexp(ValueError, msg): + PeriodIndex(['2011-01'], freq='-1M') + + msg = ('Frequency must be positive, because it' ' represents span: 0M') + with tm.assertRaisesRegexp(ValueError, msg): + PeriodIndex(['2011-01'], freq='0M') + + msg = ('Frequency must be positive, because it' ' represents span: 0M') + with tm.assertRaisesRegexp(ValueError, msg): + period_range('2011-01', periods=3, freq='0M') + + def test_constructor_freq_mult_dti_compat(self): + import itertools + mults = [1, 2, 3, 4, 5] + freqs = ['A', 'M', 'D', 'T', 'S'] + for mult, freq in itertools.product(mults, freqs): + freqstr = str(mult) + freq + pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) + expected = date_range(start='2014-04-01', freq=freqstr, + periods=10).to_period(freqstr) + tm.assert_index_equal(pidx, expected) + + def test_constructor_freq_combined(self): + for freq in ['1D1H', '1H1D']: + pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq) + expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'], + freq='25H') + for freq, func in zip(['1D1H', '1H1D'], [PeriodIndex, period_range]): + pidx = func(start='2016-01-01', periods=2, freq=freq) + expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'], + freq='25H') + tm.assert_index_equal(pidx, expected) + + def test_constructor(self): + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 9) + + pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 4 * 9) + + pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 12 * 9) + + pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009') + self.assertEqual(len(pi), 365 * 9 + 2) + + pi = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009') + self.assertEqual(len(pi), 261 * 9) + + pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00') + self.assertEqual(len(pi), 365 * 24) + + pi = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59') + self.assertEqual(len(pi), 24 * 60) + + pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') + self.assertEqual(len(pi), 24 * 60 * 60) + + start = Period('02-Apr-2005', 'B') + i1 = PeriodIndex(start=start, periods=20) + self.assertEqual(len(i1), 20) + self.assertEqual(i1.freq, start.freq) + self.assertEqual(i1[0], start) + + end_intv = Period('2006-12-31', 'W') + i1 = PeriodIndex(end=end_intv, periods=10) + self.assertEqual(len(i1), 10) + self.assertEqual(i1.freq, end_intv.freq) + self.assertEqual(i1[-1], end_intv) + + end_intv = Period('2006-12-31', '1w') + i2 = PeriodIndex(end=end_intv, periods=10) + self.assertEqual(len(i1), len(i2)) + self.assertTrue((i1 == i2).all()) + self.assertEqual(i1.freq, i2.freq) + + end_intv = Period('2006-12-31', ('w', 1)) + i2 = PeriodIndex(end=end_intv, periods=10) + 
self.assertEqual(len(i1), len(i2)) + self.assertTrue((i1 == i2).all()) + self.assertEqual(i1.freq, i2.freq) + + try: + PeriodIndex(start=start, end=end_intv) + raise AssertionError('Cannot allow mixed freq for start and end') + except ValueError: + pass + + end_intv = Period('2005-05-01', 'B') + i1 = PeriodIndex(start=start, end=end_intv) + + try: + PeriodIndex(start=start) + raise AssertionError( + 'Must specify periods if missing start or end') + except ValueError: + pass + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + # Mixed freq should fail + vals = [end_intv, Period('2006-12-31', 'w')] + self.assertRaises(ValueError, PeriodIndex, vals) + vals = np.array(vals) + self.assertRaises(ValueError, PeriodIndex, vals) + + def test_recreate_from_data(self): + for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']: + org = PeriodIndex(start='2001/04/01', freq=o, periods=1) + idx = PeriodIndex(org.values, freq=o) + tm.assert_index_equal(idx, org) + + def test_map_with_string_constructor(self): + raw = [2005, 2007, 2009] + index = PeriodIndex(raw, freq='A') + types = str, + + if PY3: + # unicode + types += text_type, + + for t in types: + expected = Index(lmap(t, raw)) + res = index.map(t) + + # should return an Index + tm.assertIsInstance(res, Index) + + # preserve element types + self.assertTrue(all(isinstance(resi, t) for resi in res)) + + # lastly, values should compare equal + tm.assert_index_equal(res, expected) + + +class TestSeriesPeriod(tm.TestCase): + + def setUp(self): + self.series = Series(period_range('2000-01-01', periods=10, freq='D')) + + def test_constructor_cant_cast_period(self): + with tm.assertRaises(TypeError): + Series(period_range('2000-01-01', periods=10, freq='D'), + dtype=float) + + def test_constructor_cast_object(self): + s = Series(period_range('1/1/2000', periods=10), dtype=object) + exp = Series(period_range('1/1/2000', periods=10)) + tm.assert_series_equal(s, exp) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tests/indexes/period/test_ops.py similarity index 58% rename from pandas/tseries/tests/test_base.py rename to pandas/tests/indexes/period/test_ops.py index 114cb02205d4f..70759e8659c25 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,13 +1,14 @@ -from __future__ import print_function -from datetime import timedelta import numpy as np +from datetime import timedelta, datetime + import pandas as pd -from pandas import (Series, Index, Period, DatetimeIndex, PeriodIndex, - Timedelta, _np_version_under1p10) import pandas.tslib as tslib -import pandas.tseries.period as period - import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import lrange +from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, + _np_version_under1p10, Index, Timedelta, offsets, + _np_version_under1p9) from pandas.tests.test_base import Ops @@ -473,6 +474,13 @@ def test_difference(self): result_union = rng.difference(other) tm.assert_index_equal(result_union, expected) + def test_sub(self): + rng = period_range('2007-01', periods=50) + + result = rng - 5 + exp = rng + (-5) + tm.assert_index_equal(result, exp) + def test_sub_isub(self): # previously performed setop, now raises TypeError (GH14164) @@ -1020,3 
+1028,781 @@ def test_equals(self): self.assertFalse(idx.asobject.equals(idx3)) self.assertFalse(idx.equals(list(idx3))) self.assertFalse(idx.equals(pd.Series(idx3))) + + +class TestPeriodIndexSeriesMethods(tm.TestCase): + """ Test PeriodIndex and Period Series Ops consistency """ + + def _check(self, values, func, expected): + idx = pd.PeriodIndex(values) + result = func(idx) + if isinstance(expected, pd.Index): + tm.assert_index_equal(result, expected) + else: + # comp op results in bool + tm.assert_numpy_array_equal(result, expected) + + s = pd.Series(values) + result = func(s) + + exp = pd.Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_ops(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + expected = PeriodIndex(['2011-03', '2011-04', + '2011-05', '2011-06'], freq='M', name='idx') + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + + self._check(idx + 2, lambda x: x - 2, idx) + result = idx - Period('2011-01', freq='M') + exp = pd.Index([0, 1, 2, 3], name='idx') + tm.assert_index_equal(result, exp) + + result = Period('2011-01', freq='M') - idx + exp = pd.Index([0, -1, -2, -3], name='idx') + tm.assert_index_equal(result, exp) + + def test_pi_ops_errors(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + s = pd.Series(idx) + + msg = r"unsupported operand type\(s\)" + + for obj in [idx, s]: + for ng in ["str", 1.5]: + with tm.assertRaisesRegexp(TypeError, msg): + obj + ng + + with tm.assertRaises(TypeError): + # error message differs between PY2 and 3 + ng + obj + + with tm.assertRaisesRegexp(TypeError, msg): + obj - ng + + with tm.assertRaises(TypeError): + np.add(obj, ng) + + if _np_version_under1p10: + self.assertIs(np.add(ng, obj), NotImplemented) + else: + with tm.assertRaises(TypeError): + np.add(ng, obj) + + with tm.assertRaises(TypeError): + np.subtract(obj, ng) + + if _np_version_under1p10: + self.assertIs(np.subtract(ng, obj), NotImplemented) + else: + with tm.assertRaises(TypeError): + np.subtract(ng, obj) + + def test_pi_ops_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + expected = PeriodIndex(['2011-03', '2011-04', + 'NaT', '2011-06'], freq='M', name='idx') + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + self._check(idx, lambda x: np.add(x, 2), expected) + + self._check(idx + 2, lambda x: x - 2, idx) + self._check(idx + 2, lambda x: np.subtract(x, 2), idx) + + # freq with mult + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='2M', name='idx') + expected = PeriodIndex(['2011-07', '2011-08', + 'NaT', '2011-10'], freq='2M', name='idx') + self._check(idx, lambda x: x + 3, expected) + self._check(idx, lambda x: 3 + x, expected) + self._check(idx, lambda x: np.add(x, 3), expected) + + self._check(idx + 3, lambda x: x - 3, idx) + self._check(idx + 3, lambda x: np.subtract(x, 3), idx) + + def test_pi_ops_array_int(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + f = lambda x: x + np.array([1, 2, 3, 4]) + exp = PeriodIndex(['2011-02', '2011-04', 'NaT', + '2011-08'], freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: np.add(x, np.array([4, -1, 1, 2])) + exp = PeriodIndex(['2011-05', '2011-01', 'NaT', + '2011-06'], freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: x - np.array([1, 2, 3, 4]) + exp = 
PeriodIndex(['2010-12', '2010-12', 'NaT', + '2010-12'], freq='M', name='idx') + self._check(idx, f, exp) + + f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) + exp = PeriodIndex(['2010-10', '2010-12', 'NaT', + '2011-06'], freq='M', name='idx') + self._check(idx, f, exp) + + def test_pi_ops_offset(self): + idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', + '2011-04-01'], freq='D', name='idx') + f = lambda x: x + offsets.Day() + exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02', + '2011-04-02'], freq='D', name='idx') + self._check(idx, f, exp) + + f = lambda x: x + offsets.Day(2) + exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03', + '2011-04-03'], freq='D', name='idx') + self._check(idx, f, exp) + + f = lambda x: x - offsets.Day(2) + exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27', + '2011-03-30'], freq='D', name='idx') + self._check(idx, f, exp) + + def test_pi_offset_errors(self): + idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', + '2011-04-01'], freq='D', name='idx') + s = pd.Series(idx) + + # Series op is applied per Period instance, thus error is raised + # from Period + msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" + msg_s = r"Input cannot be converted to Period\(freq=D\)" + for obj, msg in [(idx, msg_idx), (s, msg_s)]: + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + obj + offsets.Hour(2) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + offsets.Hour(2) + obj + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + obj - offsets.Hour(2) + + def test_pi_sub_period(self): + # GH 13071 + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + result = idx - pd.Period('2012-01', freq='M') + exp = pd.Index([-12, -11, -10, -9], name='idx') + tm.assert_index_equal(result, exp) + + result = np.subtract(idx, pd.Period('2012-01', freq='M')) + tm.assert_index_equal(result, exp) + + result = pd.Period('2012-01', freq='M') - idx + exp = pd.Index([12, 11, 10, 9], name='idx') + tm.assert_index_equal(result, exp) + + result = np.subtract(pd.Period('2012-01', freq='M'), idx) + if _np_version_under1p10: + self.assertIs(result, NotImplemented) + else: + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') + tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) + tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) + + def test_pi_sub_pdnat(self): + # GH 13071 + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') + tm.assert_index_equal(pd.NaT - idx, exp) + tm.assert_index_equal(idx - pd.NaT, exp) + + def test_pi_sub_period_nat(self): + # GH 13071 + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', + '2011-04'], freq='M', name='idx') + + result = idx - pd.Period('2012-01', freq='M') + exp = pd.Index([-12, np.nan, -10, -9], name='idx') + tm.assert_index_equal(result, exp) + + result = pd.Period('2012-01', freq='M') - idx + exp = pd.Index([12, np.nan, 10, 9], name='idx') + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') + tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) + tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) + + def test_pi_comp_period(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = 
np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > pd.Period('2011-03', freq='M') + exp = np.array([False, False, False, True], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + def test_pi_comp_period_nat(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x == tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: x != tslib.NaT + exp = np.array([True, True, True, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x < pd.Period('2011-03', freq='M') + exp = np.array([True, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: tslib.NaT >= x + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + +class TestSeriesPeriod(tm.TestCase): + + def setUp(self): + self.series = Series(period_range('2000-01-01', periods=10, freq='D')) + + def test_ops_series_timedelta(self): + # GH 13043 + s = pd.Series([pd.Period('2015-01-01', freq='D'), + pd.Period('2015-01-02', freq='D')], name='xxx') + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Period('2015-01-02', freq='D'), + pd.Period('2015-01-03', freq='D')], name='xxx') + tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) + tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) + + tm.assert_series_equal(s + pd.tseries.offsets.Day(), exp) + tm.assert_series_equal(pd.tseries.offsets.Day() + s, exp) + + def test_ops_series_period(self): + # GH 13043 + s = pd.Series([pd.Period('2015-01-01', freq='D'), + pd.Period('2015-01-02', freq='D')], name='xxx') + self.assertEqual(s.dtype, object) + + p = pd.Period('2015-01-10', freq='D') + # dtype will be object because of original dtype + exp = pd.Series([9, 8], name='xxx', dtype=object) + tm.assert_series_equal(p - s, exp) + tm.assert_series_equal(s - p, -exp) + + s2 = pd.Series([pd.Period('2015-01-05', freq='D'), + pd.Period('2015-01-04', freq='D')], name='xxx') + self.assertEqual(s2.dtype, object) + + exp = pd.Series([4, 2], name='xxx', dtype=object) + tm.assert_series_equal(s2 - s, 
exp) + tm.assert_series_equal(s - s2, -exp) + + def test_ops_frame_period(self): + # GH 13043 + df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), + pd.Period('2015-02', freq='M')], + 'B': [pd.Period('2014-01', freq='M'), + pd.Period('2014-02', freq='M')]}) + self.assertEqual(df['A'].dtype, object) + self.assertEqual(df['B'].dtype, object) + + p = pd.Period('2015-03', freq='M') + # dtype will be object because of original dtype + exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), + 'B': np.array([14, 13], dtype=object)}) + tm.assert_frame_equal(p - df, exp) + tm.assert_frame_equal(df - p, -exp) + + df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), + pd.Period('2015-06', freq='M')], + 'B': [pd.Period('2015-05', freq='M'), + pd.Period('2015-06', freq='M')]}) + self.assertEqual(df2['A'].dtype, object) + self.assertEqual(df2['B'].dtype, object) + + exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), + 'B': np.array([16, 16], dtype=object)}) + tm.assert_frame_equal(df2 - df, exp) + tm.assert_frame_equal(df - df2, -exp) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_getitem_index(self): + idx = period_range('2007-01', periods=10, freq='M', name='x') + + result = idx[[1, 3, 5]] + exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'], + freq='M', name='x') + tm.assert_index_equal(result, exp) + + result = idx[[True, True, False, False, False, + True, True, False, False, False]] + exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'], + freq='M', name='x') + tm.assert_index_equal(result, exp) + + def test_getitem_partial(self): + rng = period_range('2007-01', periods=50, freq='M') + ts = Series(np.random.randn(len(rng)), rng) + + self.assertRaises(KeyError, ts.__getitem__, '2006') + + result = ts['2008'] + self.assertTrue((result.index.year == 2008).all()) + + result = ts['2008':'2009'] + self.assertEqual(len(result), 24) + + result = ts['2008-1':'2009-12'] + self.assertEqual(len(result), 24) + + result = ts['2008Q1':'2009Q4'] + self.assertEqual(len(result), 24) + + result = ts[:'2009'] + self.assertEqual(len(result), 36) + + result = ts['2009':] + self.assertEqual(len(result), 50 - 24) + + exp = result + result = ts[24:] + tm.assert_series_equal(exp, result) + + ts = ts[10:].append(ts[10:]) + self.assertRaisesRegexp(KeyError, + "left slice bound for non-unique " + "label: '2008'", + ts.__getitem__, slice('2008', '2009')) + + def test_getitem_datetime(self): + rng = period_range(start='2012-01-01', periods=10, freq='W-MON') + ts = Series(lrange(len(rng)), index=rng) + + dt1 = datetime(2011, 10, 2) + dt4 = datetime(2012, 4, 20) + + rs = ts[dt1:dt4] + tm.assert_series_equal(rs, ts) + + def test_getitem_nat(self): + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) + self.assertIs(idx[1], tslib.NaT) + + s = pd.Series([0, 1, 2], index=idx) + self.assertEqual(s[pd.NaT], 1) + + s = pd.Series(idx, index=idx) + self.assertEqual(s[pd.Period('2011-01', freq='M')], + pd.Period('2011-01', freq='M')) + self.assertIs(s[pd.NaT], tslib.NaT) + + def test_getitem_list_periods(self): + # GH 7710 + rng = period_range(start='2012-01-01', periods=10, freq='D') + ts = Series(lrange(len(rng)), index=rng) + exp = ts.iloc[[1]] + tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp) + + def test_getitem_seconds(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', + periods=4000) + pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', 
periods=4000) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + if _np_version_under1p9: + with tm.assertRaises(ValueError): + idx[v] + else: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with tm.assertRaises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) + tm.assert_series_equal(s['2013/01/01 9H'], s[:3600]) + for d in ['2013/01/01', '2013/01', '2013']: + tm.assert_series_equal(s[d], s) + + def test_getitem_day(self): + # GH 6716 + # Confirm DatetimeIndex and PeriodIndex works identically + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + + if _np_version_under1p9: + with tm.assertRaises(ValueError): + idx[v] + else: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with tm.assertRaises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s['2013/01'], s[0:31]) + tm.assert_series_equal(s['2013/02'], s[31:59]) + tm.assert_series_equal(s['2014'], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(KeyError): + s[v] + + def test_take(self): + index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', + name='idx') + expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), + datetime(2010, 1, 9), datetime(2010, 1, 13)], + freq='D', name='idx') + + taken1 = index.take([5, 6, 8, 12]) + taken2 = index[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + tm.assertIsInstance(taken, PeriodIndex) + self.assertEqual(taken.freq, index.freq) + self.assertEqual(taken.name, expected.name) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx', freq='D') + result = idx.take(np.array([1, 0, -1])) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_get_loc_msg(self): + idx = period_range('2000-1-1', freq='A', periods=10) + bad_period = Period('2012', 'A') + self.assertRaises(KeyError, idx.get_loc, bad_period) + + try: + idx.get_loc(bad_period) + except KeyError as inst: + 
self.assertEqual(inst.args[0], bad_period) + + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + self.assertEqual(idx.get_loc(pd.NaT), 1) + self.assertEqual(idx.get_loc(None), 1) + self.assertEqual(idx.get_loc(float('nan')), 1) + self.assertEqual(idx.get_loc(np.nan), 1) + + +class TestComparisons(tm.TestCase): + + def setUp(self): + self.january1 = Period('2000-01', 'M') + self.january2 = Period('2000-01', 'M') + self.february = Period('2000-02', 'M') + self.march = Period('2000-03', 'M') + self.day = Period('2012-01-01', 'D') + + def test_equal(self): + self.assertEqual(self.january1, self.january2) + + def test_equal_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 == self.day + + def test_notEqual(self): + self.assertNotEqual(self.january1, 1) + self.assertNotEqual(self.january1, self.february) + + def test_greater(self): + self.assertTrue(self.february > self.january1) + + def test_greater_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 > self.day + + def test_greater_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 > 1 + + def test_greaterEqual(self): + self.assertTrue(self.january1 >= self.january2) + + def test_greaterEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 >= self.day + + with tm.assertRaises(TypeError): + print(self.january1 >= 1) + + def test_smallerEqual(self): + self.assertTrue(self.january1 <= self.january2) + + def test_smallerEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 <= self.day + + def test_smallerEqual_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 <= 1 + + def test_smaller(self): + self.assertTrue(self.january1 < self.february) + + def test_smaller_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 < self.day + + def test_smaller_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 < 1 + + def test_sort(self): + periods = [self.march, self.january1, self.february] + correctPeriods = [self.january1, self.february, self.march] + self.assertEqual(sorted(periods), correctPeriods) + + def test_period_nat_comp(self): + p_nat = Period('NaT', freq='D') + p = Period('2011-01-01', freq='D') + + nat = pd.Timestamp('NaT') + t = pd.Timestamp('2011-01-01') + # confirm Period('NaT') work identical with Timestamp('NaT') + for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), + (t, nat), (nat, nat)]: + self.assertEqual(left < right, False) + self.assertEqual(left > right, False) + self.assertEqual(left == right, False) + self.assertEqual(left != right, True) + self.assertEqual(left <= right, False) + self.assertEqual(left >= right, False) + + def test_pi_pi_comp(self): + + for freq in ['M', '2M', '3M']: + base = PeriodIndex(['2011-01', '2011-02', + '2011-03', '2011-04'], freq=freq) + p = Period('2011-02', freq=freq) + + exp = np.array([False, True, False, False]) + self.assert_numpy_array_equal(base == p, exp) + self.assert_numpy_array_equal(p == base, exp) + + exp = np.array([True, False, True, True]) + self.assert_numpy_array_equal(base != p, exp) + self.assert_numpy_array_equal(p != base, exp) + + exp = np.array([False, False, True, True]) + self.assert_numpy_array_equal(base > p, exp) + 
self.assert_numpy_array_equal(p < base, exp) + + exp = np.array([True, False, False, False]) + self.assert_numpy_array_equal(base < p, exp) + self.assert_numpy_array_equal(p > base, exp) + + exp = np.array([False, True, True, True]) + self.assert_numpy_array_equal(base >= p, exp) + self.assert_numpy_array_equal(p <= base, exp) + + exp = np.array([True, True, False, False]) + self.assert_numpy_array_equal(base <= p, exp) + self.assert_numpy_array_equal(p >= base, exp) + + idx = PeriodIndex(['2011-02', '2011-01', '2011-03', + '2011-05'], freq=freq) + + exp = np.array([False, False, True, False]) + self.assert_numpy_array_equal(base == idx, exp) + + exp = np.array([True, True, False, True]) + self.assert_numpy_array_equal(base != idx, exp) + + exp = np.array([False, True, False, False]) + self.assert_numpy_array_equal(base > idx, exp) + + exp = np.array([True, False, False, True]) + self.assert_numpy_array_equal(base < idx, exp) + + exp = np.array([False, True, True, False]) + self.assert_numpy_array_equal(base >= idx, exp) + + exp = np.array([True, False, True, True]) + self.assert_numpy_array_equal(base <= idx, exp) + + # different base freq + msg = "Input has different freq=A-DEC from PeriodIndex" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') + base <= idx + + # different mult + msg = "Input has different freq=4M from PeriodIndex" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='4M') + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + Period('2011', freq='4M') >= base + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') + base <= idx + + def test_pi_nat_comp(self): + for freq in ['M', '2M', '3M']: + idx1 = PeriodIndex( + ['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) + + result = idx1 > Period('2011-02', freq=freq) + exp = np.array([False, False, False, True]) + self.assert_numpy_array_equal(result, exp) + result = Period('2011-02', freq=freq) < idx1 + self.assert_numpy_array_equal(result, exp) + + result = idx1 == Period('NaT', freq=freq) + exp = np.array([False, False, False, False]) + self.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) == idx1 + self.assert_numpy_array_equal(result, exp) + + result = idx1 != Period('NaT', freq=freq) + exp = np.array([True, True, True, True]) + self.assert_numpy_array_equal(result, exp) + result = Period('NaT', freq=freq) != idx1 + self.assert_numpy_array_equal(result, exp) + + idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', + 'NaT'], freq=freq) + result = idx1 < idx2 + exp = np.array([True, False, False, False]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 == idx2 + exp = np.array([False, False, False, False]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 != idx2 + exp = np.array([True, True, True, True]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 == idx1 + exp = np.array([True, True, False, True]) + self.assert_numpy_array_equal(result, exp) + + result = idx1 != idx1 + exp = np.array([False, False, True, False]) + self.assert_numpy_array_equal(result, exp) + + diff = PeriodIndex(['2011-02', '2011-01', '2011-04', + 'NaT'], freq='4M') + msg 
= "Input has different freq=4M from PeriodIndex" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx1 > diff + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + idx1 == diff diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py new file mode 100644 index 0000000000000..b051c4a0dcab1 --- /dev/null +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -0,0 +1,139 @@ +import numpy as np + +import pandas as pd +from pandas.util import testing as tm +from pandas import (Series, period_range, DatetimeIndex, PeriodIndex, + DataFrame, _np_version_under1p12, Period) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), + period_range('2014-01', periods=20, freq='M')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Period('2014-10')::-1], SLC[9::-1]) + assert_slices_equivalent(SLC['2014-10'::-1], SLC[9::-1]) + + assert_slices_equivalent(SLC[:Period('2014-10'):-1], SLC[:8:-1]) + assert_slices_equivalent(SLC[:'2014-10':-1], SLC[:8:-1]) + + assert_slices_equivalent(SLC['2015-02':'2014-10':-1], SLC[13:8:-1]) + assert_slices_equivalent(SLC[Period('2015-02'):Period('2014-10'):-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC['2015-02':Period('2014-10'):-1], + SLC[13:8:-1]) + assert_slices_equivalent(SLC[Period('2015-02'):'2014-10':-1], + SLC[13:8:-1]) + + assert_slices_equivalent(SLC['2014-10':'2015-02':-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), + period_range('2014-01', periods=20, freq='M')) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + + def test_slice_keep_name(self): + idx = period_range('20010101', periods=10, freq='D', name='bob') + self.assertEqual(idx.name, idx[1:].name) + + def test_pindex_slice_index(self): + pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') + s = Series(np.random.rand(len(pi)), index=pi) + res = s['2010'] + exp = s[0:12] + tm.assert_series_equal(res, exp) + res = s['2011'] + exp = s[12:24] + tm.assert_series_equal(res, exp) + + def test_range_slice_day(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + # changed to TypeError in 1.12 + # https://github.com/numpy/numpy/pull/6271 + exc = IndexError if _np_version_under1p12 else TypeError + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(exc): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s['2013/01/02':], s[1:]) + tm.assert_series_equal(s['2013/01/02':'2013/01/05'], s[1:5]) + tm.assert_series_equal(s['2013/02':], s[31:]) + tm.assert_series_equal(s['2014':], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(exc): + idx[v:] + + def test_range_slice_seconds(self): + # GH 6716 + didx = 
DatetimeIndex(start='2013/01/01 09:00:00', freq='S', + periods=4000) + pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + + # changed to TypeError in 1.12 + # https://github.com/numpy/numpy/pull/6271 + exc = IndexError if _np_version_under1p12 else TypeError + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(exc): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s['2013/01/01 09:05':'2013/01/01 09:10'], + s[300:660]) + tm.assert_series_equal(s['2013/01/01 10:00':'2013/01/01 10:05'], + s[3600:3960]) + tm.assert_series_equal(s['2013/01/01 10H':], s[3600:]) + tm.assert_series_equal(s[:'2013/01/01 09:30'], s[:1860]) + for d in ['2013/01/01', '2013/01', '2013']: + tm.assert_series_equal(s[d:], s) + + def test_range_slice_outofbounds(self): + # GH 5407 + didx = DatetimeIndex(start='2013/10/01', freq='D', periods=10) + pidx = PeriodIndex(start='2013/10/01', freq='D', periods=10) + + for idx in [didx, pidx]: + df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) + empty = DataFrame(index=idx.__class__([], freq='D'), + columns=['units']) + empty['units'] = empty['units'].astype('int64') + + tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty) + tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], df.iloc[:2]) + tm.assert_frame_equal(df['2013/10/01':'2013/10/02'], df.iloc[:2]) + tm.assert_frame_equal(df['2013/10/02':'2013/09/30'], empty) + tm.assert_frame_equal(df['2013/10/15':'2013/10/17'], empty) + tm.assert_frame_equal(df['2013-06':'2013-09'], empty) + tm.assert_frame_equal(df['2013-11':'2013-12'], empty) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 33653c92da719..6a8128bb8985f 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -1,10 +1,12 @@ import numpy as np +from numpy.random import randn from datetime import timedelta import pandas as pd from pandas.util import testing as tm from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, - Index, Period, Int64Index) + Index, Period, Int64Index, Series, DataFrame, date_range, + offsets) from ..datetimelike import DatetimeLike @@ -20,26 +22,6 @@ def setUp(self): def create_index(self): return period_range('20130101', periods=5, freq='D') - def test_construction_base_constructor(self): - # GH 13664 - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='D')] - tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) - - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.Index(np.array(arr), dtype=object)) - def test_astype(self): # GH 13149, GH 13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') @@ -68,16 +50,6 @@ def test_astype_raises(self): self.assertRaises(ValueError, idx.astype, 'timedelta64') self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - def test_shift(self): - - # test shift for 
PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - def test_pickle_compat_construction(self): pass @@ -231,3 +203,552 @@ def test_difference_freq(self): expected = PeriodIndex(["20160920", "20160921"], freq='D') tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) + + def test_hash_error(self): + index = period_range('20010101', periods=10) + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_make_time_series(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + series = Series(1, index=index) + tm.assertIsInstance(series, Series) + + def test_shallow_copy_empty(self): + + # GH13067 + idx = PeriodIndex([], freq='M') + result = idx._shallow_copy() + expected = idx + + tm.assert_index_equal(result, expected) + + def test_dtype_str(self): + pi = pd.PeriodIndex([], freq='M') + self.assertEqual(pi.dtype_str, 'period[M]') + self.assertEqual(pi.dtype_str, str(pi.dtype)) + + pi = pd.PeriodIndex([], freq='3M') + self.assertEqual(pi.dtype_str, 'period[3M]') + self.assertEqual(pi.dtype_str, str(pi.dtype)) + + def test_view_asi8(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + def test_values(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=np.object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx._values, exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx._values, exp) + + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + + exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], + dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx._values, exp) + + def test_period_index_length(self): + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 9) + + pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 4 * 9) + + pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + self.assertEqual(len(pi), 12 * 9) + + start = Period('02-Apr-2005', 'B') + i1 = PeriodIndex(start=start, periods=20) + self.assertEqual(len(i1), 20) + self.assertEqual(i1.freq, start.freq) + self.assertEqual(i1[0], start) + + end_intv = Period('2006-12-31', 'W') + i1 = PeriodIndex(end=end_intv, periods=10) + 
self.assertEqual(len(i1), 10) + self.assertEqual(i1.freq, end_intv.freq) + self.assertEqual(i1[-1], end_intv) + + end_intv = Period('2006-12-31', '1w') + i2 = PeriodIndex(end=end_intv, periods=10) + self.assertEqual(len(i1), len(i2)) + self.assertTrue((i1 == i2).all()) + self.assertEqual(i1.freq, i2.freq) + + end_intv = Period('2006-12-31', ('w', 1)) + i2 = PeriodIndex(end=end_intv, periods=10) + self.assertEqual(len(i1), len(i2)) + self.assertTrue((i1 == i2).all()) + self.assertEqual(i1.freq, i2.freq) + + try: + PeriodIndex(start=start, end=end_intv) + raise AssertionError('Cannot allow mixed freq for start and end') + except ValueError: + pass + + end_intv = Period('2005-05-01', 'B') + i1 = PeriodIndex(start=start, end=end_intv) + + try: + PeriodIndex(start=start) + raise AssertionError( + 'Must specify periods if missing start or end') + except ValueError: + pass + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) + self.assertEqual(len(i2), 2) + self.assertEqual(i2[0], end_intv) + + # Mixed freq should fail + vals = [end_intv, Period('2006-12-31', 'w')] + self.assertRaises(ValueError, PeriodIndex, vals) + vals = np.array(vals) + self.assertRaises(ValueError, PeriodIndex, vals) + + def test_fields(self): + # year, month, day, hour, minute + # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter + # qyear + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2005') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2002') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='D', start='12/1/2001', end='6/1/2001') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='B', start='12/1/2001', end='6/1/2001') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:20') + self._check_all_fields(pi) + + pi = PeriodIndex(freq='S', start='12/31/2001 00:00:00', + end='12/31/2001 00:05:00') + self._check_all_fields(pi) + + end_intv = Period('2006-12-31', 'W') + i1 = PeriodIndex(end=end_intv, periods=10) + self._check_all_fields(i1) + + def _check_all_fields(self, periodindex): + fields = ['year', 'month', 'day', 'hour', 'minute', 'second', + 'weekofyear', 'week', 'dayofweek', 'weekday', 'dayofyear', + 'quarter', 'qyear', 'days_in_month', 'is_leap_year'] + + periods = list(periodindex) + s = pd.Series(periodindex) + + for field in fields: + field_idx = getattr(periodindex, field) + self.assertEqual(len(periodindex), len(field_idx)) + for x, val in zip(periods, field_idx): + self.assertEqual(getattr(x, field), val) + + if len(s) == 0: + continue + + field_s = getattr(s.dt, field) + self.assertEqual(len(periodindex), len(field_s)) + for x, val in zip(periods, field_s): + self.assertEqual(getattr(x, field), val) + + def test_indexing(self): + + # GH 4390, iat incorrectly indexing + index = period_range('1/1/2001', periods=10) + s = Series(randn(10), index=index) + expected = s[index[0]] + result = s.iat[0] + self.assertEqual(expected, result) + + def test_period_set_index_reindex(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = period_range('2011/01/01', periods=6, freq='M') + idx2 = period_range('2013', periods=6, 
freq='A') + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.set_index(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_factorize(self): + idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', + '2014-03', '2014-03'], freq='M') + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', + '2014-03', '2014-01'], freq='M') + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + arr, idx = idx2.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M') + arr, idx = idx2.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + def test_asobject_like(self): + idx = pd.PeriodIndex([], freq='M') + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.asobject.values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.asobject.values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], + dtype=object) + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + tm.assert_numpy_array_equal(idx.asobject.values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + def test_is_(self): + create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', + end='12/1/2009') + index = create_index() + self.assertEqual(index.is_(index), True) + self.assertEqual(index.is_(create_index()), False) + self.assertEqual(index.is_(index.view()), True) + self.assertEqual( + index.is_(index.view().view().view().view().view()), True) + self.assertEqual(index.view().is_(index), True) + ind2 = index.view() + index.name = "Apple" + self.assertEqual(ind2.is_(index), True) + self.assertEqual(index.is_(index[:]), False) + self.assertEqual(index.is_(index.asfreq('M')), False) + self.assertEqual(index.is_(index.asfreq('A')), False) + self.assertEqual(index.is_(index - 2), False) + self.assertEqual(index.is_(index - 0), False) + + def test_comp_period(self): + idx = period_range('2007-01', periods=20, freq='M') + + result = idx < idx[10] + exp = idx.values < idx.values[10] + self.assert_numpy_array_equal(result, exp) + + def test_contains(self): + rng = period_range('2007-01', freq='M', periods=10) + + self.assertTrue(Period('2007-01', freq='M') in rng) + self.assertFalse(Period('2007-01', freq='D') in rng) + self.assertFalse(Period('2007-01', freq='2M') in rng) + + def test_contains_nat(self): + # GH13582 + idx = period_range('2007-01', freq='M', periods=10) + self.assertFalse(pd.NaT in idx) + self.assertFalse(None in idx) + self.assertFalse(float('nan') in idx) + self.assertFalse(np.nan in idx) + + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertTrue(pd.NaT in idx) + self.assertTrue(None in idx) + self.assertTrue(float('nan') in idx) + self.assertTrue(np.nan in idx) + + def test_periods_number_check(self): + 
with tm.assertRaises(ValueError): + period_range('2011-1-1', '2012-1-1', 'B') + + def test_start_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') + tm.assert_index_equal(index.end_time, expected_index) + + def test_index_duplicate_periods(self): + # monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts[2007] + expected = ts[1:3] + tm.assert_series_equal(result, expected) + result[:] = 1 + self.assertTrue((ts[1:3] == 1).all()) + + # not monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN') + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts[2007] + expected = ts[idx == 2007] + tm.assert_series_equal(result, expected) + + def test_index_unique(self): + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') + expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN') + self.assert_index_equal(idx.unique(), expected) + self.assertEqual(idx.nunique(), 3) + + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN', + tz='US/Eastern') + expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN', + tz='US/Eastern') + self.assert_index_equal(idx.unique(), expected) + self.assertEqual(idx.nunique(), 3) + + def test_shift_gh8083(self): + + # test shift for PeriodIndex + # GH8083 + drange = self.create_index() + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + def test_shift(self): + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') + + tm.assert_index_equal(pi1.shift(0), pi1) + + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(1), pi2) + + pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') + self.assertEqual(len(pi1), len(pi2)) + self.assert_index_equal(pi1.shift(-1), pi2) + + def test_shift_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(1) + expected = PeriodIndex(['2011-02', '2011-03', 'NaT', + '2011-05'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, 
expected.name) + + def test_shift_ndarray(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(np.array([1, 2, 3, 4])) + expected = PeriodIndex(['2011-02', '2011-04', 'NaT', + '2011-08'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2011-04'], freq='M', name='idx') + result = idx.shift(np.array([1, -2, 3, -4])) + expected = PeriodIndex(['2011-02', '2010-12', 'NaT', + '2010-12'], freq='M', name='idx') + tm.assert_index_equal(result, expected) + + def test_negative_ordinals(self): + Period(ordinal=-1000, freq='A') + Period(ordinal=0, freq='A') + + idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq='A') + idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq='A') + tm.assert_index_equal(idx1, idx2) + + def test_pindex_fieldaccessor_nat(self): + idx = PeriodIndex(['2011-01', '2011-02', 'NaT', + '2012-03', '2012-04'], freq='D') + + exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) + self.assert_numpy_array_equal(idx.year, exp) + exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) + self.assert_numpy_array_equal(idx.month, exp) + + def test_pindex_qaccess(self): + pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') + s = Series(np.random.rand(len(pi)), index=pi).cumsum() + # Todo: fix these accessors! + self.assertEqual(s['05Q4'], s[2]) + + def test_numpy_repeat(self): + index = period_range('20010101', periods=2) + expected = PeriodIndex([Period('2001-01-01'), Period('2001-01-01'), + Period('2001-01-02'), Period('2001-01-02')]) + + tm.assert_index_equal(np.repeat(index, 2), expected) + + msg = "the 'axis' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.repeat, index, 2, axis=1) + + def test_pindex_multiples(self): + pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') + expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07', + '2011-09', '2011-11'], freq='2M') + tm.assert_index_equal(pi, expected) + self.assertEqual(pi.freq, offsets.MonthEnd(2)) + self.assertEqual(pi.freqstr, '2M') + + pi = period_range(start='1/1/11', end='12/31/11', freq='2M') + tm.assert_index_equal(pi, expected) + self.assertEqual(pi.freq, offsets.MonthEnd(2)) + self.assertEqual(pi.freqstr, '2M') + + pi = period_range(start='1/1/11', periods=6, freq='2M') + tm.assert_index_equal(pi, expected) + self.assertEqual(pi.freq, offsets.MonthEnd(2)) + self.assertEqual(pi.freqstr, '2M') + + def test_iteration(self): + index = PeriodIndex(start='1/1/10', periods=4, freq='B') + + result = list(index) + tm.assertIsInstance(result[0], Period) + self.assertEqual(result[0].freq, index.freq) + + def test_is_full(self): + index = PeriodIndex([2005, 2007, 2009], freq='A') + self.assertFalse(index.is_full) + + index = PeriodIndex([2005, 2006, 2007], freq='A') + self.assertTrue(index.is_full) + + index = PeriodIndex([2005, 2005, 2007], freq='A') + self.assertFalse(index.is_full) + + index = PeriodIndex([2005, 2005, 2006], freq='A') + self.assertTrue(index.is_full) + + index = PeriodIndex([2006, 2005, 2005], freq='A') + self.assertRaises(ValueError, getattr, index, 'is_full') + + self.assertTrue(index[:0].is_full) + + def test_with_multi_index(self): + # #1705 + index = date_range('1/1/2012', periods=4, freq='12H') + index_as_arrays = [index.to_period(freq='D'), index.hour] + + s = Series([0, 1, 2, 3], index_as_arrays) + + tm.assertIsInstance(s.index.levels[0], PeriodIndex) + + tm.assertIsInstance(s.index.values[0][0], Period) + + def 
test_convert_array_of_periods(self): + rng = period_range('1/1/2000', periods=20, freq='D') + periods = list(rng) + + result = pd.Index(periods) + tm.assertIsInstance(result, PeriodIndex) + + def test_append_concat(self): + # #1815 + d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC') + d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC') + + s1 = Series(np.random.randn(10), d1) + s2 = Series(np.random.randn(10), d2) + + s1 = s1.to_period() + s2 = s2.to_period() + + # drops index + result = pd.concat([s1, s2]) + tm.assertIsInstance(result.index, PeriodIndex) + self.assertEqual(result.index[0], s1.index[0]) + + def test_pickle_freq(self): + # GH2891 + prng = period_range('1/1/2011', '1/1/2012', freq='M') + new_prng = self.round_trip_pickle(prng) + self.assertEqual(new_prng.freq, offsets.MonthEnd()) + self.assertEqual(new_prng.freqstr, 'M') + + def test_map(self): + index = PeriodIndex([2005, 2007, 2009], freq='A') + result = index.map(lambda x: x + 1) + expected = index + 1 + tm.assert_index_equal(result, expected) + + result = index.map(lambda x: x.ordinal) + exp = Index([x.ordinal for x in index]) + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py new file mode 100644 index 0000000000000..06e15f9175ed8 --- /dev/null +++ b/pandas/tests/indexes/period/test_setops.py @@ -0,0 +1,157 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas import period_range, PeriodIndex, Index, date_range + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_joins(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + for kind in ['inner', 'outer', 'left', 'right']: + joined = index.join(index[:-5], how=kind) + + tm.assertIsInstance(joined, PeriodIndex) + self.assertEqual(joined.freq, index.freq) + + def test_join_self(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + for kind in ['inner', 'outer', 'left', 'right']: + res = index.join(index, how=kind) + self.assertIs(index, res) + + def test_join_does_not_recur(self): + df = tm.makeCustomDataframe( + 3, 2, data_gen_f=lambda *args: np.random.randint(2), + c_idx_type='p', r_idx_type='dt') + s = df.iloc[:2, 0] + + res = s.index.join(df.columns, how='outer') + expected = Index([s.index[0], s.index[1], + df.columns[0], df.columns[1]], object) + tm.assert_index_equal(res, expected) + + def test_union(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + result = index[:-5].union(index[10:]) + tm.assert_index_equal(result, index) + + # not in order + result = _permute(index[:-5]).union(_permute(index[10:])) + tm.assert_index_equal(result, index) + + # raise if different frequencies + index = period_range('1/1/2000', '1/20/2000', freq='D') + index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') + with tm.assertRaises(period.IncompatibleFrequency): + index.union(index2) + + msg = 'can only call with other PeriodIndex-ed objects' + with tm.assertRaisesRegexp(ValueError, msg): + index.join(index.to_timestamp()) + + index3 = period_range('1/1/2000', '1/20/2000', freq='2D') + with tm.assertRaises(period.IncompatibleFrequency): + index.join(index3) + + def test_union_dataframe_index(self): + rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M') + s1 = pd.Series(np.random.randn(len(rng1)), rng1) + + rng2 = pd.period_range('1/1/1980', 
'12/1/2001', freq='M') + s2 = pd.Series(np.random.randn(len(rng2)), rng2) + df = pd.DataFrame({'s1': s1, 's2': s2}) + + exp = pd.period_range('1/1/1980', '1/1/2012', freq='M') + self.assert_index_equal(df.index, exp) + + def test_intersection(self): + index = period_range('1/1/2000', '1/20/2000', freq='D') + + result = index[:-5].intersection(index[10:]) + tm.assert_index_equal(result, index[10:-5]) + + # not in order + left = _permute(index[:-5]) + right = _permute(index[10:]) + result = left.intersection(right).sort_values() + tm.assert_index_equal(result, index[10:-5]) + + # raise if different frequencies + index = period_range('1/1/2000', '1/20/2000', freq='D') + index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') + with tm.assertRaises(period.IncompatibleFrequency): + index.intersection(index2) + + index3 = period_range('1/1/2000', '1/20/2000', freq='2D') + with tm.assertRaises(period.IncompatibleFrequency): + index.intersection(index3) + + def test_intersection_cases(self): + base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx') + + # if target has the same name, it is preserved + rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx') + expected2 = period_range('6/1/2000', '6/20/2000', freq='D', + name='idx') + + # if target name is different, it will be reset + rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other') + expected3 = period_range('6/1/2000', '6/20/2000', freq='D', + name=None) + + rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx') + expected4 = PeriodIndex([], name='idx', freq='D') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + # non-monotonic + base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02', + '2011-01-03'], freq='D', name='idx') + + rng2 = PeriodIndex(['2011-01-04', '2011-01-02', + '2011-02-02', '2011-02-03'], + freq='D', name='idx') + expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D', + name='idx') + + rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02', + '2011-02-03'], + freq='D', name='other') + expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D', + name=None) + + rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx') + expected4 = PeriodIndex([], freq='D', name='idx') + + for (rng, expected) in [(rng2, expected2), (rng3, expected3), + (rng4, expected4)]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, 'D') + + # empty same freq + rng = date_range('6/1/2000', '6/15/2000', freq='T') + result = rng[0:0].intersection(rng) + self.assertEqual(len(result), 0) + + result = rng.intersection(rng[0:0]) + self.assertEqual(len(result), 0) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py new file mode 100644 index 0000000000000..e09d405afd375 --- /dev/null +++ b/pandas/tests/indexes/period/test_tools.py @@ -0,0 +1,449 @@ +import numpy as np +from datetime import datetime, timedelta + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import lrange +from pandas.tseries.frequencies import get_freq, MONTHS +from pandas._period import period_ordinal, period_asfreq +from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, 
Series, + date_range, to_datetime, period_range) + + +class TestPeriodRepresentation(tm.TestCase): + """ + Wish to match NumPy units + """ + + def _check_freq(self, freq, base_date): + rng = PeriodIndex(start=base_date, periods=10, freq=freq) + exp = np.arange(10, dtype=np.int64) + self.assert_numpy_array_equal(rng._values, exp) + self.assert_numpy_array_equal(rng.asi8, exp) + + def test_annual(self): + self._check_freq('A', 1970) + + def test_monthly(self): + self._check_freq('M', '1970-01') + + def test_weekly(self): + self._check_freq('W-THU', '1970-01-01') + + def test_daily(self): + self._check_freq('D', '1970-01-01') + + def test_business_daily(self): + self._check_freq('B', '1970-01-01') + + def test_hourly(self): + self._check_freq('H', '1970-01-01') + + def test_minutely(self): + self._check_freq('T', '1970-01-01') + + def test_secondly(self): + self._check_freq('S', '1970-01-01') + + def test_millisecondly(self): + self._check_freq('L', '1970-01-01') + + def test_microsecondly(self): + self._check_freq('U', '1970-01-01') + + def test_nanosecondly(self): + self._check_freq('N', '1970-01-01') + + def test_negone_ordinals(self): + freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S'] + + period = Period(ordinal=-1, freq='D') + for freq in freqs: + repr(period.asfreq(freq)) + + for freq in freqs: + period = Period(ordinal=-1, freq=freq) + repr(period) + self.assertEqual(period.year, 1969) + + period = Period(ordinal=-1, freq='B') + repr(period) + period = Period(ordinal=-1, freq='W') + repr(period) + + +class TestTslib(tm.TestCase): + def test_intraday_conversion_factors(self): + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('H'), False), 24) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('T'), False), 1440) + self.assertEqual(period_asfreq( + 1, get_freq('D'), get_freq('S'), False), 86400) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('L'), False), 86400000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('U'), False), 86400000000) + self.assertEqual(period_asfreq(1, get_freq( + 'D'), get_freq('N'), False), 86400000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('T'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('H'), get_freq('S'), False), 3600) + self.assertEqual(period_asfreq(1, get_freq('H'), + get_freq('L'), False), 3600000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('U'), False), 3600000000) + self.assertEqual(period_asfreq(1, get_freq( + 'H'), get_freq('N'), False), 3600000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('S'), False), 60) + self.assertEqual(period_asfreq( + 1, get_freq('T'), get_freq('L'), False), 60000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('U'), False), 60000000) + self.assertEqual(period_asfreq(1, get_freq( + 'T'), get_freq('N'), False), 60000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('S'), get_freq('L'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('S'), + get_freq('U'), False), 1000000) + self.assertEqual(period_asfreq(1, get_freq( + 'S'), get_freq('N'), False), 1000000000) + + self.assertEqual(period_asfreq( + 1, get_freq('L'), get_freq('U'), False), 1000) + self.assertEqual(period_asfreq(1, get_freq('L'), + get_freq('N'), False), 1000000) + + self.assertEqual(period_asfreq( + 1, get_freq('U'), get_freq('N'), False), 1000) + + def test_period_ordinal_start_values(self): + # information for 1.1.1970 + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + 
get_freq('A'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('M'))) + self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('D'))) + self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, + get_freq('B'))) + + def test_period_ordinal_week(self): + self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, + get_freq('W'))) + + self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('W'))) + self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('W'))) + + def test_period_ordinal_business_day(self): + # Thursday + self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, + get_freq('B'))) + # Friday + self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, + get_freq('B'))) + # Saturday + self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, + get_freq('B'))) + # Sunday + self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, + get_freq('B'))) + # Monday + self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('B'))) + # Tuesday + self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, + get_freq('B'))) + + +class TestPeriodIndex(tm.TestCase): + + def setUp(self): + pass + + def test_tolist(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + rs = index.tolist() + [tm.assertIsInstance(x, Period) for x in rs] + + recon = PeriodIndex(rs) + tm.assert_index_equal(index, recon) + + def test_to_timestamp(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + series = Series(1, index=index, name='foo') + + exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') + result = series.to_timestamp(how='end') + tm.assert_index_equal(result.index, exp_index) + self.assertEqual(result.name, 'foo') + + exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') + result = series.to_timestamp(how='start') + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq='A-DEC'): + return date_range(to_datetime('1/1/2001') + delta, + to_datetime('12/31/2009') + delta, freq=freq) + + delta = timedelta(hours=23) + result = series.to_timestamp('H', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = series.to_timestamp('T', 'end') + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + result = series.to_timestamp('S', 'end') + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + tm.assert_index_equal(result.index, exp_index) + + index = PeriodIndex(freq='H', start='1/1/2001', end='1/2/2001') + series = Series(1, index=index, name='foo') + + exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59', + freq='H') + result = series.to_timestamp(how='end') + tm.assert_index_equal(result.index, exp_index) + self.assertEqual(result.name, 'foo') + + def test_to_timestamp_quarterly_bug(self): + years = np.arange(1960, 2000).repeat(4) + quarters = np.tile(lrange(1, 5), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + stamps = pindex.to_timestamp('D', 'end') + expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) + tm.assert_index_equal(stamps, expected) + + def 
test_to_timestamp_preserve_name(self): + index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', + name='foo') + self.assertEqual(index.name, 'foo') + + conv = index.to_timestamp('D') + self.assertEqual(conv.name, 'foo') + + def test_to_timestamp_repr_is_code(self): + zs = [Timestamp('99-04-17 00:00:00', tz='UTC'), + Timestamp('2001-04-17 00:00:00', tz='UTC'), + Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'), + Timestamp('2001-04-17 00:00:00', tz=None)] + for z in zs: + self.assertEqual(eval(repr(z)), z) + + def test_to_timestamp_pi_nat(self): + # GH 7228 + index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', + name='idx') + + result = index.to_timestamp('D') + expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), + datetime(2011, 2, 1)], name='idx') + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, 'idx') + + result2 = result.to_period(freq='M') + tm.assert_index_equal(result2, index) + self.assertEqual(result2.name, 'idx') + + result3 = result.to_period(freq='3M') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') + self.assert_index_equal(result3, exp) + self.assertEqual(result3.freqstr, '3M') + + msg = ('Frequency must be positive, because it' + ' represents span: -2A') + with tm.assertRaisesRegexp(ValueError, msg): + result.to_period(freq='-2A') + + def test_to_timestamp_pi_mult(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='2M', name='idx') + result = idx.to_timestamp() + expected = DatetimeIndex( + ['2011-01-01', 'NaT', '2011-02-01'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E') + expected = DatetimeIndex( + ['2011-02-28', 'NaT', '2011-03-31'], name='idx') + self.assert_index_equal(result, expected) + + def test_to_timestamp_pi_combined(self): + idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') + result = idx.to_timestamp() + expected = DatetimeIndex( + ['2011-01-01 00:00', '2011-01-02 01:00'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E') + expected = DatetimeIndex( + ['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') + self.assert_index_equal(result, expected) + result = idx.to_timestamp(how='E', freq='H') + expected = DatetimeIndex( + ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') + self.assert_index_equal(result, expected) + + def test_to_timestamp_to_period_astype(self): + idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') + + res = idx.astype('period[M]') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') + tm.assert_index_equal(res, exp) + + res = idx.astype('period[3M]') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') + self.assert_index_equal(res, exp) + + def test_dti_to_period(self): + dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') + pi1 = dti.to_period() + pi2 = dti.to_period(freq='D') + pi3 = dti.to_period(freq='3D') + + self.assertEqual(pi1[0], Period('Jan 2005', freq='M')) + self.assertEqual(pi2[0], Period('1/31/2005', freq='D')) + self.assertEqual(pi3[0], Period('1/31/2005', freq='3D')) + + self.assertEqual(pi1[-1], Period('Nov 2005', freq='M')) + self.assertEqual(pi2[-1], Period('11/30/2005', freq='D')) + self.assertEqual(pi3[-1], Period('11/30/2005', freq='3D')) + + tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005', + freq='M')) + tm.assert_index_equal(pi2, period_range('1/1/2005', '11/1/2005', + freq='M').asfreq('D')) + tm.assert_index_equal(pi3, 
period_range('1/1/2005', '11/1/2005', + freq='M').asfreq('3D')) + + def test_period_astype_to_timestamp(self): + pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) + tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) + tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]') + tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]', how='end') + tm.assert_index_equal(res, exp) + + def test_to_period_quarterly(self): + # make sure we can make the round trip + for month in MONTHS: + freq = 'Q-%s' % month + rng = period_range('1989Q3', '1991Q3', freq=freq) + stamps = rng.to_timestamp() + result = stamps.to_period(freq) + tm.assert_index_equal(rng, result) + + def test_to_period_quarterlyish(self): + offsets = ['BQ', 'QS', 'BQS'] + for off in offsets: + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + self.assertEqual(prng.freq, 'Q-DEC') + + def test_to_period_annualish(self): + offsets = ['BA', 'AS', 'BAS'] + for off in offsets: + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + self.assertEqual(prng.freq, 'A-DEC') + + def test_to_period_monthish(self): + offsets = ['MS', 'BM'] + for off in offsets: + rng = date_range('01-Jan-2012', periods=8, freq=off) + prng = rng.to_period() + self.assertEqual(prng.freq, 'M') + + rng = date_range('01-Jan-2012', periods=8, freq='M') + prng = rng.to_period() + self.assertEqual(prng.freq, 'M') + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + date_range('01-Jan-2012', periods=8, freq='EOM') + + def test_period_dt64_round_trip(self): + dti = date_range('1/1/2000', '1/7/2002', freq='B') + pi = dti.to_period() + tm.assert_index_equal(pi.to_timestamp(), dti) + + dti = date_range('1/1/2000', '1/7/2002', freq='B') + pi = dti.to_period(freq='H') + tm.assert_index_equal(pi.to_timestamp(), dti) + + def test_to_timestamp_1703(self): + index = period_range('1/1/2012', periods=4, freq='D') + + result = index.to_timestamp() + self.assertEqual(result[0], Timestamp('1/1/2012')) + + def test_to_datetime_depr(self): + index = period_range('1/1/2012', periods=4, freq='D') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = index.to_datetime() + self.assertEqual(result[0], Timestamp('1/1/2012')) + + def test_combine_first(self): + # GH 3367 + didx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M') + pidx = pd.PeriodIndex(start=pd.Period('1950-1'), + end=pd.Period('1950-7'), freq='M') + # check to be consistent with DatetimeIndex + for idx in [didx, pidx]: + a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) + b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx) + + result = a.combine_first(b) + expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, + dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_searchsorted(self): + for freq in ['D', '2D']: + pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', + '2014-01-04', '2014-01-05'], freq=freq) + + p1 = pd.Period('2014-01-01', freq=freq) + 
self.assertEqual(pidx.searchsorted(p1), 0) + + p2 = pd.Period('2014-01-04', freq=freq) + self.assertEqual(pidx.searchsorted(p2), 3) + + msg = "Input has different freq=H from PeriodIndex" + with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='H')) + + msg = "Input has different freq=5D from PeriodIndex" + with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): + pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) + + with tm.assert_produces_warning(FutureWarning): + pidx.searchsorted(key=p2) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py new file mode 100644 index 0000000000000..c94a7c62a6dc9 --- /dev/null +++ b/pandas/tests/scalar/test_period.py @@ -0,0 +1,2074 @@ +import numpy as np +from datetime import datetime, date, timedelta + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas.compat import text_type, iteritems +from pandas.compat.numpy import np_datetime64_compat +from pandas import Period, Timestamp, tslib, offsets, _period +from pandas.tseries.frequencies import DAYS, MONTHS, _period_code_map + + +class TestPeriodProperties(tm.TestCase): + "Test properties such as year, month, weekday, etc...." + + def test_quarterly_negative_ordinals(self): + p = Period(ordinal=-1, freq='Q-DEC') + self.assertEqual(p.year, 1969) + self.assertEqual(p.quarter, 4) + self.assertIsInstance(p, Period) + + p = Period(ordinal=-2, freq='Q-DEC') + self.assertEqual(p.year, 1969) + self.assertEqual(p.quarter, 3) + self.assertIsInstance(p, Period) + + p = Period(ordinal=-2, freq='M') + self.assertEqual(p.year, 1969) + self.assertEqual(p.month, 11) + self.assertIsInstance(p, Period) + + def test_period_cons_quarterly(self): + # bugs in scikits.timeseries + for month in MONTHS: + freq = 'Q-%s' % month + exp = Period('1989Q3', freq=freq) + self.assertIn('1989Q3', str(exp)) + stamp = exp.to_timestamp('D', how='end') + p = Period(stamp, freq=freq) + self.assertEqual(p, exp) + + stamp = exp.to_timestamp('3D', how='end') + p = Period(stamp, freq=freq) + self.assertEqual(p, exp) + + def test_period_cons_annual(self): + # bugs in scikits.timeseries + for month in MONTHS: + freq = 'A-%s' % month + exp = Period('1989', freq=freq) + stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) + p = Period(stamp, freq=freq) + self.assertEqual(p, exp + 1) + self.assertIsInstance(p, Period) + + def test_period_cons_weekly(self): + for num in range(10, 17): + daystr = '2011-02-%d' % num + for day in DAYS: + freq = 'W-%s' % day + + result = Period(daystr, freq=freq) + expected = Period(daystr, freq='D').asfreq(freq) + self.assertEqual(result, expected) + self.assertIsInstance(result, Period) + + def test_period_from_ordinal(self): + p = pd.Period('2011-01', freq='M') + res = pd.Period._from_ordinal(p.ordinal, freq='M') + self.assertEqual(p, res) + self.assertIsInstance(res, Period) + + def test_period_cons_nat(self): + p = Period('NaT', freq='M') + self.assertIs(p, pd.NaT) + + p = Period('nat', freq='W-SUN') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT, freq='D') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT, freq='3D') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT, freq='1D1H') + self.assertIs(p, pd.NaT) + + p = Period('NaT') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT) + self.assertIs(p, pd.NaT) + + def test_cons_null_like(self): + # check Timestamp compat + self.assertIs(Timestamp('NaT'), pd.NaT) + self.assertIs(Period('NaT'), 
pd.NaT) + + self.assertIs(Timestamp(None), pd.NaT) + self.assertIs(Period(None), pd.NaT) + + self.assertIs(Timestamp(float('nan')), pd.NaT) + self.assertIs(Period(float('nan')), pd.NaT) + + self.assertIs(Timestamp(np.nan), pd.NaT) + self.assertIs(Period(np.nan), pd.NaT) + + def test_period_cons_mult(self): + p1 = Period('2011-01', freq='3M') + p2 = Period('2011-01', freq='M') + self.assertEqual(p1.ordinal, p2.ordinal) + + self.assertEqual(p1.freq, offsets.MonthEnd(3)) + self.assertEqual(p1.freqstr, '3M') + + self.assertEqual(p2.freq, offsets.MonthEnd()) + self.assertEqual(p2.freqstr, 'M') + + result = p1 + 1 + self.assertEqual(result.ordinal, (p2 + 3).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '3M') + + result = p1 - 1 + self.assertEqual(result.ordinal, (p2 - 3).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '3M') + + msg = ('Frequency must be positive, because it' + ' represents span: -3M') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-3M') + + msg = ('Frequency must be positive, because it' ' represents span: 0M') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='0M') + + def test_period_cons_combined(self): + p = [(Period('2011-01', freq='1D1H'), + Period('2011-01', freq='1H1D'), + Period('2011-01', freq='H')), + (Period(ordinal=1, freq='1D1H'), + Period(ordinal=1, freq='1H1D'), + Period(ordinal=1, freq='H'))] + + for p1, p2, p3 in p: + self.assertEqual(p1.ordinal, p3.ordinal) + self.assertEqual(p2.ordinal, p3.ordinal) + + self.assertEqual(p1.freq, offsets.Hour(25)) + self.assertEqual(p1.freqstr, '25H') + + self.assertEqual(p2.freq, offsets.Hour(25)) + self.assertEqual(p2.freqstr, '25H') + + self.assertEqual(p3.freq, offsets.Hour()) + self.assertEqual(p3.freqstr, 'H') + + result = p1 + 1 + self.assertEqual(result.ordinal, (p3 + 25).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '25H') + + result = p2 + 1 + self.assertEqual(result.ordinal, (p3 + 25).ordinal) + self.assertEqual(result.freq, p2.freq) + self.assertEqual(result.freqstr, '25H') + + result = p1 - 1 + self.assertEqual(result.ordinal, (p3 - 25).ordinal) + self.assertEqual(result.freq, p1.freq) + self.assertEqual(result.freqstr, '25H') + + result = p2 - 1 + self.assertEqual(result.ordinal, (p3 - 25).ordinal) + self.assertEqual(result.freq, p2.freq) + self.assertEqual(result.freqstr, '25H') + + msg = ('Frequency must be positive, because it' + ' represents span: -25H') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-1D1H') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='-1H1D') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='-1D1H') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='-1H1D') + + msg = ('Frequency must be positive, because it' + ' represents span: 0D') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='0D0H') + with tm.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq='0D0H') + + # You can only combine together day and intraday offsets + msg = ('Invalid frequency: 1W1D') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='1W1D') + msg = ('Invalid frequency: 1D1W') + with tm.assertRaisesRegexp(ValueError, msg): + Period('2011-01', freq='1D1W') + + def test_timestamp_tz_arg(self): + tm._skip_if_no_pytz() + import pytz + for case in ['Europe/Brussels', 'Asia/Tokyo', 'US/Pacific']: + p = 
Period('1/1/2005', freq='M').to_timestamp(tz=case) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', freq='3H').to_timestamp(tz=case) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=case) + exp = Timestamp('31/12/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=case) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + exp_zone = pytz.timezone(case).normalize(p) + + self.assertEqual(p, exp) + self.assertEqual(p.tz, exp_zone.tzinfo) + self.assertEqual(p.tz, exp.tz) + + def test_timestamp_tz_arg_dateutil(self): + from pandas.tslib import _dateutil_gettz as gettz + from pandas.tslib import maybe_get_tz + for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', + 'dateutil/US/Pacific']: + p = Period('1/1/2005', freq='M').to_timestamp( + tz=maybe_get_tz(case)) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + self.assertEqual(p, exp) + self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) + self.assertEqual(p.tz, exp.tz) + + p = Period('1/1/2005', + freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) + exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) + self.assertEqual(p, exp) + self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) + self.assertEqual(p.tz, exp.tz) + + def test_timestamp_tz_arg_dateutil_from_string(self): + from pandas.tslib import _dateutil_gettz as gettz + p = Period('1/1/2005', + freq='M').to_timestamp(tz='dateutil/Europe/Brussels') + self.assertEqual(p.tz, gettz('Europe/Brussels')) + + def test_timestamp_mult(self): + p = pd.Period('2011-01', freq='M') + self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) + self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-01-31')) + + p = pd.Period('2011-01', freq='3M') + self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) + self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) + + def test_period_constructor(self): + i1 = Period('1/1/2005', freq='M') + i2 = Period('Jan 2005') + + self.assertEqual(i1, i2) + + i1 = Period('2005', freq='A') + i2 = Period('2005') + i3 = Period('2005', freq='a') + + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + + i4 = Period('2005', freq='M') + i5 = Period('2005', freq='m') + + self.assertRaises(ValueError, i1.__ne__, i4) + self.assertEqual(i4, i5) + + i1 = Period.now('Q') + i2 = Period(datetime.now(), freq='Q') + i3 = Period.now('q') + + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + + # Biz day construction, roll forward if non-weekday + i1 = Period('3/10/12', freq='B') + i2 = Period('3/10/12', freq='D') + self.assertEqual(i1, i2.asfreq('B')) + i2 = Period('3/11/12', freq='D') + self.assertEqual(i1, i2.asfreq('B')) + i2 = Period('3/12/12', freq='D') + self.assertEqual(i1, i2.asfreq('B')) + + i3 = Period('3/10/12', freq='b') + self.assertEqual(i1, i3) + + i1 = Period(year=2005, quarter=1, freq='Q') + i2 = Period('1/1/2005', freq='Q') + self.assertEqual(i1, i2) + + i1 = Period(year=2005, quarter=3, freq='Q') + 
i2 = Period('9/1/2005', freq='Q') + self.assertEqual(i1, i2) + + i1 = Period(year=2005, month=3, day=1, freq='D') + i2 = Period('3/1/2005', freq='D') + self.assertEqual(i1, i2) + + i3 = Period(year=2005, month=3, day=1, freq='d') + self.assertEqual(i1, i3) + + i1 = Period(year=2012, month=3, day=10, freq='B') + i2 = Period('3/12/12', freq='B') + self.assertEqual(i1, i2) + + i1 = Period('2005Q1') + i2 = Period(year=2005, quarter=1, freq='Q') + i3 = Period('2005q1') + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + + i1 = Period('05Q1') + self.assertEqual(i1, i2) + lower = Period('05q1') + self.assertEqual(i1, lower) + + i1 = Period('1Q2005') + self.assertEqual(i1, i2) + lower = Period('1q2005') + self.assertEqual(i1, lower) + + i1 = Period('1Q05') + self.assertEqual(i1, i2) + lower = Period('1q05') + self.assertEqual(i1, lower) + + i1 = Period('4Q1984') + self.assertEqual(i1.year, 1984) + lower = Period('4q1984') + self.assertEqual(i1, lower) + + i1 = Period('1982', freq='min') + i2 = Period('1982', freq='MIN') + self.assertEqual(i1, i2) + i2 = Period('1982', freq=('Min', 1)) + self.assertEqual(i1, i2) + + expected = Period('2007-01', freq='M') + i1 = Period('200701', freq='M') + self.assertEqual(i1, expected) + + i1 = Period('200701', freq='M') + self.assertEqual(i1, expected) + + i1 = Period(200701, freq='M') + self.assertEqual(i1, expected) + + i1 = Period(ordinal=200701, freq='M') + self.assertEqual(i1.year, 18695) + + i1 = Period(datetime(2007, 1, 1), freq='M') + i2 = Period('200701', freq='M') + self.assertEqual(i1, i2) + + i1 = Period(date(2007, 1, 1), freq='M') + i2 = Period(datetime(2007, 1, 1), freq='M') + i3 = Period(np.datetime64('2007-01-01'), freq='M') + i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') + i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + self.assertEqual(i1, i4) + self.assertEqual(i1, i5) + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + self.assertEqual(i1, expected) + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + self.assertEqual(i1, expected) + + self.assertRaises(ValueError, Period, ordinal=200701) + + self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + + def test_period_constructor_offsets(self): + self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), + Period('1/1/2005', freq='M')) + self.assertEqual(Period('2005', freq=offsets.YearEnd()), + Period('2005', freq='A')) + self.assertEqual(Period('2005', freq=offsets.MonthEnd()), + Period('2005', freq='M')) + self.assertEqual(Period('3/10/12', freq=offsets.BusinessDay()), + Period('3/10/12', freq='B')) + self.assertEqual(Period('3/10/12', freq=offsets.Day()), + Period('3/10/12', freq='D')) + + self.assertEqual(Period(year=2005, quarter=1, + freq=offsets.QuarterEnd(startingMonth=12)), + Period(year=2005, quarter=1, freq='Q')) + self.assertEqual(Period(year=2005, quarter=2, + freq=offsets.QuarterEnd(startingMonth=12)), + Period(year=2005, quarter=2, freq='Q')) + + self.assertEqual(Period(year=2005, month=3, day=1, freq=offsets.Day()), + Period(year=2005, month=3, day=1, freq='D')) + self.assertEqual(Period(year=2012, month=3, day=10, 
+ freq=offsets.BDay()), + Period(year=2012, month=3, day=10, freq='B')) + + expected = Period('2005-03-01', freq='3D') + self.assertEqual(Period(year=2005, month=3, day=1, + freq=offsets.Day(3)), expected) + self.assertEqual(Period(year=2005, month=3, day=1, freq='3D'), + expected) + + self.assertEqual(Period(year=2012, month=3, day=10, + freq=offsets.BDay(3)), + Period(year=2012, month=3, day=10, freq='3B')) + + self.assertEqual(Period(200701, freq=offsets.MonthEnd()), + Period(200701, freq='M')) + + i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) + i2 = Period(ordinal=200701, freq='M') + self.assertEqual(i1, i2) + self.assertEqual(i1.year, 18695) + self.assertEqual(i2.year, 18695) + + i1 = Period(datetime(2007, 1, 1), freq='M') + i2 = Period('200701', freq='M') + self.assertEqual(i1, i2) + + i1 = Period(date(2007, 1, 1), freq='M') + i2 = Period(datetime(2007, 1, 1), freq='M') + i3 = Period(np.datetime64('2007-01-01'), freq='M') + i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') + i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') + self.assertEqual(i1, i2) + self.assertEqual(i1, i3) + self.assertEqual(i1, i4) + self.assertEqual(i1, i5) + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + self.assertEqual(i1, expected) + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + self.assertEqual(i1, expected) + + self.assertRaises(ValueError, Period, ordinal=200701) + + self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + + def test_freq_str(self): + i1 = Period('1982', freq='Min') + self.assertEqual(i1.freq, offsets.Minute()) + self.assertEqual(i1.freqstr, 'T') + + def test_period_deprecated_freq(self): + cases = {"M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], + "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], + "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], + "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], + "T": ["minute", "MINUTE", "MINUTELY", "minutely"], + "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], + "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], + "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], + "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"]} + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + for exp, freqs in iteritems(cases): + for freq in freqs: + with self.assertRaisesRegexp(ValueError, msg): + Period('2016-03-01 09:00', freq=freq) + with self.assertRaisesRegexp(ValueError, msg): + Period(ordinal=1, freq=freq) + + # check supported freq-aliases still works + p1 = Period('2016-03-01 09:00', freq=exp) + p2 = Period(ordinal=1, freq=exp) + tm.assertIsInstance(p1, Period) + tm.assertIsInstance(p2, Period) + + def test_hash(self): + self.assertEqual(hash(Period('2011-01', freq='M')), + hash(Period('2011-01', freq='M'))) + + self.assertNotEqual(hash(Period('2011-01-01', freq='D')), + hash(Period('2011-01', freq='M'))) + + self.assertNotEqual(hash(Period('2011-01', freq='3M')), + hash(Period('2011-01', freq='2M'))) + + self.assertNotEqual(hash(Period('2011-01', freq='M')), + hash(Period('2011-02', freq='M'))) + + def test_repr(self): + p = Period('Jan-2000') + self.assertIn('2000-01', repr(p)) + + p = 
Period('2000-12-15') + self.assertIn('2000-12-15', repr(p)) + + def test_repr_nat(self): + p = Period('nat', freq='M') + self.assertIn(repr(tslib.NaT), repr(p)) + + def test_millisecond_repr(self): + p = Period('2000-01-01 12:15:02.123') + + self.assertEqual("Period('2000-01-01 12:15:02.123', 'L')", repr(p)) + + def test_microsecond_repr(self): + p = Period('2000-01-01 12:15:02.123567') + + self.assertEqual("Period('2000-01-01 12:15:02.123567', 'U')", repr(p)) + + def test_strftime(self): + p = Period('2000-1-1 12:34:12', freq='S') + res = p.strftime('%Y-%m-%d %H:%M:%S') + self.assertEqual(res, '2000-01-01 12:34:12') + tm.assertIsInstance(res, text_type) # GH3363 + + def test_sub_delta(self): + left, right = Period('2011', freq='A'), Period('2007', freq='A') + result = left - right + self.assertEqual(result, 4) + + with self.assertRaises(period.IncompatibleFrequency): + left - Period('2007-01', freq='M') + + def test_to_timestamp(self): + p = Period('1982', freq='A') + start_ts = p.to_timestamp(how='S') + aliases = ['s', 'StarT', 'BEGIn'] + for a in aliases: + self.assertEqual(start_ts, p.to_timestamp('D', how=a)) + # freq with mult should not affect to the result + self.assertEqual(start_ts, p.to_timestamp('3D', how=a)) + + end_ts = p.to_timestamp(how='E') + aliases = ['e', 'end', 'FINIsH'] + for a in aliases: + self.assertEqual(end_ts, p.to_timestamp('D', how=a)) + self.assertEqual(end_ts, p.to_timestamp('3D', how=a)) + + from_lst = ['A', 'Q', 'M', 'W', 'B', 'D', 'H', 'Min', 'S'] + + def _ex(p): + return Timestamp((p + 1).start_time.value - 1) + + for i, fcode in enumerate(from_lst): + p = Period('1982', freq=fcode) + result = p.to_timestamp().to_period(fcode) + self.assertEqual(result, p) + + self.assertEqual(p.start_time, p.to_timestamp(how='S')) + + self.assertEqual(p.end_time, _ex(p)) + + # Frequency other than daily + + p = Period('1985', freq='A') + + result = p.to_timestamp('H', how='end') + expected = datetime(1985, 12, 31, 23) + self.assertEqual(result, expected) + result = p.to_timestamp('3H', how='end') + self.assertEqual(result, expected) + + result = p.to_timestamp('T', how='end') + expected = datetime(1985, 12, 31, 23, 59) + self.assertEqual(result, expected) + result = p.to_timestamp('2T', how='end') + self.assertEqual(result, expected) + + result = p.to_timestamp(how='end') + expected = datetime(1985, 12, 31) + self.assertEqual(result, expected) + + expected = datetime(1985, 1, 1) + result = p.to_timestamp('H', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('T', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('S', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('3H', how='start') + self.assertEqual(result, expected) + result = p.to_timestamp('5S', how='start') + self.assertEqual(result, expected) + + def test_start_time(self): + freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] + xp = datetime(2012, 1, 1) + for f in freq_lst: + p = Period('2012', freq=f) + self.assertEqual(p.start_time, xp) + self.assertEqual(Period('2012', freq='B').start_time, + datetime(2012, 1, 2)) + self.assertEqual(Period('2012', freq='W').start_time, + datetime(2011, 12, 26)) + + def test_end_time(self): + p = Period('2012', freq='A') + + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + xp = _ex(2013, 1, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='Q') + xp = _ex(2012, 4, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='M') + xp = _ex(2012, 2, 1) + 
self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='D') + xp = _ex(2012, 1, 2) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='H') + xp = _ex(2012, 1, 1, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='B') + xp = _ex(2012, 1, 3) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='W') + xp = _ex(2012, 1, 2) + self.assertEqual(xp, p.end_time) + + # Test for GH 11738 + p = Period('2012', freq='15D') + xp = _ex(2012, 1, 16) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='1D1H') + xp = _ex(2012, 1, 2, 1) + self.assertEqual(xp, p.end_time) + + p = Period('2012', freq='1H1D') + xp = _ex(2012, 1, 2, 1) + self.assertEqual(xp, p.end_time) + + def test_anchor_week_end_time(self): + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + p = Period('2013-1-1', 'W-SAT') + xp = _ex(2013, 1, 6) + self.assertEqual(p.end_time, xp) + + def test_properties_annually(self): + # Test properties on Periods with annual frequency. + a_date = Period(freq='A', year=2007) + self.assertEqual(a_date.year, 2007) + + def test_properties_quarterly(self): + # Test properties on Periods with quarterly frequency. + qedec_date = Period(freq="Q-DEC", year=2007, quarter=1) + qejan_date = Period(freq="Q-JAN", year=2007, quarter=1) + qejun_date = Period(freq="Q-JUN", year=2007, quarter=1) + # + for x in range(3): + for qd in (qedec_date, qejan_date, qejun_date): + self.assertEqual((qd + x).qyear, 2007) + self.assertEqual((qd + x).quarter, x + 1) + + def test_properties_monthly(self): + # Test properties on Periods with monthly frequency. + m_date = Period(freq='M', year=2007, month=1) + for x in range(11): + m_ival_x = m_date + x + self.assertEqual(m_ival_x.year, 2007) + if 1 <= x + 1 <= 3: + self.assertEqual(m_ival_x.quarter, 1) + elif 4 <= x + 1 <= 6: + self.assertEqual(m_ival_x.quarter, 2) + elif 7 <= x + 1 <= 9: + self.assertEqual(m_ival_x.quarter, 3) + elif 10 <= x + 1 <= 12: + self.assertEqual(m_ival_x.quarter, 4) + self.assertEqual(m_ival_x.month, x + 1) + + def test_properties_weekly(self): + # Test properties on Periods with weekly frequency. + w_date = Period(freq='W', year=2007, month=1, day=7) + # + self.assertEqual(w_date.year, 2007) + self.assertEqual(w_date.quarter, 1) + self.assertEqual(w_date.month, 1) + self.assertEqual(w_date.week, 1) + self.assertEqual((w_date - 1).week, 52) + self.assertEqual(w_date.days_in_month, 31) + self.assertEqual(Period(freq='W', year=2012, + month=2, day=1).days_in_month, 29) + + def test_properties_weekly_legacy(self): + # Test properties on Periods with weekly frequency. + w_date = Period(freq='W', year=2007, month=1, day=7) + self.assertEqual(w_date.year, 2007) + self.assertEqual(w_date.quarter, 1) + self.assertEqual(w_date.month, 1) + self.assertEqual(w_date.week, 1) + self.assertEqual((w_date - 1).week, 52) + self.assertEqual(w_date.days_in_month, 31) + + exp = Period(freq='W', year=2012, month=2, day=1) + self.assertEqual(exp.days_in_month, 29) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK', year=2007, month=1, day=7) + + def test_properties_daily(self): + # Test properties on Periods with daily frequency.
+ b_date = Period(freq='B', year=2007, month=1, day=1) + # + self.assertEqual(b_date.year, 2007) + self.assertEqual(b_date.quarter, 1) + self.assertEqual(b_date.month, 1) + self.assertEqual(b_date.day, 1) + self.assertEqual(b_date.weekday, 0) + self.assertEqual(b_date.dayofyear, 1) + self.assertEqual(b_date.days_in_month, 31) + self.assertEqual(Period(freq='B', year=2012, + month=2, day=1).days_in_month, 29) + # + d_date = Period(freq='D', year=2007, month=1, day=1) + # + self.assertEqual(d_date.year, 2007) + self.assertEqual(d_date.quarter, 1) + self.assertEqual(d_date.month, 1) + self.assertEqual(d_date.day, 1) + self.assertEqual(d_date.weekday, 0) + self.assertEqual(d_date.dayofyear, 1) + self.assertEqual(d_date.days_in_month, 31) + self.assertEqual(Period(freq='D', year=2012, month=2, + day=1).days_in_month, 29) + + def test_properties_hourly(self): + # Test properties on Periods with hourly frequency. + h_date1 = Period(freq='H', year=2007, month=1, day=1, hour=0) + h_date2 = Period(freq='2H', year=2007, month=1, day=1, hour=0) + + for h_date in [h_date1, h_date2]: + self.assertEqual(h_date.year, 2007) + self.assertEqual(h_date.quarter, 1) + self.assertEqual(h_date.month, 1) + self.assertEqual(h_date.day, 1) + self.assertEqual(h_date.weekday, 0) + self.assertEqual(h_date.dayofyear, 1) + self.assertEqual(h_date.hour, 0) + self.assertEqual(h_date.days_in_month, 31) + self.assertEqual(Period(freq='H', year=2012, month=2, day=1, + hour=0).days_in_month, 29) + + def test_properties_minutely(self): + # Test properties on Periods with minutely frequency. + t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + # + self.assertEqual(t_date.quarter, 1) + self.assertEqual(t_date.month, 1) + self.assertEqual(t_date.day, 1) + self.assertEqual(t_date.weekday, 0) + self.assertEqual(t_date.dayofyear, 1) + self.assertEqual(t_date.hour, 0) + self.assertEqual(t_date.minute, 0) + self.assertEqual(t_date.days_in_month, 31) + self.assertEqual(Period(freq='D', year=2012, month=2, day=1, hour=0, + minute=0).days_in_month, 29) + + def test_properties_secondly(self): + # Test properties on Periods with secondly frequency. 
+ s_date = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + # + self.assertEqual(s_date.year, 2007) + self.assertEqual(s_date.quarter, 1) + self.assertEqual(s_date.month, 1) + self.assertEqual(s_date.day, 1) + self.assertEqual(s_date.weekday, 0) + self.assertEqual(s_date.dayofyear, 1) + self.assertEqual(s_date.hour, 0) + self.assertEqual(s_date.minute, 0) + self.assertEqual(s_date.second, 0) + self.assertEqual(s_date.days_in_month, 31) + self.assertEqual(Period(freq='S', year=2012, month=2, day=1, hour=0, + minute=0, second=0).days_in_month, 29) + + def test_properties_nat(self): + p_nat = Period('NaT', freq='M') + t_nat = pd.Timestamp('NaT') + self.assertIs(p_nat, t_nat) + + # confirm Period('NaT') works identically to Timestamp('NaT') + for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week', + 'dayofyear', 'quarter', 'days_in_month']: + self.assertTrue(np.isnan(getattr(p_nat, f))) + self.assertTrue(np.isnan(getattr(t_nat, f))) + + def test_pnow(self): + dt = datetime.now() + + val = period.pnow('D') + exp = Period(dt, freq='D') + self.assertEqual(val, exp) + + val2 = period.pnow('2D') + exp2 = Period(dt, freq='2D') + self.assertEqual(val2, exp2) + self.assertEqual(val.ordinal, val2.ordinal) + self.assertEqual(val.ordinal, exp2.ordinal) + + def test_constructor_corner(self): + expected = Period('2007-01', freq='2M') + self.assertEqual(Period(year=2007, month=1, freq='2M'), expected) + + self.assertRaises(ValueError, Period, datetime.now()) + self.assertRaises(ValueError, Period, datetime.now().date()) + self.assertRaises(ValueError, Period, 1.6, freq='D') + self.assertRaises(ValueError, Period, ordinal=1.6, freq='D') + self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D') + self.assertIs(Period(None), pd.NaT) + self.assertRaises(ValueError, Period, month=1) + + p = Period('2007-01-01', freq='D') + + result = Period(p, freq='A') + exp = Period('2007', freq='A') + self.assertEqual(result, exp) + + def test_constructor_infer_freq(self): + p = Period('2007-01-01') + self.assertEqual(p.freq, 'D') + + p = Period('2007-01-01 07') + self.assertEqual(p.freq, 'H') + + p = Period('2007-01-01 07:10') + self.assertEqual(p.freq, 'T') + + p = Period('2007-01-01 07:10:15') + self.assertEqual(p.freq, 'S') + + p = Period('2007-01-01 07:10:15.123') + self.assertEqual(p.freq, 'L') + + p = Period('2007-01-01 07:10:15.123000') + self.assertEqual(p.freq, 'L') + + p = Period('2007-01-01 07:10:15.123400') + self.assertEqual(p.freq, 'U') + + def test_asfreq_MS(self): + initial = Period("2013") + + self.assertEqual(initial.asfreq(freq="M", how="S"), + Period('2013-01', 'M')) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + initial.asfreq(freq="MS", how="S") + + with tm.assertRaisesRegexp(ValueError, msg): + pd.Period('2013-01', 'MS') + + self.assertTrue(_period_code_map.get("MS") is None) + + def test_badinput(self): + self.assertRaises(ValueError, Period, '-2000', 'A') + self.assertRaises(tslib.DateParseError, Period, '0', 'A') + self.assertRaises(tslib.DateParseError, Period, '1/1/-2000', 'A') + + def test_multiples(self): + result1 = Period('1989', freq='2A') + result2 = Period('1989', freq='A') + self.assertEqual(result1.ordinal, result2.ordinal) + self.assertEqual(result1.freqstr, '2A-DEC') + self.assertEqual(result2.freqstr, 'A-DEC') + self.assertEqual(result1.freq, offsets.YearEnd(2)) + self.assertEqual(result2.freq, offsets.YearEnd()) + + self.assertEqual((result1 + 1).ordinal, result1.ordinal + 
2) + self.assertEqual((1 + result1).ordinal, result1.ordinal + 2) + self.assertEqual((result1 - 1).ordinal, result2.ordinal - 2) + self.assertEqual((-1 + result1).ordinal, result2.ordinal - 2) + + def test_round_trip(self): + + p = Period('2000Q1') + new_p = self.round_trip_pickle(p) + self.assertEqual(new_p, p) + + +class TestPeriodField(tm.TestCase): + + def test_get_period_field_raises_on_out_of_range(self): + self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) + + def test_get_period_field_array_raises_on_out_of_range(self): + self.assertRaises(ValueError, _period.get_period_field_arr, -1, + np.empty(1), 0) + + +class TestFreqConversion(tm.TestCase): + "Test frequency conversion of date objects" + + def test_asfreq_corner(self): + val = Period(freq='A', year=2007) + result1 = val.asfreq('5t') + result2 = val.asfreq('t') + expected = Period('2007-12-31 23:59', freq='t') + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freqstr, '5T') + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freqstr, 'T') + + def test_conv_annual(self): + # frequency conversion tests: from Annual Frequency + + ival_A = Period(freq='A', year=2007) + + ival_AJAN = Period(freq="A-JAN", year=2007) + ival_AJUN = Period(freq="A-JUN", year=2007) + ival_ANOV = Period(freq="A-NOV", year=2007) + + ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) + ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) + ival_A_to_M_start = Period(freq='M', year=2007, month=1) + ival_A_to_M_end = Period(freq='M', year=2007, month=12) + ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) + ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) + ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) + ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + + ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_AJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) + ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) + ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) + + self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) + self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) + self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) + self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) + self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) + self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) + self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) + self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) + self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) + self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) 
+ self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) + self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) + self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) + self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) + + self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) + self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) + + self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) + self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) + + self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) + self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) + + self.assertEqual(ival_A.asfreq('A'), ival_A) + + def test_conv_quarterly(self): + # frequency conversion tests: from Quarterly Frequency + + ival_Q = Period(freq='Q', year=2007, quarter=1) + ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) + + ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) + ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) + + ival_Q_to_A = Period(freq='A', year=2007) + ival_Q_to_M_start = Period(freq='M', year=2007, month=1) + ival_Q_to_M_end = Period(freq='M', year=2007, month=3) + ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) + ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) + ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) + ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) + ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, + minute=59, second=59) + + ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) + + ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) + + self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) + self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) + + self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) + self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) + self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) + self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) + self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) + self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) + self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) + self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) + self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) + self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) + self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) + self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) + self.assertEqual(ival_Q.asfreq('S', 'S'), 
ival_Q_to_S_start) + self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) + + self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) + self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) + self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) + self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) + + self.assertEqual(ival_Q.asfreq('Q'), ival_Q) + + def test_conv_monthly(self): + # frequency conversion tests: from Monthly Frequency + + ival_M = Period(freq='M', year=2007, month=1) + ival_M_end_of_year = Period(freq='M', year=2007, month=12) + ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) + ival_M_to_A = Period(freq='A', year=2007) + ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) + ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) + ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_M_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) + ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_M_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, + minute=59, second=59) + + self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) + self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) + + self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) + self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) + self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) + self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) + self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) + self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) + self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) + self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) + self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) + self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) + self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) + self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) + + self.assertEqual(ival_M.asfreq('M'), ival_M) + + def test_conv_weekly(self): + # frequency conversion tests: from Weekly Frequency + ival_W = Period(freq='W', year=2007, month=1, day=1) + + ival_WSUN = Period(freq='W', year=2007, month=1, day=7) + ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) + ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) + ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) + ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) + ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) + ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) + + ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) + ival_WSAT_to_D_end = Period(freq='D', year=2007, 
month=1, day=6) + ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) + ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) + ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) + ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) + ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) + ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) + ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) + ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) + ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) + ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) + + ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) + ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) + ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) + ival_W_to_A = Period(freq='A', year=2007) + ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_W_to_M = Period(freq='M', year=2007, month=1) + + if Period(freq='D', year=2007, month=12, day=31).weekday == 6: + ival_W_to_A_end_of_year = Period(freq='A', year=2007) + else: + ival_W_to_A_end_of_year = Period(freq='A', year=2008) + + if Period(freq='D', year=2007, month=3, day=31).weekday == 6: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) + else: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) + + if Period(freq='D', year=2007, month=1, day=31).weekday == 6: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) + else: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) + + ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) + ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) + ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, + minute=59, second=59) + + self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) + self.assertEqual(ival_W_end_of_year.asfreq('A'), + ival_W_to_A_end_of_year) + self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) + self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), + ival_W_to_Q_end_of_quarter) + self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) + self.assertEqual(ival_W_end_of_month.asfreq('M'), + ival_W_to_M_end_of_month) + + self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) + self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) + + self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) + self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) + + self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) + self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) + self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) + self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) + self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) + self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) + 
self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) + self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) + self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) + self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) + self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) + self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) + self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) + self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) + + self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) + self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) + self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) + self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) + self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) + self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) + + self.assertEqual(ival_W.asfreq('W'), ival_W) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + ival_W.asfreq('WK') + + def test_conv_weekly_legacy(self): + # frequency conversion tests: from Weekly Frequency + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK', year=2007, month=1, day=1) + + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-SAT', year=2007, month=1, day=6) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-FRI', year=2007, month=1, day=5) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-THU', year=2007, month=1, day=4) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-WED', year=2007, month=1, day=3) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-TUE', year=2007, month=1, day=2) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-MON', year=2007, month=1, day=1) + + def test_conv_business(self): + # frequency conversion tests: from Business Frequency" + + ival_B = Period(freq='B', year=2007, month=1, day=1) + ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31) + ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30) + ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31) + ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5) + + ival_B_to_A = Period(freq='A', year=2007) + ival_B_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_B_to_M = Period(freq='M', year=2007, month=1) + ival_B_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) + ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, + minute=59, second=59) + + self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) 
+ self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) + + self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) + + self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) + self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) + self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) + self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) + self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) + self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) + + self.assertEqual(ival_B.asfreq('B'), ival_B) + + def test_conv_daily(self): + # frequency conversion tests: from Business Frequency" + + ival_D = Period(freq='D', year=2007, month=1, day=1) + ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31) + ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31) + ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31) + ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7) + + ival_D_friday = Period(freq='D', year=2007, month=1, day=5) + ival_D_saturday = Period(freq='D', year=2007, month=1, day=6) + ival_D_sunday = Period(freq='D', year=2007, month=1, day=7) + + # TODO: unused? + # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) + + ival_B_friday = Period(freq='B', year=2007, month=1, day=5) + ival_B_monday = Period(freq='B', year=2007, month=1, day=8) + + ival_D_to_A = Period(freq='A', year=2007) + + ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008) + ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007) + ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007) + + ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) + ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) + ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) + + ival_D_to_M = Period(freq='M', year=2007, month=1) + ival_D_to_W = Period(freq='W', year=2007, month=1, day=7) + + ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) + ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, + minute=59, second=59) + + self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) + + self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), + ival_Deoq_to_AJAN) + self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), + ival_Deoq_to_AJUN) + self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), + ival_Deoq_to_ADEC) + + self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) + self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) + self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) + self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) + self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) + self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) + self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) + self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) + self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) + + self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) + self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) + self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) + self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) + self.assertEqual(ival_D_sunday.asfreq('B', 
'E'), ival_B_monday) + + self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) + self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) + self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) + self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) + self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) + self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) + + self.assertEqual(ival_D.asfreq('D'), ival_D) + + def test_conv_hourly(self): + # frequency conversion tests: from Hourly Frequency" + + ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, + hour=23) + ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, + hour=23) + ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, + hour=23) + ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, + hour=23) + ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, + hour=23) + + ival_H_to_A = Period(freq='A', year=2007) + ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_H_to_M = Period(freq='M', year=2007, month=1) + ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) + + ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=59) + ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=59, second=59) + + self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) + self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) + + self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) + self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) + self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) + self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) + + self.assertEqual(ival_H.asfreq('H'), ival_H) + + def test_conv_minutely(self): + # frequency conversion tests: from Minutely Frequency" + + ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_hour = Period(freq='Min', year=2007, 
month=1, day=1, + hour=0, minute=59) + + ival_T_to_A = Period(freq='A', year=2007) + ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_T_to_M = Period(freq='M', year=2007, month=1) + ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + + ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=59) + + self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) + self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) + self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) + self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) + + self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) + self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) + + self.assertEqual(ival_T.asfreq('Min'), ival_T) + + def test_conv_secondly(self): + # frequency conversion tests: from Secondly Frequency" + + ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, + second=0) + ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, + hour=23, minute=59, second=59) + ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=59, second=59) + ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=0, second=59) + + ival_S_to_A = Period(freq='A', year=2007) + ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_S_to_M = Period(freq='M', year=2007, month=1) + ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + + self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) + self.assertEqual(ival_S_end_of_week.asfreq('W'), 
ival_S_to_W) + self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) + self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) + + self.assertEqual(ival_S.asfreq('S'), ival_S) + + def test_asfreq_mult(self): + # normal freq to mult freq + p = Period(freq='A', year=2007) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq) + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + # mult freq to normal freq + p = Period(freq='3A', year=2007) + # ordinal will change because how=E is the default + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq) + expected = Period('2009', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2007-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='3A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2009-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + def test_asfreq_combined(self): + # normal freq to combined freq + p = Period('2007', freq='H') + + # ordinal will not change + expected = Period('2007', freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): + result = p.asfreq(freq, how=how) + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + # combined freq to normal freq + p1 = Period(freq='1D1H', year=2007) + p2 = Period(freq='1H1D', year=2007) + + # ordinal will change because how=E is the default + result1 = p1.asfreq('H') + result2 = p2.asfreq('H') + expected = 
Period('2007-01-02', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + # ordinal will not change + result1 = p1.asfreq('H', how='S') + result2 = p2.asfreq('H', how='S') + expected = Period('2007-01-01', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + def test_is_leap_year(self): + # GH 13727 + for freq in ['A', 'M', 'D', 'H']: + p = Period('2000-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + self.assertIsInstance(p.is_leap_year, bool) + + p = Period('1999-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + + p = Period('2004-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + + p = Period('2100-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + + +class TestMethods(tm.TestCase): + + def test_add(self): + dt1 = Period(freq='D', year=2008, month=1, day=1) + dt2 = Period(freq='D', year=2008, month=1, day=2) + self.assertEqual(dt1 + 1, dt2) + self.assertEqual(1 + dt1, dt2) + + def test_add_pdnat(self): + p = pd.Period('2011-01', freq='M') + self.assertIs(p + pd.NaT, pd.NaT) + self.assertIs(pd.NaT + p, pd.NaT) + + p = pd.Period('NaT', freq='M') + self.assertIs(p + pd.NaT, pd.NaT) + self.assertIs(pd.NaT + p, pd.NaT) + + def test_add_raises(self): + # GH 4731 + dt1 = Period(freq='D', year=2008, month=1, day=1) + dt2 = Period(freq='D', year=2008, month=1, day=2) + msg = r"unsupported operand type\(s\)" + with tm.assertRaisesRegexp(TypeError, msg): + dt1 + "str" + + msg = r"unsupported operand type\(s\)" + with tm.assertRaisesRegexp(TypeError, msg): + "str" + dt1 + + with tm.assertRaisesRegexp(TypeError, msg): + dt1 + dt2 + + def test_sub(self): + dt1 = Period('2011-01-01', freq='D') + dt2 = Period('2011-01-15', freq='D') + + self.assertEqual(dt1 - dt2, -14) + self.assertEqual(dt2 - dt1, 14) + + msg = r"Input has different freq=M from Period\(freq=D\)" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + dt1 - pd.Period('2011-02', freq='M') + + def test_add_offset(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('2011', freq=freq) + exp = Period('2013', freq=freq) + self.assertEqual(p + offsets.YearEnd(2), exp) + self.assertEqual(offsets.YearEnd(2) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + for freq in ['M', '2M', '3M']: + p = Period('2011-03', freq=freq) + exp = Period('2011-05', freq=freq) + self.assertEqual(p + offsets.MonthEnd(2), exp) + self.assertEqual(offsets.MonthEnd(2) + p, exp) + + exp = Period('2012-03', freq=freq) + self.assertEqual(p + offsets.MonthEnd(12), exp) + self.assertEqual(offsets.MonthEnd(12) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if 
isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('2011-04-01', freq=freq) + + exp = Period('2011-04-06', freq=freq) + self.assertEqual(p + offsets.Day(5), exp) + self.assertEqual(offsets.Day(5) + p, exp) + + exp = Period('2011-04-02', freq=freq) + self.assertEqual(p + offsets.Hour(24), exp) + self.assertEqual(offsets.Hour(24) + p, exp) + + exp = Period('2011-04-03', freq=freq) + self.assertEqual(p + np.timedelta64(2, 'D'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(2, 'D') + p + + exp = Period('2011-04-02', freq=freq) + self.assertEqual(p + np.timedelta64(3600 * 24, 's'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(3600 * 24, 's') + p + + exp = Period('2011-03-30', freq=freq) + self.assertEqual(p + timedelta(-2), exp) + self.assertEqual(timedelta(-2) + p, exp) + + exp = Period('2011-04-03', freq=freq) + self.assertEqual(p + timedelta(hours=48), exp) + self.assertEqual(timedelta(hours=48) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + for freq in ['H', '2H', '3H']: + p = Period('2011-04-01 09:00', freq=freq) + + exp = Period('2011-04-03 09:00', freq=freq) + self.assertEqual(p + offsets.Day(2), exp) + self.assertEqual(offsets.Day(2) + p, exp) + + exp = Period('2011-04-01 12:00', freq=freq) + self.assertEqual(p + offsets.Hour(3), exp) + self.assertEqual(offsets.Hour(3) + p, exp) + + exp = Period('2011-04-01 12:00', freq=freq) + self.assertEqual(p + np.timedelta64(3, 'h'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(3, 'h') + p + + exp = Period('2011-04-01 10:00', freq=freq) + self.assertEqual(p + np.timedelta64(3600, 's'), exp) + with tm.assertRaises(TypeError): + np.timedelta64(3600, 's') + p + + exp = Period('2011-04-01 11:00', freq=freq) + self.assertEqual(p + timedelta(minutes=120), exp) + self.assertEqual(timedelta(minutes=120) + p, exp) + + exp = Period('2011-04-05 12:00', freq=freq) + self.assertEqual(p + timedelta(days=4, minutes=180), exp) + self.assertEqual(timedelta(days=4, minutes=180) + p, exp) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + with tm.assertRaises(period.IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + with tm.assertRaises(period.IncompatibleFrequency): + o + p + + def test_add_offset_nat(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('NaT', freq=freq) + for o in [offsets.YearEnd(2)]: + self.assertIs(p + o, tslib.NaT) + self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + 
self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), + np.timedelta64(3600 * 24, 's'), timedelta(-2), + timedelta(hours=48)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + for freq in ['H', '2H', '3H']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), + np.timedelta64(3600, 's'), timedelta(minutes=120), + timedelta(days=4, minutes=180)]: + self.assertIs(p + o, tslib.NaT) + + if not isinstance(o, np.timedelta64): + self.assertIs(o + p, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + self.assertIs(p + o, tslib.NaT) + + if isinstance(o, np.timedelta64): + with tm.assertRaises(TypeError): + o + p + else: + self.assertIs(o + p, tslib.NaT) + + def test_sub_pdnat(self): + # GH 13071 + p = pd.Period('2011-01', freq='M') + self.assertIs(p - pd.NaT, pd.NaT) + self.assertIs(pd.NaT - p, pd.NaT) + + p = pd.Period('NaT', freq='M') + self.assertIs(p - pd.NaT, pd.NaT) + self.assertIs(pd.NaT - p, pd.NaT) + + def test_sub_offset(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('2011', freq=freq) + self.assertEqual(p - offsets.YearEnd(2), Period('2009', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + for freq in ['M', '2M', '3M']: + p = Period('2011-03', freq=freq) + self.assertEqual(p - offsets.MonthEnd(2), + Period('2011-01', freq=freq)) + self.assertEqual(p - offsets.MonthEnd(12), + Period('2010-03', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('2011-04-01', freq=freq) + self.assertEqual(p - offsets.Day(5), + Period('2011-03-27', freq=freq)) + self.assertEqual(p - offsets.Hour(24), + Period('2011-03-31', freq=freq)) + self.assertEqual(p - np.timedelta64(2, 'D'), + Period('2011-03-30', freq=freq)) + self.assertEqual(p - np.timedelta64(3600 * 24, 's'), + Period('2011-03-31', freq=freq)) + self.assertEqual(p - timedelta(-2), + Period('2011-04-03', freq=freq)) + self.assertEqual(p - timedelta(hours=48), + Period('2011-03-30', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + for freq in ['H', '2H', '3H']: + p = Period('2011-04-01 09:00', freq=freq) + self.assertEqual(p - offsets.Day(2), + Period('2011-03-30 09:00', 
freq=freq)) + self.assertEqual(p - offsets.Hour(3), + Period('2011-04-01 06:00', freq=freq)) + self.assertEqual(p - np.timedelta64(3, 'h'), + Period('2011-04-01 06:00', freq=freq)) + self.assertEqual(p - np.timedelta64(3600, 's'), + Period('2011-04-01 08:00', freq=freq)) + self.assertEqual(p - timedelta(minutes=120), + Period('2011-04-01 07:00', freq=freq)) + self.assertEqual(p - timedelta(days=4, minutes=180), + Period('2011-03-28 06:00', freq=freq)) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + with tm.assertRaises(period.IncompatibleFrequency): + p - o + + def test_sub_offset_nat(self): + # freq is DateOffset + for freq in ['A', '2A', '3A']: + p = Period('NaT', freq=freq) + for o in [offsets.YearEnd(2)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p - o, tslib.NaT) + + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(365, 'D'), + timedelta(365)]: + self.assertIs(p - o, tslib.NaT) + + # freq is Tick + for freq in ['D', '2D', '3D']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), + np.timedelta64(3600 * 24, 's'), timedelta(-2), + timedelta(hours=48)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(4, 'h'), + timedelta(hours=23)]: + self.assertIs(p - o, tslib.NaT) + + for freq in ['H', '2H', '3H']: + p = Period('NaT', freq=freq) + for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), + np.timedelta64(3600, 's'), timedelta(minutes=120), + timedelta(days=4, minutes=180)]: + self.assertIs(p - o, tslib.NaT) + + for o in [offsets.YearBegin(2), offsets.MonthBegin(1), + offsets.Minute(), np.timedelta64(3200, 's'), + timedelta(hours=23, minutes=30)]: + self.assertIs(p - o, tslib.NaT) + + def test_nat_ops(self): + for freq in ['M', '2M', '3M']: + p = Period('NaT', freq=freq) + self.assertIs(p + 1, tslib.NaT) + self.assertIs(1 + p, tslib.NaT) + self.assertIs(p - 1, tslib.NaT) + self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) + self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) + + def test_period_ops_offset(self): + p = Period('2011-04-01', freq='D') + result = p + offsets.Day() + exp = pd.Period('2011-04-02', freq='D') + self.assertEqual(result, exp) + + result = p - offsets.Day(2) + exp = pd.Period('2011-03-30', freq='D') + self.assertEqual(result, exp) + + msg = r"Input cannot be converted to Period\(freq=D\)" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + p + offsets.Hour(2) + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + p - offsets.Hour(2) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py new file mode 100644 index 0000000000000..f1ae7765648ca --- /dev/null +++ b/pandas/tests/series/test_period.py @@ -0,0 +1,248 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +import pandas.tseries.period as period +from pandas import Series, period_range, DataFrame, Period + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestSeriesPeriod(tm.TestCase): + + def setUp(self): + self.series = 
Series(period_range('2000-01-01', periods=10, freq='D')) + + def test_auto_conversion(self): + series = Series(list(period_range('2000-01-01', periods=10, freq='D'))) + self.assertEqual(series.dtype, 'object') + + series = pd.Series([pd.Period('2011-01-01', freq='D'), + pd.Period('2011-02-01', freq='D')]) + self.assertEqual(series.dtype, 'object') + + def test_getitem(self): + self.assertEqual(self.series[1], pd.Period('2000-01-02', freq='D')) + + result = self.series[[2, 4]] + exp = pd.Series([pd.Period('2000-01-03', freq='D'), + pd.Period('2000-01-05', freq='D')], + index=[2, 4]) + self.assert_series_equal(result, exp) + self.assertEqual(result.dtype, 'object') + + def test_isnull(self): + # GH 13737 + s = Series([pd.Period('2011-01', freq='M'), + pd.Period('NaT', freq='M')]) + tm.assert_series_equal(s.isnull(), Series([False, True])) + tm.assert_series_equal(s.notnull(), Series([True, False])) + + def test_fillna(self): + # GH 13737 + s = Series([pd.Period('2011-01', freq='M'), + pd.Period('NaT', freq='M')]) + + res = s.fillna(pd.Period('2012-01', freq='M')) + exp = Series([pd.Period('2011-01', freq='M'), + pd.Period('2012-01', freq='M')]) + tm.assert_series_equal(res, exp) + self.assertEqual(res.dtype, 'object') + + res = s.fillna('XXX') + exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) + tm.assert_series_equal(res, exp) + self.assertEqual(res.dtype, 'object') + + def test_dropna(self): + # GH 13737 + s = Series([pd.Period('2011-01', freq='M'), + pd.Period('NaT', freq='M')]) + tm.assert_series_equal(s.dropna(), + Series([pd.Period('2011-01', freq='M')])) + + def test_series_comparison_scalars(self): + val = pd.Period('2000-01-04', freq='D') + result = self.series > val + expected = pd.Series([x > val for x in self.series]) + tm.assert_series_equal(result, expected) + + val = self.series[5] + result = self.series > val + expected = pd.Series([x > val for x in self.series]) + tm.assert_series_equal(result, expected) + + def test_between(self): + left, right = self.series[[2, 7]] + result = self.series.between(left, right) + expected = (self.series >= left) & (self.series <= right) + tm.assert_series_equal(result, expected) + + # --------------------------------------------------------------------- + # NaT support + + """ + # ToDo: Enable when support period dtype + def test_NaT_scalar(self): + series = Series([0, 1000, 2000, iNaT], dtype='period[D]') + + val = series[3] + self.assertTrue(isnull(val)) + + series[2] = val + self.assertTrue(isnull(series[2])) + + def test_NaT_cast(self): + result = Series([np.nan]).astype('period[D]') + expected = Series([NaT]) + tm.assert_series_equal(result, expected) + """ + + def test_set_none_nan(self): + # currently Period is stored as object dtype, not as NaT + self.series[3] = None + self.assertIs(self.series[3], None) + + self.series[3:5] = None + self.assertIs(self.series[4], None) + + self.series[5] = np.nan + self.assertTrue(np.isnan(self.series[5])) + + self.series[5:7] = np.nan + self.assertTrue(np.isnan(self.series[6])) + + def test_intercept_astype_object(self): + expected = self.series.astype('object') + + df = DataFrame({'a': self.series, + 'b': np.random.randn(len(self.series))}) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) + + result = df.values.squeeze() + self.assertTrue((result[:, 0] == expected.values).all()) + + def test_comp_series_period_scalar(self): + # GH 13200 + for freq in ['M', '2M', '3M']: + base = 
Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + p = Period('2011-02', freq=freq) + + exp = pd.Series([False, True, False, False]) + tm.assert_series_equal(base == p, exp) + tm.assert_series_equal(p == base, exp) + + exp = pd.Series([True, False, True, True]) + tm.assert_series_equal(base != p, exp) + tm.assert_series_equal(p != base, exp) + + exp = pd.Series([False, False, True, True]) + tm.assert_series_equal(base > p, exp) + tm.assert_series_equal(p < base, exp) + + exp = pd.Series([True, False, False, False]) + tm.assert_series_equal(base < p, exp) + tm.assert_series_equal(p > base, exp) + + exp = pd.Series([False, True, True, True]) + tm.assert_series_equal(base >= p, exp) + tm.assert_series_equal(p <= base, exp) + + exp = pd.Series([True, True, False, False]) + tm.assert_series_equal(base <= p, exp) + tm.assert_series_equal(p >= base, exp) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= Period('2011', freq='A') + + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + Period('2011', freq='A') >= base + + def test_comp_series_period_series(self): + # GH 13200 + for freq in ['M', '2M', '3M']: + base = Series([Period(x, freq=freq) for x in + ['2011-01', '2011-02', '2011-03', '2011-04']]) + + s = Series([Period(x, freq=freq) for x in + ['2011-02', '2011-01', '2011-03', '2011-05']]) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == s, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != s, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > s, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < s, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= s, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= s, exp) + + s2 = Series([Period(x, freq='A') for x in + ['2011', '2011', '2011', '2011']]) + + # different base freq + msg = "Input has different freq=A-DEC from Period" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + base <= s2 + + def test_comp_series_period_object(self): + # GH 13200 + base = Series([Period('2011', freq='A'), Period('2011-02', freq='M'), + Period('2013', freq='A'), Period('2011-04', freq='M')]) + + s = Series([Period('2012', freq='A'), Period('2011-01', freq='M'), + Period('2013', freq='A'), Period('2011-05', freq='M')]) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == s, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != s, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > s, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < s, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= s, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= s, exp) + + def test_align_series(self): + rng = period_range('1/1/2000', '1/1/2010', freq='A') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected[1::2] = np.nan + tm.assert_series_equal(result, expected) + + result = ts + _permute(ts[::2]) + tm.assert_series_equal(result, expected) + + # it works! 
+ for kind in ['inner', 'outer', 'left', 'right']: + ts.align(ts[::2], join=kind) + msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + ts + ts.asfreq('D', how="end") diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py deleted file mode 100644 index 3459da9d2b5c5..0000000000000 --- a/pandas/tseries/tests/test_period.py +++ /dev/null @@ -1,5065 +0,0 @@ -"""Tests suite for Period handling. - -Parts derived from scikits.timeseries code, original authors: -- Pierre Gerard-Marchant & Matt Knox -- pierregm_at_uga_dot_edu - mattknow_ca_at_hotmail_dot_com - -""" - -import numpy as np -from numpy.random import randn -from datetime import datetime, date, timedelta - -import pandas as pd -import pandas.util.testing as tm -import pandas.tseries.period as period -import pandas.tseries.offsets as offsets -from pandas.tseries.tools import to_datetime -from pandas.tseries.period import Period, PeriodIndex, period_range -from pandas.tseries.index import DatetimeIndex, date_range, Index -from pandas._period import period_ordinal, period_asfreq -from pandas.compat import range, lrange, lmap, zip, text_type, PY3, iteritems -from pandas.compat.numpy import np_datetime64_compat -from pandas.tseries.frequencies import (MONTHS, DAYS, _period_code_map, - get_freq) -from pandas import (Series, DataFrame, Timestamp, _period, tslib, - _np_version_under1p9, _np_version_under1p10, - _np_version_under1p12) - - -class TestPeriodProperties(tm.TestCase): - "Test properties such as year, month, weekday, etc...." - - def test_quarterly_negative_ordinals(self): - p = Period(ordinal=-1, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 4) - self.assertIsInstance(p, Period) - - p = Period(ordinal=-2, freq='Q-DEC') - self.assertEqual(p.year, 1969) - self.assertEqual(p.quarter, 3) - self.assertIsInstance(p, Period) - - p = Period(ordinal=-2, freq='M') - self.assertEqual(p.year, 1969) - self.assertEqual(p.month, 11) - self.assertIsInstance(p, Period) - - def test_period_cons_quarterly(self): - # bugs in scikits.timeseries - for month in MONTHS: - freq = 'Q-%s' % month - exp = Period('1989Q3', freq=freq) - self.assertIn('1989Q3', str(exp)) - stamp = exp.to_timestamp('D', how='end') - p = Period(stamp, freq=freq) - self.assertEqual(p, exp) - - stamp = exp.to_timestamp('3D', how='end') - p = Period(stamp, freq=freq) - self.assertEqual(p, exp) - - def test_period_cons_annual(self): - # bugs in scikits.timeseries - for month in MONTHS: - freq = 'A-%s' % month - exp = Period('1989', freq=freq) - stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) - p = Period(stamp, freq=freq) - self.assertEqual(p, exp + 1) - self.assertIsInstance(p, Period) - - def test_period_cons_weekly(self): - for num in range(10, 17): - daystr = '2011-02-%d' % num - for day in DAYS: - freq = 'W-%s' % day - - result = Period(daystr, freq=freq) - expected = Period(daystr, freq='D').asfreq(freq) - self.assertEqual(result, expected) - self.assertIsInstance(result, Period) - - def test_period_from_ordinal(self): - p = pd.Period('2011-01', freq='M') - res = pd.Period._from_ordinal(p.ordinal, freq='M') - self.assertEqual(p, res) - self.assertIsInstance(res, Period) - - def test_period_cons_nat(self): - p = Period('NaT', freq='M') - self.assertIs(p, pd.NaT) - - p = Period('nat', freq='W-SUN') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='D') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='3D') 
- self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT, freq='1D1H') - self.assertIs(p, pd.NaT) - - p = Period('NaT') - self.assertIs(p, pd.NaT) - - p = Period(tslib.iNaT) - self.assertIs(p, pd.NaT) - - def test_cons_null_like(self): - # check Timestamp compat - self.assertIs(Timestamp('NaT'), pd.NaT) - self.assertIs(Period('NaT'), pd.NaT) - - self.assertIs(Timestamp(None), pd.NaT) - self.assertIs(Period(None), pd.NaT) - - self.assertIs(Timestamp(float('nan')), pd.NaT) - self.assertIs(Period(float('nan')), pd.NaT) - - self.assertIs(Timestamp(np.nan), pd.NaT) - self.assertIs(Period(np.nan), pd.NaT) - - def test_period_cons_mult(self): - p1 = Period('2011-01', freq='3M') - p2 = Period('2011-01', freq='M') - self.assertEqual(p1.ordinal, p2.ordinal) - - self.assertEqual(p1.freq, offsets.MonthEnd(3)) - self.assertEqual(p1.freqstr, '3M') - - self.assertEqual(p2.freq, offsets.MonthEnd()) - self.assertEqual(p2.freqstr, 'M') - - result = p1 + 1 - self.assertEqual(result.ordinal, (p2 + 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') - - result = p1 - 1 - self.assertEqual(result.ordinal, (p2 - 3).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '3M') - - msg = ('Frequency must be positive, because it' - ' represents span: -3M') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-3M') - - msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='0M') - - def test_period_cons_combined(self): - p = [(Period('2011-01', freq='1D1H'), - Period('2011-01', freq='1H1D'), - Period('2011-01', freq='H')), - (Period(ordinal=1, freq='1D1H'), - Period(ordinal=1, freq='1H1D'), - Period(ordinal=1, freq='H'))] - - for p1, p2, p3 in p: - self.assertEqual(p1.ordinal, p3.ordinal) - self.assertEqual(p2.ordinal, p3.ordinal) - - self.assertEqual(p1.freq, offsets.Hour(25)) - self.assertEqual(p1.freqstr, '25H') - - self.assertEqual(p2.freq, offsets.Hour(25)) - self.assertEqual(p2.freqstr, '25H') - - self.assertEqual(p3.freq, offsets.Hour()) - self.assertEqual(p3.freqstr, 'H') - - result = p1 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') - - result = p2 + 1 - self.assertEqual(result.ordinal, (p3 + 25).ordinal) - self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') - - result = p1 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - self.assertEqual(result.freq, p1.freq) - self.assertEqual(result.freqstr, '25H') - - result = p2 - 1 - self.assertEqual(result.ordinal, (p3 - 25).ordinal) - self.assertEqual(result.freq, p2.freq) - self.assertEqual(result.freqstr, '25H') - - msg = ('Frequency must be positive, because it' - ' represents span: -25H') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-1D1H') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='-1H1D') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='-1D1H') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='-1H1D') - - msg = ('Frequency must be positive, because it' - ' represents span: 0D') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='0D0H') - with tm.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq='0D0H') - - # You can only combine together day and intraday offsets - msg = ('Invalid frequency: 1W1D') - with 
tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='1W1D') - msg = ('Invalid frequency: 1D1W') - with tm.assertRaisesRegexp(ValueError, msg): - Period('2011-01', freq='1D1W') - - def test_timestamp_tz_arg(self): - tm._skip_if_no_pytz() - import pytz - for case in ['Europe/Brussels', 'Asia/Tokyo', 'US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='3H').to_timestamp(tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='A').to_timestamp(freq='A', tz=case) - exp = Timestamp('31/12/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', freq='A').to_timestamp(freq='3H', tz=case) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - exp_zone = pytz.timezone(case).normalize(p) - - self.assertEqual(p, exp) - self.assertEqual(p.tz, exp_zone.tzinfo) - self.assertEqual(p.tz, exp.tz) - - def test_timestamp_tz_arg_dateutil(self): - from pandas.tslib import _dateutil_gettz as gettz - from pandas.tslib import maybe_get_tz - for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', - 'dateutil/US/Pacific']: - p = Period('1/1/2005', freq='M').to_timestamp( - tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) - - p = Period('1/1/2005', - freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) - exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) - self.assertEqual(p, exp) - self.assertEqual(p.tz, gettz(case.split('/', 1)[1])) - self.assertEqual(p.tz, exp.tz) - - def test_timestamp_tz_arg_dateutil_from_string(self): - from pandas.tslib import _dateutil_gettz as gettz - p = Period('1/1/2005', - freq='M').to_timestamp(tz='dateutil/Europe/Brussels') - self.assertEqual(p.tz, gettz('Europe/Brussels')) - - def test_timestamp_mult(self): - p = pd.Period('2011-01', freq='M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-01-31')) - - p = pd.Period('2011-01', freq='3M') - self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) - self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) - - def test_period_constructor(self): - i1 = Period('1/1/2005', freq='M') - i2 = Period('Jan 2005') - - self.assertEqual(i1, i2) - - i1 = Period('2005', freq='A') - i2 = Period('2005') - i3 = Period('2005', freq='a') - - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - i4 = Period('2005', freq='M') - i5 = Period('2005', freq='m') - - self.assertRaises(ValueError, i1.__ne__, i4) - self.assertEqual(i4, i5) - - i1 = Period.now('Q') - i2 = Period(datetime.now(), freq='Q') - i3 = Period.now('q') - - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - # Biz day construction, roll forward if non-weekday - i1 = Period('3/10/12', freq='B') - i2 = Period('3/10/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) - i2 = Period('3/11/12', freq='D') - 
self.assertEqual(i1, i2.asfreq('B')) - i2 = Period('3/12/12', freq='D') - self.assertEqual(i1, i2.asfreq('B')) - - i3 = Period('3/10/12', freq='b') - self.assertEqual(i1, i3) - - i1 = Period(year=2005, quarter=1, freq='Q') - i2 = Period('1/1/2005', freq='Q') - self.assertEqual(i1, i2) - - i1 = Period(year=2005, quarter=3, freq='Q') - i2 = Period('9/1/2005', freq='Q') - self.assertEqual(i1, i2) - - i1 = Period(year=2005, month=3, day=1, freq='D') - i2 = Period('3/1/2005', freq='D') - self.assertEqual(i1, i2) - - i3 = Period(year=2005, month=3, day=1, freq='d') - self.assertEqual(i1, i3) - - i1 = Period(year=2012, month=3, day=10, freq='B') - i2 = Period('3/12/12', freq='B') - self.assertEqual(i1, i2) - - i1 = Period('2005Q1') - i2 = Period(year=2005, quarter=1, freq='Q') - i3 = Period('2005q1') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - - i1 = Period('05Q1') - self.assertEqual(i1, i2) - lower = Period('05q1') - self.assertEqual(i1, lower) - - i1 = Period('1Q2005') - self.assertEqual(i1, i2) - lower = Period('1q2005') - self.assertEqual(i1, lower) - - i1 = Period('1Q05') - self.assertEqual(i1, i2) - lower = Period('1q05') - self.assertEqual(i1, lower) - - i1 = Period('4Q1984') - self.assertEqual(i1.year, 1984) - lower = Period('4q1984') - self.assertEqual(i1, lower) - - i1 = Period('1982', freq='min') - i2 = Period('1982', freq='MIN') - self.assertEqual(i1, i2) - i2 = Period('1982', freq=('Min', 1)) - self.assertEqual(i1, i2) - - expected = Period('2007-01', freq='M') - i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) - - i1 = Period('200701', freq='M') - self.assertEqual(i1, expected) - - i1 = Period(200701, freq='M') - self.assertEqual(i1, expected) - - i1 = Period(ordinal=200701, freq='M') - self.assertEqual(i1.year, 18695) - - i1 = Period(datetime(2007, 1, 1), freq='M') - i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) - - i1 = Period(date(2007, 1, 1), freq='M') - i2 = Period(datetime(2007, 1, 1), freq='M') - i3 = Period(np.datetime64('2007-01-01'), freq='M') - i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') - i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) - - i1 = Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - - def test_period_constructor_offsets(self): - self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), - Period('1/1/2005', freq='M')) - self.assertEqual(Period('2005', freq=offsets.YearEnd()), - Period('2005', freq='A')) - self.assertEqual(Period('2005', freq=offsets.MonthEnd()), - Period('2005', freq='M')) - self.assertEqual(Period('3/10/12', freq=offsets.BusinessDay()), - Period('3/10/12', freq='B')) - self.assertEqual(Period('3/10/12', freq=offsets.Day()), - Period('3/10/12', freq='D')) - - self.assertEqual(Period(year=2005, quarter=1, - freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, 
quarter=1, freq='Q')) - self.assertEqual(Period(year=2005, quarter=2, - freq=offsets.QuarterEnd(startingMonth=12)), - Period(year=2005, quarter=2, freq='Q')) - - self.assertEqual(Period(year=2005, month=3, day=1, freq=offsets.Day()), - Period(year=2005, month=3, day=1, freq='D')) - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay()), - Period(year=2012, month=3, day=10, freq='B')) - - expected = Period('2005-03-01', freq='3D') - self.assertEqual(Period(year=2005, month=3, day=1, - freq=offsets.Day(3)), expected) - self.assertEqual(Period(year=2005, month=3, day=1, freq='3D'), - expected) - - self.assertEqual(Period(year=2012, month=3, day=10, - freq=offsets.BDay(3)), - Period(year=2012, month=3, day=10, freq='3B')) - - self.assertEqual(Period(200701, freq=offsets.MonthEnd()), - Period(200701, freq='M')) - - i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) - i2 = Period(ordinal=200701, freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1.year, 18695) - self.assertEqual(i2.year, 18695) - - i1 = Period(datetime(2007, 1, 1), freq='M') - i2 = Period('200701', freq='M') - self.assertEqual(i1, i2) - - i1 = Period(date(2007, 1, 1), freq='M') - i2 = Period(datetime(2007, 1, 1), freq='M') - i3 = Period(np.datetime64('2007-01-01'), freq='M') - i4 = Period(np_datetime64_compat('2007-01-01 00:00:00Z'), freq='M') - i5 = Period(np_datetime64_compat('2007-01-01 00:00:00.000Z'), freq='M') - self.assertEqual(i1, i2) - self.assertEqual(i1, i3) - self.assertEqual(i1, i4) - self.assertEqual(i1, i5) - - i1 = Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - - def test_freq_str(self): - i1 = Period('1982', freq='Min') - self.assertEqual(i1.freq, offsets.Minute()) - self.assertEqual(i1.freqstr, 'T') - - def test_period_deprecated_freq(self): - cases = {"M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], - "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], - "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], - "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], - "T": ["minute", "MINUTE", "MINUTELY", "minutely"], - "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], - "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], - "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], - "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"]} - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - for exp, freqs in iteritems(cases): - for freq in freqs: - with self.assertRaisesRegexp(ValueError, msg): - Period('2016-03-01 09:00', freq=freq) - with self.assertRaisesRegexp(ValueError, msg): - Period(ordinal=1, freq=freq) - - # check supported freq-aliases still works - p1 = Period('2016-03-01 09:00', freq=exp) - p2 = Period(ordinal=1, freq=exp) - tm.assertIsInstance(p1, Period) - tm.assertIsInstance(p2, Period) - - def test_hash(self): - self.assertEqual(hash(Period('2011-01', freq='M')), - hash(Period('2011-01', freq='M'))) - - self.assertNotEqual(hash(Period('2011-01-01', freq='D')), - 
hash(Period('2011-01', freq='M'))) - - self.assertNotEqual(hash(Period('2011-01', freq='3M')), - hash(Period('2011-01', freq='2M'))) - - self.assertNotEqual(hash(Period('2011-01', freq='M')), - hash(Period('2011-02', freq='M'))) - - def test_repr(self): - p = Period('Jan-2000') - self.assertIn('2000-01', repr(p)) - - p = Period('2000-12-15') - self.assertIn('2000-12-15', repr(p)) - - def test_repr_nat(self): - p = Period('nat', freq='M') - self.assertIn(repr(tslib.NaT), repr(p)) - - def test_millisecond_repr(self): - p = Period('2000-01-01 12:15:02.123') - - self.assertEqual("Period('2000-01-01 12:15:02.123', 'L')", repr(p)) - - def test_microsecond_repr(self): - p = Period('2000-01-01 12:15:02.123567') - - self.assertEqual("Period('2000-01-01 12:15:02.123567', 'U')", repr(p)) - - def test_strftime(self): - p = Period('2000-1-1 12:34:12', freq='S') - res = p.strftime('%Y-%m-%d %H:%M:%S') - self.assertEqual(res, '2000-01-01 12:34:12') - tm.assertIsInstance(res, text_type) # GH3363 - - def test_sub_delta(self): - left, right = Period('2011', freq='A'), Period('2007', freq='A') - result = left - right - self.assertEqual(result, 4) - - with self.assertRaises(period.IncompatibleFrequency): - left - Period('2007-01', freq='M') - - def test_to_timestamp(self): - p = Period('1982', freq='A') - start_ts = p.to_timestamp(how='S') - aliases = ['s', 'StarT', 'BEGIn'] - for a in aliases: - self.assertEqual(start_ts, p.to_timestamp('D', how=a)) - # freq with mult should not affect to the result - self.assertEqual(start_ts, p.to_timestamp('3D', how=a)) - - end_ts = p.to_timestamp(how='E') - aliases = ['e', 'end', 'FINIsH'] - for a in aliases: - self.assertEqual(end_ts, p.to_timestamp('D', how=a)) - self.assertEqual(end_ts, p.to_timestamp('3D', how=a)) - - from_lst = ['A', 'Q', 'M', 'W', 'B', 'D', 'H', 'Min', 'S'] - - def _ex(p): - return Timestamp((p + 1).start_time.value - 1) - - for i, fcode in enumerate(from_lst): - p = Period('1982', freq=fcode) - result = p.to_timestamp().to_period(fcode) - self.assertEqual(result, p) - - self.assertEqual(p.start_time, p.to_timestamp(how='S')) - - self.assertEqual(p.end_time, _ex(p)) - - # Frequency other than daily - - p = Period('1985', freq='A') - - result = p.to_timestamp('H', how='end') - expected = datetime(1985, 12, 31, 23) - self.assertEqual(result, expected) - result = p.to_timestamp('3H', how='end') - self.assertEqual(result, expected) - - result = p.to_timestamp('T', how='end') - expected = datetime(1985, 12, 31, 23, 59) - self.assertEqual(result, expected) - result = p.to_timestamp('2T', how='end') - self.assertEqual(result, expected) - - result = p.to_timestamp(how='end') - expected = datetime(1985, 12, 31) - self.assertEqual(result, expected) - - expected = datetime(1985, 1, 1) - result = p.to_timestamp('H', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('T', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('S', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('3H', how='start') - self.assertEqual(result, expected) - result = p.to_timestamp('5S', how='start') - self.assertEqual(result, expected) - - def test_start_time(self): - freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] - xp = datetime(2012, 1, 1) - for f in freq_lst: - p = Period('2012', freq=f) - self.assertEqual(p.start_time, xp) - self.assertEqual(Period('2012', freq='B').start_time, - datetime(2012, 1, 2)) - self.assertEqual(Period('2012', freq='W').start_time, - datetime(2011, 12, 26)) - - def 
test_end_time(self): - p = Period('2012', freq='A') - - def _ex(*args): - return Timestamp(Timestamp(datetime(*args)).value - 1) - - xp = _ex(2013, 1, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='Q') - xp = _ex(2012, 4, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='M') - xp = _ex(2012, 2, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='D') - xp = _ex(2012, 1, 2) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='H') - xp = _ex(2012, 1, 1, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='B') - xp = _ex(2012, 1, 3) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='W') - xp = _ex(2012, 1, 2) - self.assertEqual(xp, p.end_time) - - # Test for GH 11738 - p = Period('2012', freq='15D') - xp = _ex(2012, 1, 16) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='1D1H') - xp = _ex(2012, 1, 2, 1) - self.assertEqual(xp, p.end_time) - - p = Period('2012', freq='1H1D') - xp = _ex(2012, 1, 2, 1) - self.assertEqual(xp, p.end_time) - - def test_anchor_week_end_time(self): - def _ex(*args): - return Timestamp(Timestamp(datetime(*args)).value - 1) - - p = Period('2013-1-1', 'W-SAT') - xp = _ex(2013, 1, 6) - self.assertEqual(p.end_time, xp) - - def test_properties_annually(self): - # Test properties on Periods with annually frequency. - a_date = Period(freq='A', year=2007) - self.assertEqual(a_date.year, 2007) - - def test_properties_quarterly(self): - # Test properties on Periods with daily frequency. - qedec_date = Period(freq="Q-DEC", year=2007, quarter=1) - qejan_date = Period(freq="Q-JAN", year=2007, quarter=1) - qejun_date = Period(freq="Q-JUN", year=2007, quarter=1) - # - for x in range(3): - for qd in (qedec_date, qejan_date, qejun_date): - self.assertEqual((qd + x).qyear, 2007) - self.assertEqual((qd + x).quarter, x + 1) - - def test_properties_monthly(self): - # Test properties on Periods with daily frequency. - m_date = Period(freq='M', year=2007, month=1) - for x in range(11): - m_ival_x = m_date + x - self.assertEqual(m_ival_x.year, 2007) - if 1 <= x + 1 <= 3: - self.assertEqual(m_ival_x.quarter, 1) - elif 4 <= x + 1 <= 6: - self.assertEqual(m_ival_x.quarter, 2) - elif 7 <= x + 1 <= 9: - self.assertEqual(m_ival_x.quarter, 3) - elif 10 <= x + 1 <= 12: - self.assertEqual(m_ival_x.quarter, 4) - self.assertEqual(m_ival_x.month, x + 1) - - def test_properties_weekly(self): - # Test properties on Periods with daily frequency. - w_date = Period(freq='W', year=2007, month=1, day=7) - # - self.assertEqual(w_date.year, 2007) - self.assertEqual(w_date.quarter, 1) - self.assertEqual(w_date.month, 1) - self.assertEqual(w_date.week, 1) - self.assertEqual((w_date - 1).week, 52) - self.assertEqual(w_date.days_in_month, 31) - self.assertEqual(Period(freq='W', year=2012, - month=2, day=1).days_in_month, 29) - - def test_properties_weekly_legacy(self): - # Test properties on Periods with daily frequency. 
- w_date = Period(freq='W', year=2007, month=1, day=7) - self.assertEqual(w_date.year, 2007) - self.assertEqual(w_date.quarter, 1) - self.assertEqual(w_date.month, 1) - self.assertEqual(w_date.week, 1) - self.assertEqual((w_date - 1).week, 52) - self.assertEqual(w_date.days_in_month, 31) - - exp = Period(freq='W', year=2012, month=2, day=1) - self.assertEqual(exp.days_in_month, 29) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK', year=2007, month=1, day=7) - - def test_properties_daily(self): - # Test properties on Periods with daily frequency. - b_date = Period(freq='B', year=2007, month=1, day=1) - # - self.assertEqual(b_date.year, 2007) - self.assertEqual(b_date.quarter, 1) - self.assertEqual(b_date.month, 1) - self.assertEqual(b_date.day, 1) - self.assertEqual(b_date.weekday, 0) - self.assertEqual(b_date.dayofyear, 1) - self.assertEqual(b_date.days_in_month, 31) - self.assertEqual(Period(freq='B', year=2012, - month=2, day=1).days_in_month, 29) - # - d_date = Period(freq='D', year=2007, month=1, day=1) - # - self.assertEqual(d_date.year, 2007) - self.assertEqual(d_date.quarter, 1) - self.assertEqual(d_date.month, 1) - self.assertEqual(d_date.day, 1) - self.assertEqual(d_date.weekday, 0) - self.assertEqual(d_date.dayofyear, 1) - self.assertEqual(d_date.days_in_month, 31) - self.assertEqual(Period(freq='D', year=2012, month=2, - day=1).days_in_month, 29) - - def test_properties_hourly(self): - # Test properties on Periods with hourly frequency. - h_date1 = Period(freq='H', year=2007, month=1, day=1, hour=0) - h_date2 = Period(freq='2H', year=2007, month=1, day=1, hour=0) - - for h_date in [h_date1, h_date2]: - self.assertEqual(h_date.year, 2007) - self.assertEqual(h_date.quarter, 1) - self.assertEqual(h_date.month, 1) - self.assertEqual(h_date.day, 1) - self.assertEqual(h_date.weekday, 0) - self.assertEqual(h_date.dayofyear, 1) - self.assertEqual(h_date.hour, 0) - self.assertEqual(h_date.days_in_month, 31) - self.assertEqual(Period(freq='H', year=2012, month=2, day=1, - hour=0).days_in_month, 29) - - def test_properties_minutely(self): - # Test properties on Periods with minutely frequency. - t_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - # - self.assertEqual(t_date.quarter, 1) - self.assertEqual(t_date.month, 1) - self.assertEqual(t_date.day, 1) - self.assertEqual(t_date.weekday, 0) - self.assertEqual(t_date.dayofyear, 1) - self.assertEqual(t_date.hour, 0) - self.assertEqual(t_date.minute, 0) - self.assertEqual(t_date.days_in_month, 31) - self.assertEqual(Period(freq='D', year=2012, month=2, day=1, hour=0, - minute=0).days_in_month, 29) - - def test_properties_secondly(self): - # Test properties on Periods with secondly frequency. 
- s_date = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - # - self.assertEqual(s_date.year, 2007) - self.assertEqual(s_date.quarter, 1) - self.assertEqual(s_date.month, 1) - self.assertEqual(s_date.day, 1) - self.assertEqual(s_date.weekday, 0) - self.assertEqual(s_date.dayofyear, 1) - self.assertEqual(s_date.hour, 0) - self.assertEqual(s_date.minute, 0) - self.assertEqual(s_date.second, 0) - self.assertEqual(s_date.days_in_month, 31) - self.assertEqual(Period(freq='Min', year=2012, month=2, day=1, hour=0, - minute=0, second=0).days_in_month, 29) - - def test_properties_nat(self): - p_nat = Period('NaT', freq='M') - t_nat = pd.Timestamp('NaT') - self.assertIs(p_nat, t_nat) - - # confirm Period('NaT') work identical with Timestamp('NaT') - for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week', - 'dayofyear', 'quarter', 'days_in_month']: - self.assertTrue(np.isnan(getattr(p_nat, f))) - self.assertTrue(np.isnan(getattr(t_nat, f))) - - def test_pnow(self): - dt = datetime.now() - - val = period.pnow('D') - exp = Period(dt, freq='D') - self.assertEqual(val, exp) - - val2 = period.pnow('2D') - exp2 = Period(dt, freq='2D') - self.assertEqual(val2, exp2) - self.assertEqual(val.ordinal, val2.ordinal) - self.assertEqual(val.ordinal, exp2.ordinal) - - def test_constructor_corner(self): - expected = Period('2007-01', freq='2M') - self.assertEqual(Period(year=2007, month=1, freq='2M'), expected) - - self.assertRaises(ValueError, Period, datetime.now()) - self.assertRaises(ValueError, Period, datetime.now().date()) - self.assertRaises(ValueError, Period, 1.6, freq='D') - self.assertRaises(ValueError, Period, ordinal=1.6, freq='D') - self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D') - self.assertIs(Period(None), pd.NaT) - self.assertRaises(ValueError, Period, month=1) - - p = Period('2007-01-01', freq='D') - - result = Period(p, freq='A') - exp = Period('2007', freq='A') - self.assertEqual(result, exp) - - def test_constructor_infer_freq(self): - p = Period('2007-01-01') - self.assertEqual(p.freq, 'D') - - p = Period('2007-01-01 07') - self.assertEqual(p.freq, 'H') - - p = Period('2007-01-01 07:10') - self.assertEqual(p.freq, 'T') - - p = Period('2007-01-01 07:10:15') - self.assertEqual(p.freq, 'S') - - p = Period('2007-01-01 07:10:15.123') - self.assertEqual(p.freq, 'L') - - p = Period('2007-01-01 07:10:15.123000') - self.assertEqual(p.freq, 'L') - - p = Period('2007-01-01 07:10:15.123400') - self.assertEqual(p.freq, 'U') - - def test_asfreq_MS(self): - initial = Period("2013") - - self.assertEqual(initial.asfreq(freq="M", how="S"), - Period('2013-01', 'M')) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - initial.asfreq(freq="MS", how="S") - - with tm.assertRaisesRegexp(ValueError, msg): - pd.Period('2013-01', 'MS') - - self.assertTrue(_period_code_map.get("MS") is None) - - -def noWrap(item): - return item - - -class TestFreqConversion(tm.TestCase): - "Test frequency conversion of date objects" - - def test_asfreq_corner(self): - val = Period(freq='A', year=2007) - result1 = val.asfreq('5t') - result2 = val.asfreq('t') - expected = Period('2007-12-31 23:59', freq='t') - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freqstr, '5T') - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freqstr, 'T') - - def test_conv_annual(self): - # frequency conversion tests: from Annual Frequency - - ival_A = Period(freq='A', year=2007) - - 
ival_AJAN = Period(freq="A-JAN", year=2007) - ival_AJUN = Period(freq="A-JUN", year=2007) - ival_ANOV = Period(freq="A-NOV", year=2007) - - ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) - ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) - ival_A_to_M_start = Period(freq='M', year=2007, month=1) - ival_A_to_M_end = Period(freq='M', year=2007, month=12) - ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) - ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) - ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) - ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - - ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_AJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) - ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) - ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) - ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) - - self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) - self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) - self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) - self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) - self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) - self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) - self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) - self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) - self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) - self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) - self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) - self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) - self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) - self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) - - self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) - self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) - - self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) - self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) - - self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) - self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) - - self.assertEqual(ival_A.asfreq('A'), ival_A) - - def test_conv_quarterly(self): - # frequency conversion tests: from Quarterly Frequency - - ival_Q = Period(freq='Q', year=2007, quarter=1) - ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) - - ival_QEJAN = Period(freq="Q-JAN", year=2007, 
quarter=1) - ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) - - ival_Q_to_A = Period(freq='A', year=2007) - ival_Q_to_M_start = Period(freq='M', year=2007, month=1) - ival_Q_to_M_end = Period(freq='M', year=2007, month=3) - ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) - ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) - ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) - ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) - ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, - minute=59, second=59) - - ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) - ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) - - ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) - - self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) - self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) - - self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) - self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) - self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) - self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) - self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) - self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) - self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) - self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) - self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) - self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) - self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) - self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) - self.assertEqual(ival_Q.asfreq('S', 'S'), ival_Q_to_S_start) - self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) - - self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) - self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) - self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) - self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) - - self.assertEqual(ival_Q.asfreq('Q'), ival_Q) - - def test_conv_monthly(self): - # frequency conversion tests: from Monthly Frequency - - ival_M = Period(freq='M', year=2007, month=1) - ival_M_end_of_year = Period(freq='M', year=2007, month=12) - ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) - ival_M_to_A = Period(freq='A', year=2007) - ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) - ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) - ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_M_to_H_start = Period(freq='H', year=2007, 
month=1, day=1, hour=0) - ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) - ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_M_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, - minute=59, second=59) - - self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) - self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) - - self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) - self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) - self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) - self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) - self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) - self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) - self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) - self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) - self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) - self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) - self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) - self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) - - self.assertEqual(ival_M.asfreq('M'), ival_M) - - def test_conv_weekly(self): - # frequency conversion tests: from Weekly Frequency - ival_W = Period(freq='W', year=2007, month=1, day=1) - - ival_WSUN = Period(freq='W', year=2007, month=1, day=7) - ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) - ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) - ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) - ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) - ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) - ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) - - ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) - ival_WSAT_to_D_end = Period(freq='D', year=2007, month=1, day=6) - ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) - ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) - ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) - ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) - ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) - ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) - ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) - ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) - ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) - ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) - - ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) - ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) - ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) - ival_W_to_A = Period(freq='A', year=2007) - ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_W_to_M = Period(freq='M', year=2007, month=1) - - if Period(freq='D', year=2007, month=12, day=31).weekday == 6: - ival_W_to_A_end_of_year = Period(freq='A', year=2007) - else: - 
ival_W_to_A_end_of_year = Period(freq='A', year=2008) - - if Period(freq='D', year=2007, month=3, day=31).weekday == 6: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) - else: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) - - if Period(freq='D', year=2007, month=1, day=31).weekday == 6: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) - else: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) - - ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) - ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) - ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, - minute=59, second=59) - - self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) - self.assertEqual(ival_W_end_of_year.asfreq('A'), - ival_W_to_A_end_of_year) - self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) - self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), - ival_W_to_Q_end_of_quarter) - self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) - self.assertEqual(ival_W_end_of_month.asfreq('M'), - ival_W_to_M_end_of_month) - - self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) - self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) - - self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) - self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) - - self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) - self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) - self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) - self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) - self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) - self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) - self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) - self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) - self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) - self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) - self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) - self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) - self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) - self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) - - self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) - self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) - self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) - self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) - self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) - self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) - - self.assertEqual(ival_W.asfreq('W'), ival_W) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - ival_W.asfreq('WK') - - def test_conv_weekly_legacy(self): - # frequency conversion tests: from Weekly Frequency - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - 
with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK', year=2007, month=1, day=1) - - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-SAT', year=2007, month=1, day=6) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-FRI', year=2007, month=1, day=5) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-THU', year=2007, month=1, day=4) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-WED', year=2007, month=1, day=3) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-TUE', year=2007, month=1, day=2) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-MON', year=2007, month=1, day=1) - - def test_conv_business(self): - # frequency conversion tests: from Business Frequency" - - ival_B = Period(freq='B', year=2007, month=1, day=1) - ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31) - ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30) - ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31) - ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5) - - ival_B_to_A = Period(freq='A', year=2007) - ival_B_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_B_to_M = Period(freq='M', year=2007, month=1) - ival_B_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) - ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, - minute=59, second=59) - - self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) - self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) - - self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) - - self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) - self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) - self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) - self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) - self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) - self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) - - self.assertEqual(ival_B.asfreq('B'), ival_B) - - def test_conv_daily(self): - # frequency conversion tests: from Business Frequency" - - ival_D = Period(freq='D', year=2007, month=1, day=1) - ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31) - ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31) - ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31) - ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7) - - ival_D_friday = Period(freq='D', year=2007, month=1, day=5) - ival_D_saturday = Period(freq='D', year=2007, month=1, day=6) - ival_D_sunday = Period(freq='D', year=2007, month=1, day=7) - - # TODO: unused? 
- # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) - - ival_B_friday = Period(freq='B', year=2007, month=1, day=5) - ival_B_monday = Period(freq='B', year=2007, month=1, day=8) - - ival_D_to_A = Period(freq='A', year=2007) - - ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008) - ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007) - ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007) - - ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) - ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) - ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) - - ival_D_to_M = Period(freq='M', year=2007, month=1) - ival_D_to_W = Period(freq='W', year=2007, month=1, day=7) - - ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) - ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, - minute=59, second=59) - - self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) - - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), - ival_Deoq_to_AJAN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), - ival_Deoq_to_AJUN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), - ival_Deoq_to_ADEC) - - self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) - self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) - self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) - self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) - self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) - - self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) - self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) - - self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) - self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) - self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) - self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) - self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) - self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) - - self.assertEqual(ival_D.asfreq('D'), ival_D) - - def test_conv_hourly(self): - # frequency conversion tests: from Hourly Frequency" - - ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, - hour=23) - ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, - hour=23) - ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, - hour=23) - ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, - hour=23) - ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, - hour=23) - - ival_H_to_A = Period(freq='A', year=2007) - ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) - 
ival_H_to_M = Period(freq='M', year=2007, month=1) - ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) - - ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=59) - ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=59, second=59) - - self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) - self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) - - self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) - self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) - self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) - self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) - - self.assertEqual(ival_H.asfreq('H'), ival_H) - - def test_conv_minutely(self): - # frequency conversion tests: from Minutely Frequency" - - ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_hour = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=59) - - ival_T_to_A = Period(freq='A', year=2007) - ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_T_to_M = Period(freq='M', year=2007, month=1) - ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - - ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=59) - - self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) - 
self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) - self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) - - self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) - self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) - - self.assertEqual(ival_T.asfreq('Min'), ival_T) - - def test_conv_secondly(self): - # frequency conversion tests: from Secondly Frequency" - - ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, - second=0) - ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, - hour=23, minute=59, second=59) - ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=59, second=59) - ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=59) - - ival_S_to_A = Period(freq='A', year=2007) - ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_S_to_M = Period(freq='M', year=2007, month=1) - ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - - self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) - self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) - - self.assertEqual(ival_S.asfreq('S'), ival_S) - - def test_asfreq_mult(self): - # normal freq to mult freq - p = Period(freq='A', year=2007) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq) - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - 
self.assertEqual(result.freq, expected.freq) - - # mult freq to normal freq - p = Period(freq='3A', year=2007) - # ordinal will change because how=E is the default - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq) - expected = Period('2009', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2007-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='3A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2009-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - def test_asfreq_combined(self): - # normal freq to combined freq - p = Period('2007', freq='H') - - # ordinal will not change - expected = Period('2007', freq='25H') - for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): - result = p.asfreq(freq, how=how) - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - # combined freq to normal freq - p1 = Period(freq='1D1H', year=2007) - p2 = Period(freq='1H1D', year=2007) - - # ordinal will change because how=E is the default - result1 = p1.asfreq('H') - result2 = p2.asfreq('H') - expected = Period('2007-01-02', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) - - # ordinal will not change - result1 = p1.asfreq('H', how='S') - result2 = p2.asfreq('H', how='S') - expected = Period('2007-01-01', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) - - def test_is_leap_year(self): - # GH 13727 - for freq in ['A', 'M', 'D', 'H']: - p = Period('2000-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - self.assertIsInstance(p.is_leap_year, bool) - - p = Period('1999-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) - - p = Period('2004-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - - p = Period('2100-01-01 
00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) - - -class TestPeriodIndex(tm.TestCase): - - def setUp(self): - pass - - def test_hash_error(self): - index = period_range('20010101', periods=10) - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(index).__name__): - hash(index) - - def test_make_time_series(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - series = Series(1, index=index) - tm.assertIsInstance(series, Series) - - def test_constructor_use_start_freq(self): - # GH #1118 - p = Period('4/2/2012', freq='B') - index = PeriodIndex(start=p, periods=10) - expected = PeriodIndex(start='4/2/2012', periods=10, freq='B') - tm.assert_index_equal(index, expected) - - def test_constructor_field_arrays(self): - # GH #1264 - - years = np.arange(1990, 2010).repeat(4)[2:-2] - quarters = np.tile(np.arange(1, 5), 20)[2:-2] - - index = PeriodIndex(year=years, quarter=quarters, freq='Q-DEC') - expected = period_range('1990Q3', '2009Q2', freq='Q-DEC') - tm.assert_index_equal(index, expected) - - index2 = PeriodIndex(year=years, quarter=quarters, freq='2Q-DEC') - tm.assert_numpy_array_equal(index.asi8, index2.asi8) - - index = PeriodIndex(year=years, quarter=quarters) - tm.assert_index_equal(index, expected) - - years = [2007, 2007, 2007] - months = [1, 2] - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='2M') - self.assertRaises(ValueError, PeriodIndex, year=years, month=months, - freq='M', start=Period('2007-01', freq='M')) - - years = [2007, 2007, 2007] - months = [1, 2, 3] - idx = PeriodIndex(year=years, month=months, freq='M') - exp = period_range('2007-01', periods=3, freq='M') - tm.assert_index_equal(idx, exp) - - def test_constructor_U(self): - # U was used as undefined period - self.assertRaises(ValueError, period_range, '2007-1-1', periods=500, - freq='X') - - def test_constructor_nano(self): - idx = period_range(start=Period(ordinal=1, freq='N'), - end=Period(ordinal=4, freq='N'), freq='N') - exp = PeriodIndex([Period(ordinal=1, freq='N'), - Period(ordinal=2, freq='N'), - Period(ordinal=3, freq='N'), - Period(ordinal=4, freq='N')], freq='N') - tm.assert_index_equal(idx, exp) - - def test_constructor_arrays_negative_year(self): - years = np.arange(1960, 2000, dtype=np.int64).repeat(4) - quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) - - pindex = PeriodIndex(year=years, quarter=quarters) - - self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) - - def test_constructor_invalid_quarters(self): - self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), - quarter=lrange(4), freq='Q-DEC') - - def test_constructor_corner(self): - self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') - - start = Period('2007', freq='A-JUN') - end = Period('2010', freq='A-DEC') - self.assertRaises(ValueError, PeriodIndex, start=start, end=end) - self.assertRaises(ValueError, PeriodIndex, start=start) - self.assertRaises(ValueError, PeriodIndex, end=end) - - result = period_range('2007-01', periods=10.5, freq='M') - exp = period_range('2007-01', periods=10, freq='M') - tm.assert_index_equal(result, exp) - - def test_constructor_fromarraylike(self): - idx = period_range('2007-01', periods=20, freq='M') - - # values is an array of Period, thus can retrieve freq - tm.assert_index_equal(PeriodIndex(idx.values), idx) - 
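Both constructor paths touched in this block can be shown in a few lines; a sketch, assuming the Period-array and field-array constructors of this pandas line:

    import pandas as pd

    # freq is inferred when the data are Period objects
    idx = pd.period_range('2007-01', periods=3, freq='M')
    pd.PeriodIndex(idx.values)     # PeriodIndex(['2007-01', ...], freq='M')

    # or the index can be built from per-field arrays
    pd.PeriodIndex(year=[2007, 2007, 2007], quarter=[1, 2, 3], freq='Q-DEC')
    # PeriodIndex(['2007Q1', '2007Q2', '2007Q3'], freq='Q-DEC')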
tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - - self.assertRaises(ValueError, PeriodIndex, idx._values) - self.assertRaises(ValueError, PeriodIndex, list(idx._values)) - self.assertRaises(ValueError, PeriodIndex, - data=Period('2007', freq='A')) - - result = PeriodIndex(iter(idx)) - tm.assert_index_equal(result, idx) - - result = PeriodIndex(idx) - tm.assert_index_equal(result, idx) - - result = PeriodIndex(idx, freq='M') - tm.assert_index_equal(result, idx) - - result = PeriodIndex(idx, freq=offsets.MonthEnd()) - tm.assert_index_equal(result, idx) - self.assertTrue(result.freq, 'M') - - result = PeriodIndex(idx, freq='2M') - tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') - - result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) - tm.assert_index_equal(result, idx.asfreq('2M')) - self.assertTrue(result.freq, '2M') - - result = PeriodIndex(idx, freq='D') - exp = idx.asfreq('D', 'e') - tm.assert_index_equal(result, exp) - - def test_constructor_datetime64arr(self): - vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) - vals = vals.view(np.dtype('M8[us]')) - - self.assertRaises(ValueError, PeriodIndex, vals, freq='D') - - def test_constructor_dtype(self): - # passing a dtype with a tz should localize - idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]') - exp = PeriodIndex(['2013-01', '2013-03'], freq='M') - tm.assert_index_equal(idx, exp) - self.assertEqual(idx.dtype, 'period[M]') - - idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]') - exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D') - tm.assert_index_equal(idx, exp) - self.assertEqual(idx.dtype, 'period[3D]') - - # if we already have a freq and its not the same, then asfreq - # (not changed) - idx = PeriodIndex(['2013-01-01', '2013-01-02'], freq='D') - - res = PeriodIndex(idx, dtype='period[M]') - exp = PeriodIndex(['2013-01', '2013-01'], freq='M') - tm.assert_index_equal(res, exp) - self.assertEqual(res.dtype, 'period[M]') - - res = PeriodIndex(idx, freq='M') - tm.assert_index_equal(res, exp) - self.assertEqual(res.dtype, 'period[M]') - - msg = 'specified freq and dtype are different' - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex(['2011-01'], freq='M', dtype='period[D]') - - def test_constructor_empty(self): - idx = pd.PeriodIndex([], freq='M') - tm.assertIsInstance(idx, PeriodIndex) - self.assertEqual(len(idx), 0) - self.assertEqual(idx.freq, 'M') - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - pd.PeriodIndex([]) - - def test_constructor_pi_nat(self): - idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='M')]) - exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='M')])) - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'), - Period('2011-01', freq='M')]) - exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex(np.array([pd.NaT, pd.NaT, - Period('2011-01', freq='M'), - Period('2011-01', freq='M')])) - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - PeriodIndex([pd.NaT, pd.NaT]) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - 
PeriodIndex(np.array([pd.NaT, pd.NaT])) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - PeriodIndex(['NaT', 'NaT']) - - with tm.assertRaisesRegexp(ValueError, 'freq not specified'): - PeriodIndex(np.array(['NaT', 'NaT'])) - - def test_constructor_incompat_freq(self): - msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='D')]) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, - Period('2011-01', freq='D')])) - - # first element is pd.NaT - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex([pd.NaT, Period('2011-01', freq='M'), - Period('2011-01', freq='D')]) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), - Period('2011-01', freq='D')])) - - def test_constructor_mixed(self): - idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')]) - exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')]) - exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M') - tm.assert_index_equal(idx, exp) - - idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT, - '2012-01-01']) - exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D') - tm.assert_index_equal(idx, exp) - - def test_constructor_simple_new(self): - idx = period_range('2007-01', name='p', periods=2, freq='M') - result = idx._simple_new(idx, 'p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')], - 'p', freq=idx.freq) - self.assert_index_equal(result, idx) - - result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')]), - 'p', freq=idx.freq) - self.assert_index_equal(result, idx) - - def test_constructor_simple_new_empty(self): - # GH13079 - idx = PeriodIndex([], freq='M', name='p') - result = idx._simple_new(idx, name='p', freq='M') - tm.assert_index_equal(result, idx) - - def test_constructor_simple_new_floats(self): - # GH13079 - for floats in [[1.1], np.array([1.1])]: - with self.assertRaises(TypeError): - pd.PeriodIndex._simple_new(floats, freq='M') - - def test_shallow_copy_empty(self): - - # GH13067 - idx = PeriodIndex([], freq='M') - result = idx._shallow_copy() - expected = idx - - tm.assert_index_equal(result, expected) - - def test_constructor_nat(self): - self.assertRaises(ValueError, period_range, start='NaT', - end='2011-01-01', freq='M') - self.assertRaises(ValueError, period_range, start='2011-01-01', - end='NaT', freq='M') - - def test_constructor_year_and_quarter(self): - year = pd.Series([2001, 2002, 2003]) - quarter = year - 2000 - idx = PeriodIndex(year=year, quarter=quarter) - strs = ['%dQ%d' % t for t in zip(quarter, year)] - lops = list(map(Period, strs)) - p = PeriodIndex(lops) - tm.assert_index_equal(p, idx) - - def test_constructor_freq_mult(self): - # GH #7811 - for func in [PeriodIndex, period_range]: - # must be the same, but for sure... 
- pidx = func(start='2014-01', freq='2M', periods=4) - expected = PeriodIndex(['2014-01', '2014-03', - '2014-05', '2014-07'], freq='2M') - tm.assert_index_equal(pidx, expected) - - pidx = func(start='2014-01-02', end='2014-01-15', freq='3D') - expected = PeriodIndex(['2014-01-02', '2014-01-05', - '2014-01-08', '2014-01-11', - '2014-01-14'], freq='3D') - tm.assert_index_equal(pidx, expected) - - pidx = func(end='2014-01-01 17:00', freq='4H', periods=3) - expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00', - '2014-01-01 17:00'], freq='4H') - tm.assert_index_equal(pidx, expected) - - msg = ('Frequency must be positive, because it' - ' represents span: -1M') - with tm.assertRaisesRegexp(ValueError, msg): - PeriodIndex(['2011-01'], freq='-1M') - - msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): - PeriodIndex(['2011-01'], freq='0M') - - msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assertRaisesRegexp(ValueError, msg): - period_range('2011-01', periods=3, freq='0M') - - def test_constructor_freq_mult_dti_compat(self): - import itertools - mults = [1, 2, 3, 4, 5] - freqs = ['A', 'M', 'D', 'T', 'S'] - for mult, freq in itertools.product(mults, freqs): - freqstr = str(mult) + freq - pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) - expected = date_range(start='2014-04-01', freq=freqstr, - periods=10).to_period(freqstr) - tm.assert_index_equal(pidx, expected) - - def test_constructor_freq_combined(self): - for freq in ['1D1H', '1H1D']: - pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq) - expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'], - freq='25H') - for freq, func in zip(['1D1H', '1H1D'], [PeriodIndex, period_range]): - pidx = func(start='2016-01-01', periods=2, freq=freq) - expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'], - freq='25H') - tm.assert_index_equal(pidx, expected) - - def test_dtype_str(self): - pi = pd.PeriodIndex([], freq='M') - self.assertEqual(pi.dtype_str, 'period[M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) - - pi = pd.PeriodIndex([], freq='3M') - self.assertEqual(pi.dtype_str, 'period[3M]') - self.assertEqual(pi.dtype_str, str(pi.dtype)) - - def test_view_asi8(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx.view('i8'), exp) - tm.assert_numpy_array_equal(idx.asi8, exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx.view('i8'), exp) - tm.assert_numpy_array_equal(idx.asi8, exp) - - exp = np.array([14975, -9223372036854775808], dtype=np.int64) - idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - tm.assert_numpy_array_equal(idx.view('i8'), exp) - tm.assert_numpy_array_equal(idx.asi8, exp) - - def test_values(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=np.object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) - exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) - exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) - - idx = 
pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - - exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], - dtype=object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) - exp = np.array([14975, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) - - def test_asobject_like(self): - idx = pd.PeriodIndex([], freq='M') - - exp = np.array([], dtype=object) - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - - exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], - dtype=object) - idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') - tm.assert_numpy_array_equal(idx.asobject.values, exp) - tm.assert_numpy_array_equal(idx._mpl_repr(), exp) - - def test_is_(self): - create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', - end='12/1/2009') - index = create_index() - self.assertEqual(index.is_(index), True) - self.assertEqual(index.is_(create_index()), False) - self.assertEqual(index.is_(index.view()), True) - self.assertEqual( - index.is_(index.view().view().view().view().view()), True) - self.assertEqual(index.view().is_(index), True) - ind2 = index.view() - index.name = "Apple" - self.assertEqual(ind2.is_(index), True) - self.assertEqual(index.is_(index[:]), False) - self.assertEqual(index.is_(index.asfreq('M')), False) - self.assertEqual(index.is_(index.asfreq('A')), False) - self.assertEqual(index.is_(index - 2), False) - self.assertEqual(index.is_(index - 0), False) - - def test_comp_period(self): - idx = period_range('2007-01', periods=20, freq='M') - - result = idx < idx[10] - exp = idx.values < idx.values[10] - self.assert_numpy_array_equal(result, exp) - - def test_getitem_index(self): - idx = period_range('2007-01', periods=10, freq='M', name='x') - - result = idx[[1, 3, 5]] - exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - result = idx[[True, True, False, False, False, - True, True, False, False, False]] - exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - def test_getitem_partial(self): - rng = period_range('2007-01', periods=50, freq='M') - ts = Series(np.random.randn(len(rng)), rng) - - self.assertRaises(KeyError, ts.__getitem__, '2006') - - result = ts['2008'] - self.assertTrue((result.index.year == 2008).all()) - - result = ts['2008':'2009'] - self.assertEqual(len(result), 24) - - result = ts['2008-1':'2009-12'] - self.assertEqual(len(result), 24) - - result = ts['2008Q1':'2009Q4'] - self.assertEqual(len(result), 24) - - result = ts[:'2009'] - self.assertEqual(len(result), 36) - - result = ts['2009':] - self.assertEqual(len(result), 50 - 24) - - exp = result - result = ts[24:] - tm.assert_series_equal(exp, result) - - ts = ts[10:].append(ts[10:]) - self.assertRaisesRegexp(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) - - def test_getitem_datetime(self): - rng = period_range(start='2012-01-01', periods=10, freq='W-MON') - ts = Series(lrange(len(rng)), index=rng) - - dt1 = datetime(2011, 10, 2) - dt4 = datetime(2012, 4, 20) - - rs = ts[dt1:dt4] - tm.assert_series_equal(rs, ts) 
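The partial-string indexing that test_getitem_partial relies on works the same way outside the test; a small sketch with made-up data:

    import pandas as pd

    rng = pd.period_range('2007-01', periods=50, freq='M')
    ts = pd.Series(range(50), index=rng)

    ts['2008']              # all twelve months of 2008
    ts['2008':'2009']       # 24 rows; both endpoints are inclusive
    ts['2008Q1':'2009Q4']   # quarter-style strings resolve to the same span
    ts[:'2009']             # everything up to and including Dec 2009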
- - def test_getitem_nat(self): - idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) - self.assertIs(idx[1], tslib.NaT) - - s = pd.Series([0, 1, 2], index=idx) - self.assertEqual(s[pd.NaT], 1) - - s = pd.Series(idx, index=idx) - self.assertEqual(s[pd.Period('2011-01', freq='M')], - pd.Period('2011-01', freq='M')) - self.assertIs(s[pd.NaT], tslib.NaT) - - def test_getitem_list_periods(self): - # GH 7710 - rng = period_range(start='2012-01-01', periods=10, freq='D') - ts = Series(lrange(len(rng)), index=rng) - exp = ts.iloc[[1]] - tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp) - - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), - period_range('2014-01', periods=20, freq='M')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Period('2014-10')::-1], SLC[9::-1]) - assert_slices_equivalent(SLC['2014-10'::-1], SLC[9::-1]) - - assert_slices_equivalent(SLC[:Period('2014-10'):-1], SLC[:8:-1]) - assert_slices_equivalent(SLC[:'2014-10':-1], SLC[:8:-1]) - - assert_slices_equivalent(SLC['2015-02':'2014-10':-1], SLC[13:8:-1]) - assert_slices_equivalent(SLC[Period('2015-02'):Period('2014-10'):-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC['2015-02':Period('2014-10'):-1], - SLC[13:8:-1]) - assert_slices_equivalent(SLC[Period('2015-02'):'2014-10':-1], - SLC[13:8:-1]) - - assert_slices_equivalent(SLC['2014-10':'2015-02':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), - period_range('2014-01', periods=20, freq='M')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_contains(self): - rng = period_range('2007-01', freq='M', periods=10) - - self.assertTrue(Period('2007-01', freq='M') in rng) - self.assertFalse(Period('2007-01', freq='D') in rng) - self.assertFalse(Period('2007-01', freq='2M') in rng) - - def test_contains_nat(self): - # GH13582 - idx = period_range('2007-01', freq='M', periods=10) - self.assertFalse(pd.NaT in idx) - self.assertFalse(None in idx) - self.assertFalse(float('nan') in idx) - self.assertFalse(np.nan in idx) - - idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertTrue(pd.NaT in idx) - self.assertTrue(None in idx) - self.assertTrue(float('nan') in idx) - self.assertTrue(np.nan in idx) - - def test_sub(self): - rng = period_range('2007-01', periods=50) - - result = rng - 5 - exp = rng + (-5) - tm.assert_index_equal(result, exp) - - def test_periods_number_check(self): - with tm.assertRaises(ValueError): - period_range('2011-1-1', '2012-1-1', 'B') - - def test_tolist(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - rs = index.tolist() - [tm.assertIsInstance(x, Period) for x in rs] - - recon = PeriodIndex(rs) - tm.assert_index_equal(index, recon) - - def test_to_timestamp(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - series = Series(1, index=index, name='foo') - - exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') - result = series.to_timestamp(how='end') - tm.assert_index_equal(result.index, 
exp_index) - self.assertEqual(result.name, 'foo') - - exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') - result = series.to_timestamp(how='start') - tm.assert_index_equal(result.index, exp_index) - - def _get_with_delta(delta, freq='A-DEC'): - return date_range(to_datetime('1/1/2001') + delta, - to_datetime('12/31/2009') + delta, freq=freq) - - delta = timedelta(hours=23) - result = series.to_timestamp('H', 'end') - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = series.to_timestamp('T', 'end') - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - result = series.to_timestamp('S', 'end') - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - index = PeriodIndex(freq='H', start='1/1/2001', end='1/2/2001') - series = Series(1, index=index, name='foo') - - exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59', - freq='H') - result = series.to_timestamp(how='end') - tm.assert_index_equal(result.index, exp_index) - self.assertEqual(result.name, 'foo') - - def test_to_timestamp_quarterly_bug(self): - years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(lrange(1, 5), 40) - - pindex = PeriodIndex(year=years, quarter=quarters) - - stamps = pindex.to_timestamp('D', 'end') - expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex]) - tm.assert_index_equal(stamps, expected) - - def test_to_timestamp_preserve_name(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', - name='foo') - self.assertEqual(index.name, 'foo') - - conv = index.to_timestamp('D') - self.assertEqual(conv.name, 'foo') - - def test_to_timestamp_repr_is_code(self): - zs = [Timestamp('99-04-17 00:00:00', tz='UTC'), - Timestamp('2001-04-17 00:00:00', tz='UTC'), - Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'), - Timestamp('2001-04-17 00:00:00', tz=None)] - for z in zs: - self.assertEqual(eval(repr(z)), z) - - def test_to_timestamp_pi_nat(self): - # GH 7228 - index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', - name='idx') - - result = index.to_timestamp('D') - expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1), - datetime(2011, 2, 1)], name='idx') - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, 'idx') - - result2 = result.to_period(freq='M') - tm.assert_index_equal(result2, index) - self.assertEqual(result2.name, 'idx') - - result3 = result.to_period(freq='3M') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') - self.assert_index_equal(result3, exp) - self.assertEqual(result3.freqstr, '3M') - - msg = ('Frequency must be positive, because it' - ' represents span: -2A') - with tm.assertRaisesRegexp(ValueError, msg): - result.to_period(freq='-2A') - - def test_to_timestamp_pi_mult(self): - idx = PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='2M', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01', 'NaT', '2011-02-01'], name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-02-28', 'NaT', '2011-03-31'], name='idx') - self.assert_index_equal(result, expected) - - def test_to_timestamp_pi_combined(self): - idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') - result = idx.to_timestamp() - expected = DatetimeIndex( - ['2011-01-01 00:00', '2011-01-02 01:00'], 
name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E') - expected = DatetimeIndex( - ['2011-01-02 00:59:59', '2011-01-03 01:59:59'], name='idx') - self.assert_index_equal(result, expected) - result = idx.to_timestamp(how='E', freq='H') - expected = DatetimeIndex( - ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') - self.assert_index_equal(result, expected) - - def test_to_timestamp_to_period_astype(self): - idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') - - res = idx.astype('period[M]') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') - tm.assert_index_equal(res, exp) - - res = idx.astype('period[3M]') - exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') - self.assert_index_equal(res, exp) - - def test_start_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') - tm.assert_index_equal(index.start_time, expected_index) - - def test_end_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') - expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') - tm.assert_index_equal(index.end_time, expected_index) - - def test_as_frame_columns(self): - rng = period_range('1/1/2000', periods=5) - df = DataFrame(randn(10, 5), columns=rng) - - ts = df[rng[0]] - tm.assert_series_equal(ts, df.iloc[:, 0]) - - # GH # 1211 - repr(df) - - ts = df['1/1/2000'] - tm.assert_series_equal(ts, df.iloc[:, 0]) - - def test_indexing(self): - - # GH 4390, iat incorrectly indexing - index = period_range('1/1/2001', periods=10) - s = Series(randn(10), index=index) - expected = s[index[0]] - result = s.iat[0] - self.assertEqual(expected, result) - - def test_frame_setitem(self): - rng = period_range('1/1/2000', periods=5, name='index') - df = DataFrame(randn(5, 3), index=rng) - - df['Index'] = rng - rs = Index(df['Index']) - tm.assert_index_equal(rs, rng, check_names=False) - self.assertEqual(rs.name, 'Index') - self.assertEqual(rng.name, 'index') - - rs = df.reset_index().set_index('index') - tm.assertIsInstance(rs.index, PeriodIndex) - tm.assert_index_equal(rs.index, rng) - - def test_period_set_index_reindex(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = period_range('2011/01/01', periods=6, freq='M') - idx2 = period_range('2013', periods=6, freq='A') - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.set_index(idx2) - tm.assert_index_equal(df.index, idx2) - - def test_frame_to_time_stamp(self): - K = 5 - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - df = DataFrame(randn(len(index), K), index=index) - df['mix'] = 'a' - - exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') - result = df.to_timestamp('D', 'end') - tm.assert_index_equal(result.index, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') - result = df.to_timestamp('D', 'start') - tm.assert_index_equal(result.index, exp_index) - - def _get_with_delta(delta, freq='A-DEC'): - return date_range(to_datetime('1/1/2001') + delta, - to_datetime('12/31/2009') + delta, freq=freq) - - delta = timedelta(hours=23) - result = df.to_timestamp('H', 'end') - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp('T', 'end') - exp_index = _get_with_delta(delta) - 
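The how='start'/'end' distinction driving these expected indexes can be sketched on a small PeriodIndex, under the same API assumption as above:

    import pandas as pd

    pi = pd.period_range('2011-01', periods=3, freq='M')
    pi.to_timestamp(how='start')     # first instant of each month
    pi.to_timestamp('D', how='end')  # last day of each month
    pi.to_timestamp('T', how='end')  # last minute of each month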
tm.assert_index_equal(result.index, exp_index) - - result = df.to_timestamp('S', 'end') - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.index, exp_index) - - # columns - df = df.T - - exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') - result = df.to_timestamp('D', 'end', axis=1) - tm.assert_index_equal(result.columns, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) - - exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN') - result = df.to_timestamp('D', 'start', axis=1) - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23) - result = df.to_timestamp('H', 'end', axis=1) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.columns, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp('T', 'end', axis=1) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.columns, exp_index) - - result = df.to_timestamp('S', 'end', axis=1) - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - tm.assert_index_equal(result.columns, exp_index) - - # invalid axis - tm.assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) - - result1 = df.to_timestamp('5t', axis=1) - result2 = df.to_timestamp('t', axis=1) - expected = pd.date_range('2001-01-01', '2009-01-01', freq='AS') - self.assertTrue(isinstance(result1.columns, DatetimeIndex)) - self.assertTrue(isinstance(result2.columns, DatetimeIndex)) - self.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) - self.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) - # PeriodIndex.to_timestamp always use 'infer' - self.assertEqual(result1.columns.freqstr, 'AS-JAN') - self.assertEqual(result2.columns.freqstr, 'AS-JAN') - - def test_index_duplicate_periods(self): - # monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts[2007] - expected = ts[1:3] - tm.assert_series_equal(result, expected) - result[:] = 1 - self.assertTrue((ts[1:3] == 1).all()) - - # not monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN') - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts[2007] - expected = ts[idx == 2007] - tm.assert_series_equal(result, expected) - - def test_index_unique(self): - idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN') - expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN') - self.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) - - idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN', - tz='US/Eastern') - expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN', - tz='US/Eastern') - self.assert_index_equal(idx.unique(), expected) - self.assertEqual(idx.nunique(), 3) - - def test_constructor(self): - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 9) - - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 4 * 9) - - pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 12 * 9) - - pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009') - self.assertEqual(len(pi), 365 * 9 + 2) - - pi = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009') - self.assertEqual(len(pi), 261 * 9) - - pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00') - self.assertEqual(len(pi), 365 * 24) - - pi = 
PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59') - self.assertEqual(len(pi), 24 * 60) - - pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') - self.assertEqual(len(pi), 24 * 60 * 60) - - start = Period('02-Apr-2005', 'B') - i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) - - end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) - - end_intv = Period('2006-12-31', '1w') - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - end_intv = Period('2006-12-31', ('w', 1)) - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - try: - PeriodIndex(start=start, end=end_intv) - raise AssertionError('Cannot allow mixed freq for start and end') - except ValueError: - pass - - end_intv = Period('2005-05-01', 'B') - i1 = PeriodIndex(start=start, end=end_intv) - - try: - PeriodIndex(start=start) - raise AssertionError( - 'Must specify periods if missing start or end') - except ValueError: - pass - - # infer freq from first element - i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - # Mixed freq should fail - vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) - vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) - - def test_numpy_repeat(self): - index = period_range('20010101', periods=2) - expected = PeriodIndex([Period('2001-01-01'), Period('2001-01-01'), - Period('2001-01-02'), Period('2001-01-02')]) - - tm.assert_index_equal(np.repeat(index, 2), expected) - - msg = "the 'axis' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.repeat, index, 2, axis=1) - - def test_shift(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') - - tm.assert_index_equal(pi1.shift(0), pi1) - - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') - self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(1), pi2) - - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') - 
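For reference, shifting a PeriodIndex moves every element by whole periods of its own frequency; a sketch:

    import pandas as pd

    pi = pd.period_range('2011-01', periods=4, freq='M')
    pi.shift(1)    # 2011-02 .. 2011-05
    pi.shift(-1)   # 2010-12 .. 2011-03 (a NaT entry would simply stay NaT)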
self.assertEqual(len(pi1), len(pi2)) - self.assert_index_equal(pi1.shift(-1), pi2) - - def test_shift_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(1) - expected = PeriodIndex(['2011-02', '2011-03', 'NaT', - '2011-05'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - - def test_shift_ndarray(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, 2, 3, 4])) - expected = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - result = idx.shift(np.array([1, -2, 3, -4])) - expected = PeriodIndex(['2011-02', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') - tm.assert_index_equal(result, expected) - - def test_asfreq(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') - pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') - pi3 = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2001') - pi4 = PeriodIndex(freq='D', start='1/1/2001', end='1/1/2001') - pi5 = PeriodIndex(freq='H', start='1/1/2001', end='1/1/2001 00:00') - pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') - pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') - - self.assertEqual(pi1.asfreq('Q', 'S'), pi2) - self.assertEqual(pi1.asfreq('Q', 's'), pi2) - self.assertEqual(pi1.asfreq('M', 'start'), pi3) - self.assertEqual(pi1.asfreq('D', 'StarT'), pi4) - self.assertEqual(pi1.asfreq('H', 'beGIN'), pi5) - self.assertEqual(pi1.asfreq('Min', 'S'), pi6) - self.assertEqual(pi1.asfreq('S', 'S'), pi7) - - self.assertEqual(pi2.asfreq('A', 'S'), pi1) - self.assertEqual(pi2.asfreq('M', 'S'), pi3) - self.assertEqual(pi2.asfreq('D', 'S'), pi4) - self.assertEqual(pi2.asfreq('H', 'S'), pi5) - self.assertEqual(pi2.asfreq('Min', 'S'), pi6) - self.assertEqual(pi2.asfreq('S', 'S'), pi7) - - self.assertEqual(pi3.asfreq('A', 'S'), pi1) - self.assertEqual(pi3.asfreq('Q', 'S'), pi2) - self.assertEqual(pi3.asfreq('D', 'S'), pi4) - self.assertEqual(pi3.asfreq('H', 'S'), pi5) - self.assertEqual(pi3.asfreq('Min', 'S'), pi6) - self.assertEqual(pi3.asfreq('S', 'S'), pi7) - - self.assertEqual(pi4.asfreq('A', 'S'), pi1) - self.assertEqual(pi4.asfreq('Q', 'S'), pi2) - self.assertEqual(pi4.asfreq('M', 'S'), pi3) - self.assertEqual(pi4.asfreq('H', 'S'), pi5) - self.assertEqual(pi4.asfreq('Min', 'S'), pi6) - self.assertEqual(pi4.asfreq('S', 'S'), pi7) - - self.assertEqual(pi5.asfreq('A', 'S'), pi1) - self.assertEqual(pi5.asfreq('Q', 'S'), pi2) - self.assertEqual(pi5.asfreq('M', 'S'), pi3) - self.assertEqual(pi5.asfreq('D', 'S'), pi4) - self.assertEqual(pi5.asfreq('Min', 'S'), pi6) - self.assertEqual(pi5.asfreq('S', 'S'), pi7) - - self.assertEqual(pi6.asfreq('A', 'S'), pi1) - self.assertEqual(pi6.asfreq('Q', 'S'), pi2) - self.assertEqual(pi6.asfreq('M', 'S'), pi3) - self.assertEqual(pi6.asfreq('D', 'S'), pi4) - self.assertEqual(pi6.asfreq('H', 'S'), pi5) - self.assertEqual(pi6.asfreq('S', 'S'), pi7) - - self.assertEqual(pi7.asfreq('A', 'S'), pi1) - self.assertEqual(pi7.asfreq('Q', 'S'), pi2) - self.assertEqual(pi7.asfreq('M', 'S'), pi3) - self.assertEqual(pi7.asfreq('D', 'S'), pi4) - self.assertEqual(pi7.asfreq('H', 'S'), pi5) - self.assertEqual(pi7.asfreq('Min', 'S'), pi6) - - self.assertRaises(ValueError, pi7.asfreq, 'T', 'foo') - result1 = 
pi1.asfreq('3M') - result2 = pi1.asfreq('M') - expected = PeriodIndex(freq='M', start='2001-12', end='2001-12') - self.assert_numpy_array_equal(result1.asi8, expected.asi8) - self.assertEqual(result1.freqstr, '3M') - self.assert_numpy_array_equal(result2.asi8, expected.asi8) - self.assertEqual(result2.freqstr, 'M') - - def test_asfreq_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M') - result = idx.asfreq(freq='Q') - expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') - tm.assert_index_equal(result, expected) - - def test_asfreq_mult_pi(self): - pi = PeriodIndex(['2001-01', '2001-02', 'NaT', '2001-03'], freq='2M') - - for freq in ['D', '3D']: - result = pi.asfreq(freq) - exp = PeriodIndex(['2001-02-28', '2001-03-31', 'NaT', - '2001-04-30'], freq=freq) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - result = pi.asfreq(freq, how='S') - exp = PeriodIndex(['2001-01-01', '2001-02-01', 'NaT', - '2001-03-01'], freq=freq) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - def test_asfreq_combined_pi(self): - pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], - freq='H') - exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], - freq='25H') - for freq, how in zip(['1D1H', '1H1D'], ['S', 'E']): - result = pi.asfreq(freq, how=how) - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - for freq in ['1D1H', '1H1D']: - pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', - 'NaT'], freq=freq) - result = pi.asfreq('H') - exp = PeriodIndex(['2001-01-02 00:00', '2001-01-03 02:00', 'NaT'], - freq='H') - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - pi = pd.PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', - 'NaT'], freq=freq) - result = pi.asfreq('H', how='S') - exp = PeriodIndex(['2001-01-01 00:00', '2001-01-02 02:00', 'NaT'], - freq='H') - self.assert_index_equal(result, exp) - self.assertEqual(result.freq, exp.freq) - - def test_period_index_length(self): - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 9) - - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 4 * 9) - - pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - self.assertEqual(len(pi), 12 * 9) - - start = Period('02-Apr-2005', 'B') - i1 = PeriodIndex(start=start, periods=20) - self.assertEqual(len(i1), 20) - self.assertEqual(i1.freq, start.freq) - self.assertEqual(i1[0], start) - - end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), 10) - self.assertEqual(i1.freq, end_intv.freq) - self.assertEqual(i1[-1], end_intv) - - end_intv = Period('2006-12-31', '1w') - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - end_intv = Period('2006-12-31', ('w', 1)) - i2 = PeriodIndex(end=end_intv, periods=10) - self.assertEqual(len(i1), len(i2)) - self.assertTrue((i1 == i2).all()) - self.assertEqual(i1.freq, i2.freq) - - try: - PeriodIndex(start=start, end=end_intv) - raise AssertionError('Cannot allow mixed freq for start and end') - except ValueError: - pass - - end_intv = Period('2005-05-01', 'B') - i1 = PeriodIndex(start=start, end=end_intv) - - try: - PeriodIndex(start=start) - raise AssertionError( - 'Must specify periods if missing start or end') - except ValueError: - 
pass - - # infer freq from first element - i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) - self.assertEqual(len(i2), 2) - self.assertEqual(i2[0], end_intv) - - # Mixed freq should fail - vals = [end_intv, Period('2006-12-31', 'w')] - self.assertRaises(ValueError, PeriodIndex, vals) - vals = np.array(vals) - self.assertRaises(ValueError, PeriodIndex, vals) - - def test_frame_index_to_string(self): - index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') - frame = DataFrame(np.random.randn(3, 4), index=index) - - # it works! - frame.to_string() - - def test_asfreq_ts(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') - ts = Series(np.random.randn(len(index)), index=index) - df = DataFrame(np.random.randn(len(index), 3), index=index) - - result = ts.asfreq('D', how='end') - df_result = df.asfreq('D', how='end') - exp_index = index.asfreq('D', how='end') - self.assertEqual(len(result), len(ts)) - tm.assert_index_equal(result.index, exp_index) - tm.assert_index_equal(df_result.index, exp_index) - - result = ts.asfreq('D', how='start') - self.assertEqual(len(result), len(ts)) - tm.assert_index_equal(result.index, index.asfreq('D', how='start')) - - def test_badinput(self): - self.assertRaises(ValueError, Period, '-2000', 'A') - self.assertRaises(tslib.DateParseError, Period, '0', 'A') - self.assertRaises(tslib.DateParseError, Period, '1/1/-2000', 'A') - - def test_negative_ordinals(self): - Period(ordinal=-1000, freq='A') - Period(ordinal=0, freq='A') - - idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq='A') - idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq='A') - tm.assert_index_equal(idx1, idx2) - - def test_dti_to_period(self): - dti = DatetimeIndex(start='1/1/2005', end='12/1/2005', freq='M') - pi1 = dti.to_period() - pi2 = dti.to_period(freq='D') - pi3 = dti.to_period(freq='3D') - - self.assertEqual(pi1[0], Period('Jan 2005', freq='M')) - self.assertEqual(pi2[0], Period('1/31/2005', freq='D')) - self.assertEqual(pi3[0], Period('1/31/2005', freq='3D')) - - self.assertEqual(pi1[-1], Period('Nov 2005', freq='M')) - self.assertEqual(pi2[-1], Period('11/30/2005', freq='D')) - self.assertEqual(pi3[-1], Period('11/30/2005', freq='3D')) - - tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005', - freq='M')) - tm.assert_index_equal(pi2, period_range('1/1/2005', '11/1/2005', - freq='M').asfreq('D')) - tm.assert_index_equal(pi3, period_range('1/1/2005', '11/1/2005', - freq='M').asfreq('3D')) - - def test_pindex_slice_index(self): - pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') - s = Series(np.random.rand(len(pi)), index=pi) - res = s['2010'] - exp = s[0:12] - tm.assert_series_equal(res, exp) - res = s['2011'] - exp = s[12:24] - tm.assert_series_equal(res, exp) - - def test_getitem_day(self): - # GH 6716 - # Confirm DatetimeIndex and PeriodIndex works identically - didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - 
- s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01'], s[0:31]) - tm.assert_series_equal(s['2013/02'], s[31:59]) - tm.assert_series_equal(s['2014'], s[365:]) - - invalid = ['2013/02/01 9H', '2013/02/01 09:00'] - for v in invalid: - with tm.assertRaises(KeyError): - s[v] - - def test_range_slice_day(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) - - # changed to TypeError in 1.12 - # https://github.com/numpy/numpy/pull/6271 - exc = IndexError if _np_version_under1p12 else TypeError - - for idx in [didx, pidx]: - # slices against index should raise IndexError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - with tm.assertRaises(exc): - idx[v:] - - s = Series(np.random.rand(len(idx)), index=idx) - - tm.assert_series_equal(s['2013/01/02':], s[1:]) - tm.assert_series_equal(s['2013/01/02':'2013/01/05'], s[1:5]) - tm.assert_series_equal(s['2013/02':], s[31:]) - tm.assert_series_equal(s['2014':], s[365:]) - - invalid = ['2013/02/01 9H', '2013/02/01 09:00'] - for v in invalid: - with tm.assertRaises(exc): - idx[v:] - - def test_getitem_seconds(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', - periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) - tm.assert_series_equal(s['2013/01/01 9H'], s[:3600]) - for d in ['2013/01/01', '2013/01', '2013']: - tm.assert_series_equal(s[d], s) - - def test_range_slice_seconds(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', - periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) - - # changed to TypeError in 1.12 - # https://github.com/numpy/numpy/pull/6271 - exc = IndexError if _np_version_under1p12 else TypeError - - for idx in [didx, pidx]: - # slices against index should raise IndexError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - with tm.assertRaises(exc): - idx[v:] - - s = Series(np.random.rand(len(idx)), index=idx) - - tm.assert_series_equal(s['2013/01/01 09:05':'2013/01/01 09:10'], - s[300:660]) - tm.assert_series_equal(s['2013/01/01 10:00':'2013/01/01 10:05'], - s[3600:3960]) - tm.assert_series_equal(s['2013/01/01 10H':], s[3600:]) - tm.assert_series_equal(s[:'2013/01/01 09:30'], s[:1860]) - for d in ['2013/01/01', '2013/01', '2013']: - tm.assert_series_equal(s[d:], s) - - def test_range_slice_outofbounds(self): - # GH 5407 - didx = DatetimeIndex(start='2013/10/01', freq='D', periods=10) - pidx = PeriodIndex(start='2013/10/01', freq='D', periods=10) - - for idx in [didx, pidx]: - df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) - empty = DataFrame(index=idx.__class__([], freq='D'), - columns=['units']) - empty['units'] = empty['units'].astype('int64') - - tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty) - 
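The out-of-bounds behaviour asserted around here (GH 5407) is that partial-string slices clip to the index rather than raise; a sketch with hypothetical data:

    import pandas as pd

    idx = pd.period_range('2013-10-01', periods=10, freq='D')
    df = pd.DataFrame({'units': range(100, 110)}, index=idx)

    df['2013/09/01':'2013/09/30']   # entirely before the index: empty frame
    df['2013/09/30':'2013/10/02']   # clipped to the first two rows
    df['2013-11':'2013-12']         # entirely after the index: empty frame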
tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], df.iloc[:2]) - tm.assert_frame_equal(df['2013/10/01':'2013/10/02'], df.iloc[:2]) - tm.assert_frame_equal(df['2013/10/02':'2013/09/30'], empty) - tm.assert_frame_equal(df['2013/10/15':'2013/10/17'], empty) - tm.assert_frame_equal(df['2013-06':'2013-09'], empty) - tm.assert_frame_equal(df['2013-11':'2013-12'], empty) - - def test_astype_asfreq(self): - pi1 = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], freq='D') - exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') - tm.assert_index_equal(pi1.asfreq('M'), exp) - tm.assert_index_equal(pi1.astype('period[M]'), exp) - - exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='3M') - tm.assert_index_equal(pi1.asfreq('3M'), exp) - tm.assert_index_equal(pi1.astype('period[3M]'), exp) - - def test_pindex_fieldaccessor_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2012-03', '2012-04'], freq='D') - - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) - - def test_pindex_qaccess(self): - pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') - s = Series(np.random.rand(len(pi)), index=pi).cumsum() - # Todo: fix these accessors! - self.assertEqual(s['05Q4'], s[2]) - - def test_period_dt64_round_trip(self): - dti = date_range('1/1/2000', '1/7/2002', freq='B') - pi = dti.to_period() - tm.assert_index_equal(pi.to_timestamp(), dti) - - dti = date_range('1/1/2000', '1/7/2002', freq='B') - pi = dti.to_period(freq='H') - tm.assert_index_equal(pi.to_timestamp(), dti) - - def test_period_astype_to_timestamp(self): - pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) - tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) - tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) - - exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - tz='US/Eastern') - res = pi.astype('datetime64[ns, US/Eastern]') - tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) - - exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], - tz='US/Eastern') - res = pi.astype('datetime64[ns, US/Eastern]', how='end') - tm.assert_index_equal(res, exp) - - def test_to_period_quarterly(self): - # make sure we can make the round trip - for month in MONTHS: - freq = 'Q-%s' % month - rng = period_range('1989Q3', '1991Q3', freq=freq) - stamps = rng.to_timestamp() - result = stamps.to_period(freq) - tm.assert_index_equal(rng, result) - - def test_to_period_quarterlyish(self): - offsets = ['BQ', 'QS', 'BQS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'Q-DEC') - - def test_to_period_annualish(self): - offsets = ['BA', 'AS', 'BAS'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'A-DEC') - - def test_to_period_monthish(self): - offsets = ['MS', 'BM'] - for off in offsets: - rng = date_range('01-Jan-2012', periods=8, freq=off) - prng = rng.to_period() - self.assertEqual(prng.freq, 'M') - - rng = date_range('01-Jan-2012', periods=8, freq='M') - prng = rng.to_period() - self.assertEqual(prng.freq, 'M') - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with 
self.assertRaisesRegexp(ValueError, msg): - date_range('01-Jan-2012', periods=8, freq='EOM') - - def test_multiples(self): - result1 = Period('1989', freq='2A') - result2 = Period('1989', freq='A') - self.assertEqual(result1.ordinal, result2.ordinal) - self.assertEqual(result1.freqstr, '2A-DEC') - self.assertEqual(result2.freqstr, 'A-DEC') - self.assertEqual(result1.freq, offsets.YearEnd(2)) - self.assertEqual(result2.freq, offsets.YearEnd()) - - self.assertEqual((result1 + 1).ordinal, result1.ordinal + 2) - self.assertEqual((1 + result1).ordinal, result1.ordinal + 2) - self.assertEqual((result1 - 1).ordinal, result2.ordinal - 2) - self.assertEqual((-1 + result1).ordinal, result2.ordinal - 2) - - def test_pindex_multiples(self): - pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') - expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07', - '2011-09', '2011-11'], freq='2M') - tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') - - pi = period_range(start='1/1/11', end='12/31/11', freq='2M') - tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') - - pi = period_range(start='1/1/11', periods=6, freq='2M') - tm.assert_index_equal(pi, expected) - self.assertEqual(pi.freq, offsets.MonthEnd(2)) - self.assertEqual(pi.freqstr, '2M') - - def test_iteration(self): - index = PeriodIndex(start='1/1/10', periods=4, freq='B') - - result = list(index) - tm.assertIsInstance(result[0], Period) - self.assertEqual(result[0].freq, index.freq) - - def test_take(self): - index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', - name='idx') - expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), - datetime(2010, 1, 9), datetime(2010, 1, 13)], - freq='D', name='idx') - - taken1 = index.take([5, 6, 8, 12]) - taken2 = index[[5, 6, 8, 12]] - - for taken in [taken1, taken2]: - tm.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, PeriodIndex) - self.assertEqual(taken.freq, index.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', freq='D') - result = idx.take(np.array([1, 0, -1])) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_joins(self): - index = period_range('1/1/2000', '1/20/2000', freq='D') - - for kind in ['inner', 'outer', 'left', 'right']: - joined = index.join(index[:-5], how=kind) - - tm.assertIsInstance(joined, PeriodIndex) - self.assertEqual(joined.freq, index.freq) - - def test_join_self(self): - 
index = period_range('1/1/2000', '1/20/2000', freq='D') - - for kind in ['inner', 'outer', 'left', 'right']: - res = index.join(index, how=kind) - self.assertIs(index, res) - - def test_join_does_not_recur(self): - df = tm.makeCustomDataframe( - 3, 2, data_gen_f=lambda *args: np.random.randint(2), - c_idx_type='p', r_idx_type='dt') - s = df.iloc[:2, 0] - - res = s.index.join(df.columns, how='outer') - expected = Index([s.index[0], s.index[1], - df.columns[0], df.columns[1]], object) - tm.assert_index_equal(res, expected) - - def test_align_series(self): - rng = period_range('1/1/2000', '1/1/2010', freq='A') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts + ts[::2] - expected = ts + ts - expected[1::2] = np.nan - tm.assert_series_equal(result, expected) - - result = ts + _permute(ts[::2]) - tm.assert_series_equal(result, expected) - - # it works! - for kind in ['inner', 'outer', 'left', 'right']: - ts.align(ts[::2], join=kind) - msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - ts + ts.asfreq('D', how="end") - - def test_align_frame(self): - rng = period_range('1/1/2000', '1/1/2010', freq='A') - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts + ts[::2] - expected = ts + ts - expected.values[1::2] = np.nan - tm.assert_frame_equal(result, expected) - - result = ts + _permute(ts[::2]) - tm.assert_frame_equal(result, expected) - - def test_union(self): - index = period_range('1/1/2000', '1/20/2000', freq='D') - - result = index[:-5].union(index[10:]) - tm.assert_index_equal(result, index) - - # not in order - result = _permute(index[:-5]).union(_permute(index[10:])) - tm.assert_index_equal(result, index) - - # raise if different frequencies - index = period_range('1/1/2000', '1/20/2000', freq='D') - index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): - index.union(index2) - - msg = 'can only call with other PeriodIndex-ed objects' - with tm.assertRaisesRegexp(ValueError, msg): - index.join(index.to_timestamp()) - - index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with tm.assertRaises(period.IncompatibleFrequency): - index.join(index3) - - def test_union_dataframe_index(self): - rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M') - s1 = pd.Series(np.random.randn(len(rng1)), rng1) - - rng2 = pd.period_range('1/1/1980', '12/1/2001', freq='M') - s2 = pd.Series(np.random.randn(len(rng2)), rng2) - df = pd.DataFrame({'s1': s1, 's2': s2}) - - exp = pd.period_range('1/1/1980', '1/1/2012', freq='M') - self.assert_index_equal(df.index, exp) - - def test_intersection(self): - index = period_range('1/1/2000', '1/20/2000', freq='D') - - result = index[:-5].intersection(index[10:]) - tm.assert_index_equal(result, index[10:-5]) - - # not in order - left = _permute(index[:-5]) - right = _permute(index[10:]) - result = left.intersection(right).sort_values() - tm.assert_index_equal(result, index[10:-5]) - - # raise if different frequencies - index = period_range('1/1/2000', '1/20/2000', freq='D') - index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - with tm.assertRaises(period.IncompatibleFrequency): - index.intersection(index2) - - index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - with tm.assertRaises(period.IncompatibleFrequency): - index.intersection(index3) - - def test_intersection_cases(self): - base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx') - - # if target has 
the same name, it is preserved - rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx') - expected2 = period_range('6/1/2000', '6/20/2000', freq='D', - name='idx') - - # if target name is different, it will be reset - rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other') - expected3 = period_range('6/1/2000', '6/20/2000', freq='D', - name=None) - - rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx') - expected4 = PeriodIndex([], name='idx', freq='D') - - for (rng, expected) in [(rng2, expected2), (rng3, expected3), - (rng4, expected4)]: - result = base.intersection(rng) - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - # non-monotonic - base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02', - '2011-01-03'], freq='D', name='idx') - - rng2 = PeriodIndex(['2011-01-04', '2011-01-02', - '2011-02-02', '2011-02-03'], - freq='D', name='idx') - expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D', - name='idx') - - rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02', - '2011-02-03'], - freq='D', name='other') - expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D', - name=None) - - rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx') - expected4 = PeriodIndex([], freq='D', name='idx') - - for (rng, expected) in [(rng2, expected2), (rng3, expected3), - (rng4, expected4)]: - result = base.intersection(rng) - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, 'D') - - # empty same freq - rng = date_range('6/1/2000', '6/15/2000', freq='T') - result = rng[0:0].intersection(rng) - self.assertEqual(len(result), 0) - - result = rng.intersection(rng[0:0]) - self.assertEqual(len(result), 0) - - def test_fields(self): - # year, month, day, hour, minute - # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter - # qyear - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2005') - self._check_all_fields(pi) - - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2002') - self._check_all_fields(pi) - - pi = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002') - self._check_all_fields(pi) - - pi = PeriodIndex(freq='D', start='12/1/2001', end='6/1/2001') - self._check_all_fields(pi) - - pi = PeriodIndex(freq='B', start='12/1/2001', end='6/1/2001') - self._check_all_fields(pi) - - pi = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00') - self._check_all_fields(pi) - - pi = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:20') - self._check_all_fields(pi) - - pi = PeriodIndex(freq='S', start='12/31/2001 00:00:00', - end='12/31/2001 00:05:00') - self._check_all_fields(pi) - - end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) - self._check_all_fields(i1) - - def _check_all_fields(self, periodindex): - fields = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', 'dayofyear', - 'quarter', 'qyear', 'days_in_month', 'is_leap_year'] - - periods = list(periodindex) - s = pd.Series(periodindex) - - for field in fields: - field_idx = getattr(periodindex, field) - self.assertEqual(len(periodindex), len(field_idx)) - for x, val in zip(periods, field_idx): - self.assertEqual(getattr(x, field), val) - - if len(s) == 0: - continue - - field_s = getattr(s.dt, field) - self.assertEqual(len(periodindex), len(field_s)) - for x, val in zip(periods, field_s): - 
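# The Series .dt accessor is expected to agree element-by-element with the
# scalar Period attributes, mirroring the PeriodIndex check above -- e.g.,
# pd.Series(pi).dt.quarter[i] == pi[i].quarter for each position i.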
self.assertEqual(getattr(x, field), val) - - def test_is_full(self): - index = PeriodIndex([2005, 2007, 2009], freq='A') - self.assertFalse(index.is_full) - - index = PeriodIndex([2005, 2006, 2007], freq='A') - self.assertTrue(index.is_full) - - index = PeriodIndex([2005, 2005, 2007], freq='A') - self.assertFalse(index.is_full) - - index = PeriodIndex([2005, 2005, 2006], freq='A') - self.assertTrue(index.is_full) - - index = PeriodIndex([2006, 2005, 2005], freq='A') - self.assertRaises(ValueError, getattr, index, 'is_full') - - self.assertTrue(index[:0].is_full) - - def test_map(self): - index = PeriodIndex([2005, 2007, 2009], freq='A') - result = index.map(lambda x: x + 1) - expected = index + 1 - tm.assert_index_equal(result, expected) - - result = index.map(lambda x: x.ordinal) - exp = Index([x.ordinal for x in index]) - tm.assert_index_equal(result, exp) - - def test_map_with_string_constructor(self): - raw = [2005, 2007, 2009] - index = PeriodIndex(raw, freq='A') - types = str, - - if PY3: - # unicode - types += text_type, - - for t in types: - expected = Index(lmap(t, raw)) - res = index.map(t) - - # should return an Index - tm.assertIsInstance(res, Index) - - # preserve element types - self.assertTrue(all(isinstance(resi, t) for resi in res)) - - # lastly, values should compare equal - tm.assert_index_equal(res, expected) - - def test_convert_array_of_periods(self): - rng = period_range('1/1/2000', periods=20, freq='D') - periods = list(rng) - - result = pd.Index(periods) - tm.assertIsInstance(result, PeriodIndex) - - def test_with_multi_index(self): - # #1705 - index = date_range('1/1/2012', periods=4, freq='12H') - index_as_arrays = [index.to_period(freq='D'), index.hour] - - s = Series([0, 1, 2, 3], index_as_arrays) - - tm.assertIsInstance(s.index.levels[0], PeriodIndex) - - tm.assertIsInstance(s.index.values[0][0], Period) - - def test_to_timestamp_1703(self): - index = period_range('1/1/2012', periods=4, freq='D') - - result = index.to_timestamp() - self.assertEqual(result[0], Timestamp('1/1/2012')) - - def test_to_datetime_depr(self): - index = period_range('1/1/2012', periods=4, freq='D') - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = index.to_datetime() - self.assertEqual(result[0], Timestamp('1/1/2012')) - - def test_get_loc_msg(self): - idx = period_range('2000-1-1', freq='A', periods=10) - bad_period = Period('2012', 'A') - self.assertRaises(KeyError, idx.get_loc, bad_period) - - try: - idx.get_loc(bad_period) - except KeyError as inst: - self.assertEqual(inst.args[0], bad_period) - - def test_get_loc_nat(self): - didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) - pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') - - # check DatetimeIndex compat - for idx in [didx, pidx]: - self.assertEqual(idx.get_loc(pd.NaT), 1) - self.assertEqual(idx.get_loc(None), 1) - self.assertEqual(idx.get_loc(float('nan')), 1) - self.assertEqual(idx.get_loc(np.nan), 1) - - def test_append_concat(self): - # #1815 - d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC') - d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC') - - s1 = Series(np.random.randn(10), d1) - s2 = Series(np.random.randn(10), d2) - - s1 = s1.to_period() - s2 = s2.to_period() - - # drops index - result = pd.concat([s1, s2]) - tm.assertIsInstance(result.index, PeriodIndex) - self.assertEqual(result.index[0], s1.index[0]) - - def test_pickle_freq(self): - # GH2891 - prng = period_range('1/1/2011', '1/1/2012', freq='M') - new_prng = 
self.round_trip_pickle(prng) - self.assertEqual(new_prng.freq, offsets.MonthEnd()) - self.assertEqual(new_prng.freqstr, 'M') - - def test_slice_keep_name(self): - idx = period_range('20010101', periods=10, freq='D', name='bob') - self.assertEqual(idx.name, idx[1:].name) - - def test_factorize(self): - idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', - '2014-03', '2014-03'], freq='M') - - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) - exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', - '2014-03', '2014-01'], freq='M') - - exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) - arr, idx = idx2.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) - exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M') - arr, idx = idx2.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - tm.assert_index_equal(idx, exp_idx) - - def test_recreate_from_data(self): - for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']: - org = PeriodIndex(start='2001/04/01', freq=o, periods=1) - idx = PeriodIndex(org.values, freq=o) - tm.assert_index_equal(idx, org) - - def test_combine_first(self): - # GH 3367 - didx = pd.DatetimeIndex(start='1950-01-31', end='1950-07-31', freq='M') - pidx = pd.PeriodIndex(start=pd.Period('1950-1'), - end=pd.Period('1950-7'), freq='M') - # check to be consistent with DatetimeIndex - for idx in [didx, pidx]: - a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) - b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx) - - result = a.combine_first(b) - expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, - dtype=np.float64) - tm.assert_series_equal(result, expected) - - def test_searchsorted(self): - for freq in ['D', '2D']: - pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03', - '2014-01-04', '2014-01-05'], freq=freq) - - p1 = pd.Period('2014-01-01', freq=freq) - self.assertEqual(pidx.searchsorted(p1), 0) - - p2 = pd.Period('2014-01-04', freq=freq) - self.assertEqual(pidx.searchsorted(p2), 3) - - msg = "Input has different freq=H from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='H')) - - msg = "Input has different freq=5D from PeriodIndex" - with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): - pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) - - with tm.assert_produces_warning(FutureWarning): - pidx.searchsorted(key=p2) - - def test_round_trip(self): - - p = Period('2000Q1') - new_p = self.round_trip_pickle(p) - self.assertEqual(new_p, p) - - -def _permute(obj): - return obj.take(np.random.permutation(len(obj))) - - -class TestMethods(tm.TestCase): - - def test_add(self): - dt1 = Period(freq='D', year=2008, month=1, day=1) - dt2 = Period(freq='D', year=2008, month=1, day=2) - self.assertEqual(dt1 + 1, dt2) - self.assertEqual(1 + dt1, dt2) - - def test_add_pdnat(self): - p = pd.Period('2011-01', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) - - p = pd.Period('NaT', freq='M') - self.assertIs(p + pd.NaT, pd.NaT) - self.assertIs(pd.NaT + p, pd.NaT) - - def 
test_add_raises(self): - # GH 4731 - dt1 = Period(freq='D', year=2008, month=1, day=1) - dt2 = Period(freq='D', year=2008, month=1, day=2) - msg = r"unsupported operand type\(s\)" - with tm.assertRaisesRegexp(TypeError, msg): - dt1 + "str" - - msg = r"unsupported operand type\(s\)" - with tm.assertRaisesRegexp(TypeError, msg): - "str" + dt1 - - with tm.assertRaisesRegexp(TypeError, msg): - dt1 + dt2 - - def test_sub(self): - dt1 = Period('2011-01-01', freq='D') - dt2 = Period('2011-01-15', freq='D') - - self.assertEqual(dt1 - dt2, -14) - self.assertEqual(dt2 - dt1, 14) - - msg = r"Input has different freq=M from Period\(freq=D\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - dt1 - pd.Period('2011-02', freq='M') - - def test_add_offset(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('2011', freq=freq) - exp = Period('2013', freq=freq) - self.assertEqual(p + offsets.YearEnd(2), exp) - self.assertEqual(offsets.YearEnd(2) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - for freq in ['M', '2M', '3M']: - p = Period('2011-03', freq=freq) - exp = Period('2011-05', freq=freq) - self.assertEqual(p + offsets.MonthEnd(2), exp) - self.assertEqual(offsets.MonthEnd(2) + p, exp) - - exp = Period('2012-03', freq=freq) - self.assertEqual(p + offsets.MonthEnd(12), exp) - self.assertEqual(offsets.MonthEnd(12) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('2011-04-01', freq=freq) - - exp = Period('2011-04-06', freq=freq) - self.assertEqual(p + offsets.Day(5), exp) - self.assertEqual(offsets.Day(5) + p, exp) - - exp = Period('2011-04-02', freq=freq) - self.assertEqual(p + offsets.Hour(24), exp) - self.assertEqual(offsets.Hour(24) + p, exp) - - exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + np.timedelta64(2, 'D'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(2, 'D') + p - - exp = Period('2011-04-02', freq=freq) - self.assertEqual(p + np.timedelta64(3600 * 24, 's'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3600 * 24, 's') + p - - exp = Period('2011-03-30', freq=freq) - self.assertEqual(p + timedelta(-2), exp) - self.assertEqual(timedelta(-2) + p, exp) - - exp = Period('2011-04-03', freq=freq) - self.assertEqual(p + timedelta(hours=48), exp) - self.assertEqual(timedelta(hours=48) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - for freq in ['H', '2H', '3H']: - p = Period('2011-04-01 09:00', freq=freq) - - exp = Period('2011-04-03 09:00', freq=freq) - self.assertEqual(p + offsets.Day(2), exp) - self.assertEqual(offsets.Day(2) + p, exp) - - exp = Period('2011-04-01 12:00', 
freq=freq) - self.assertEqual(p + offsets.Hour(3), exp) - self.assertEqual(offsets.Hour(3) + p, exp) - - exp = Period('2011-04-01 12:00', freq=freq) - self.assertEqual(p + np.timedelta64(3, 'h'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3, 'h') + p - - exp = Period('2011-04-01 10:00', freq=freq) - self.assertEqual(p + np.timedelta64(3600, 's'), exp) - with tm.assertRaises(TypeError): - np.timedelta64(3600, 's') + p - - exp = Period('2011-04-01 11:00', freq=freq) - self.assertEqual(p + timedelta(minutes=120), exp) - self.assertEqual(timedelta(minutes=120) + p, exp) - - exp = Period('2011-04-05 12:00', freq=freq) - self.assertEqual(p + timedelta(days=4, minutes=180), exp) - self.assertEqual(timedelta(days=4, minutes=180) + p, exp) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p - - def test_add_offset_nat(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('NaT', freq=freq) - for o in [offsets.YearEnd(2)]: - self.assertIs(p + o, tslib.NaT) - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), - np.timedelta64(3600 * 24, 's'), timedelta(-2), - timedelta(hours=48)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - for freq in ['H', '2H', '3H']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), - np.timedelta64(3600, 's'), timedelta(minutes=120), - timedelta(days=4, minutes=180)]: - self.assertIs(p + o, tslib.NaT) - - if not isinstance(o, np.timedelta64): - self.assertIs(o + p, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - self.assertIs(p + o, tslib.NaT) - - if isinstance(o, np.timedelta64): - with tm.assertRaises(TypeError): - o + p - else: - self.assertIs(o + p, tslib.NaT) - - def test_sub_pdnat(self): - # GH 13071 - p = pd.Period('2011-01', freq='M') - 
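# A minimal, self-contained sketch of the NaT propagation these assertions
# exercise (variable names are illustrative; behaviour as asserted by the
# tests in this patch for the pandas version it targets):
import pandas as pd

per = pd.Period('2011-01', freq='M')
print(per + pd.NaT)    # NaT
print(pd.NaT + per)    # NaT
print(per - pd.NaT)    # NaT
print(pd.NaT - per)    # NaT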
self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) - - p = pd.Period('NaT', freq='M') - self.assertIs(p - pd.NaT, pd.NaT) - self.assertIs(pd.NaT - p, pd.NaT) - - def test_sub_offset(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('2011', freq=freq) - self.assertEqual(p - offsets.YearEnd(2), Period('2009', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - for freq in ['M', '2M', '3M']: - p = Period('2011-03', freq=freq) - self.assertEqual(p - offsets.MonthEnd(2), - Period('2011-01', freq=freq)) - self.assertEqual(p - offsets.MonthEnd(12), - Period('2010-03', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('2011-04-01', freq=freq) - self.assertEqual(p - offsets.Day(5), - Period('2011-03-27', freq=freq)) - self.assertEqual(p - offsets.Hour(24), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - np.timedelta64(2, 'D'), - Period('2011-03-30', freq=freq)) - self.assertEqual(p - np.timedelta64(3600 * 24, 's'), - Period('2011-03-31', freq=freq)) - self.assertEqual(p - timedelta(-2), - Period('2011-04-03', freq=freq)) - self.assertEqual(p - timedelta(hours=48), - Period('2011-03-30', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - for freq in ['H', '2H', '3H']: - p = Period('2011-04-01 09:00', freq=freq) - self.assertEqual(p - offsets.Day(2), - Period('2011-03-30 09:00', freq=freq)) - self.assertEqual(p - offsets.Hour(3), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3, 'h'), - Period('2011-04-01 06:00', freq=freq)) - self.assertEqual(p - np.timedelta64(3600, 's'), - Period('2011-04-01 08:00', freq=freq)) - self.assertEqual(p - timedelta(minutes=120), - Period('2011-04-01 07:00', freq=freq)) - self.assertEqual(p - timedelta(days=4, minutes=180), - Period('2011-03-28 06:00', freq=freq)) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o - - def test_sub_offset_nat(self): - # freq is DateOffset - for freq in ['A', '2A', '3A']: - p = Period('NaT', freq=freq) - for o in [offsets.YearEnd(2)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p - o, tslib.NaT) - - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(365, 'D'), - timedelta(365)]: - self.assertIs(p - o, tslib.NaT) - - # freq is Tick - for freq in ['D', '2D', '3D']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), - np.timedelta64(3600 * 24, 's'), timedelta(-2), - timedelta(hours=48)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(4, 'h'), - 
timedelta(hours=23)]: - self.assertIs(p - o, tslib.NaT) - - for freq in ['H', '2H', '3H']: - p = Period('NaT', freq=freq) - for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), - np.timedelta64(3600, 's'), timedelta(minutes=120), - timedelta(days=4, minutes=180)]: - self.assertIs(p - o, tslib.NaT) - - for o in [offsets.YearBegin(2), offsets.MonthBegin(1), - offsets.Minute(), np.timedelta64(3200, 's'), - timedelta(hours=23, minutes=30)]: - self.assertIs(p - o, tslib.NaT) - - def test_nat_ops(self): - for freq in ['M', '2M', '3M']: - p = Period('NaT', freq=freq) - self.assertIs(p + 1, tslib.NaT) - self.assertIs(1 + p, tslib.NaT) - self.assertIs(p - 1, tslib.NaT) - self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) - self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) - - def test_period_ops_offset(self): - p = Period('2011-04-01', freq='D') - result = p + offsets.Day() - exp = pd.Period('2011-04-02', freq='D') - self.assertEqual(result, exp) - - result = p - offsets.Day(2) - exp = pd.Period('2011-03-30', freq='D') - self.assertEqual(result, exp) - - msg = r"Input cannot be converted to Period\(freq=D\)" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - p + offsets.Hour(2) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - p - offsets.Hour(2) - - -class TestPeriodIndexSeriesMethods(tm.TestCase): - """ Test PeriodIndex and Period Series Ops consistency """ - - def _check(self, values, func, expected): - idx = pd.PeriodIndex(values) - result = func(idx) - if isinstance(expected, pd.Index): - tm.assert_index_equal(result, expected) - else: - # comp op results in bool - tm.assert_numpy_array_equal(result, expected) - - s = pd.Series(values) - result = func(s) - - exp = pd.Series(expected, name=values.name) - tm.assert_series_equal(result, exp) - - def test_pi_ops(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - expected = PeriodIndex(['2011-03', '2011-04', - '2011-05', '2011-06'], freq='M', name='idx') - self._check(idx, lambda x: x + 2, expected) - self._check(idx, lambda x: 2 + x, expected) - - self._check(idx + 2, lambda x: x - 2, idx) - result = idx - Period('2011-01', freq='M') - exp = pd.Index([0, 1, 2, 3], name='idx') - tm.assert_index_equal(result, exp) - - result = Period('2011-01', freq='M') - idx - exp = pd.Index([0, -1, -2, -3], name='idx') - tm.assert_index_equal(result, exp) - - def test_pi_ops_errors(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - s = pd.Series(idx) - - msg = r"unsupported operand type\(s\)" - - for obj in [idx, s]: - for ng in ["str", 1.5]: - with tm.assertRaisesRegexp(TypeError, msg): - obj + ng - - with tm.assertRaises(TypeError): - # error message differs between PY2 and 3 - ng + obj - - with tm.assertRaisesRegexp(TypeError, msg): - obj - ng - - with tm.assertRaises(TypeError): - np.add(obj, ng) - - if _np_version_under1p10: - self.assertIs(np.add(ng, obj), NotImplemented) - else: - with tm.assertRaises(TypeError): - np.add(ng, obj) - - with tm.assertRaises(TypeError): - np.subtract(obj, ng) - - if _np_version_under1p10: - self.assertIs(np.subtract(ng, obj), NotImplemented) - else: - with tm.assertRaises(TypeError): - np.subtract(ng, obj) - - def test_pi_ops_nat(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - expected = PeriodIndex(['2011-03', '2011-04', - 'NaT', '2011-06'], freq='M', name='idx') - self._check(idx, lambda x: x + 2, expected) - 
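# Sketch of the integer-shift semantics checked here: adding n to a
# PeriodIndex moves each element by n periods of its freq, and NaT slots
# stay NaT (illustrative names; assuming the pandas API of this era):
import pandas as pd

pidx = pd.PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
                      freq='M', name='idx')
print(pidx + 2)   # ['2011-03', '2011-04', 'NaT', '2011-06'], freq='M'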
self._check(idx, lambda x: 2 + x, expected) - self._check(idx, lambda x: np.add(x, 2), expected) - - self._check(idx + 2, lambda x: x - 2, idx) - self._check(idx + 2, lambda x: np.subtract(x, 2), idx) - - # freq with mult - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='2M', name='idx') - expected = PeriodIndex(['2011-07', '2011-08', - 'NaT', '2011-10'], freq='2M', name='idx') - self._check(idx, lambda x: x + 3, expected) - self._check(idx, lambda x: 3 + x, expected) - self._check(idx, lambda x: np.add(x, 3), expected) - - self._check(idx + 3, lambda x: x - 3, idx) - self._check(idx + 3, lambda x: np.subtract(x, 3), idx) - - def test_pi_ops_array_int(self): - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - f = lambda x: x + np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2011-02', '2011-04', 'NaT', - '2011-08'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: np.add(x, np.array([4, -1, 1, 2])) - exp = PeriodIndex(['2011-05', '2011-01', 'NaT', - '2011-06'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: x - np.array([1, 2, 3, 4]) - exp = PeriodIndex(['2010-12', '2010-12', 'NaT', - '2010-12'], freq='M', name='idx') - self._check(idx, f, exp) - - f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) - exp = PeriodIndex(['2010-10', '2010-12', 'NaT', - '2011-06'], freq='M', name='idx') - self._check(idx, f, exp) - - def test_pi_ops_offset(self): - idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', - '2011-04-01'], freq='D', name='idx') - f = lambda x: x + offsets.Day() - exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02', - '2011-04-02'], freq='D', name='idx') - self._check(idx, f, exp) - - f = lambda x: x + offsets.Day(2) - exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03', - '2011-04-03'], freq='D', name='idx') - self._check(idx, f, exp) - - f = lambda x: x - offsets.Day(2) - exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27', - '2011-03-30'], freq='D', name='idx') - self._check(idx, f, exp) - - def test_pi_offset_errors(self): - idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01', - '2011-04-01'], freq='D', name='idx') - s = pd.Series(idx) - - # Series op is applied per Period instance, thus error is raised - # from Period - msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" - msg_s = r"Input cannot be converted to Period\(freq=D\)" - for obj, msg in [(idx, msg_idx), (s, msg_s)]: - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - obj + offsets.Hour(2) - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - offsets.Hour(2) + obj - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - obj - offsets.Hour(2) - - def test_pi_sub_period(self): - # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - result = idx - pd.Period('2012-01', freq='M') - exp = pd.Index([-12, -11, -10, -9], name='idx') - tm.assert_index_equal(result, exp) - - result = np.subtract(idx, pd.Period('2012-01', freq='M')) - tm.assert_index_equal(result, exp) - - result = pd.Period('2012-01', freq='M') - idx - exp = pd.Index([12, 11, 10, 9], name='idx') - tm.assert_index_equal(result, exp) - - result = np.subtract(pd.Period('2012-01', freq='M'), idx) - if _np_version_under1p10: - self.assertIs(result, NotImplemented) - else: - tm.assert_index_equal(result, exp) - - exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') - tm.assert_index_equal(idx - pd.Period('NaT', 
freq='M'), exp) - tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) - - def test_pi_sub_pdnat(self): - # GH 13071 - idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2011-04'], freq='M', name='idx') - exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx') - tm.assert_index_equal(pd.NaT - idx, exp) - tm.assert_index_equal(idx - pd.NaT, exp) - - def test_pi_sub_period_nat(self): - # GH 13071 - idx = PeriodIndex(['2011-01', 'NaT', '2011-03', - '2011-04'], freq='M', name='idx') - - result = idx - pd.Period('2012-01', freq='M') - exp = pd.Index([-12, np.nan, -10, -9], name='idx') - tm.assert_index_equal(result, exp) - - result = pd.Period('2012-01', freq='M') - idx - exp = pd.Index([12, np.nan, 10, 9], name='idx') - tm.assert_index_equal(result, exp) - - exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') - tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) - tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) - - def test_pi_comp_period(self): - idx = PeriodIndex(['2011-01', '2011-02', '2011-03', - '2011-04'], freq='M', name='idx') - - f = lambda x: x == pd.Period('2011-03', freq='M') - exp = np.array([False, False, True, False], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') == x - self._check(idx, f, exp) - - f = lambda x: x != pd.Period('2011-03', freq='M') - exp = np.array([True, True, False, True], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') != x - self._check(idx, f, exp) - - f = lambda x: pd.Period('2011-03', freq='M') >= x - exp = np.array([True, True, True, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: x > pd.Period('2011-03', freq='M') - exp = np.array([False, False, False, True], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: pd.Period('2011-03', freq='M') >= x - exp = np.array([True, True, True, False], dtype=np.bool) - self._check(idx, f, exp) - - def test_pi_comp_period_nat(self): - idx = PeriodIndex(['2011-01', 'NaT', '2011-03', - '2011-04'], freq='M', name='idx') - - f = lambda x: x == pd.Period('2011-03', freq='M') - exp = np.array([False, False, True, False], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') == x - self._check(idx, f, exp) - - f = lambda x: x == tslib.NaT - exp = np.array([False, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: tslib.NaT == x - self._check(idx, f, exp) - - f = lambda x: x != pd.Period('2011-03', freq='M') - exp = np.array([True, True, False, True], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: pd.Period('2011-03', freq='M') != x - self._check(idx, f, exp) - - f = lambda x: x != tslib.NaT - exp = np.array([True, True, True, True], dtype=np.bool) - self._check(idx, f, exp) - f = lambda x: tslib.NaT != x - self._check(idx, f, exp) - - f = lambda x: pd.Period('2011-03', freq='M') >= x - exp = np.array([True, False, True, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: x < pd.Period('2011-03', freq='M') - exp = np.array([True, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: x > tslib.NaT - exp = np.array([False, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - - f = lambda x: tslib.NaT >= x - exp = np.array([False, False, False, False], dtype=np.bool) - self._check(idx, f, exp) - - -class TestPeriodRepresentation(tm.TestCase): - """ - Wish to match NumPy units - """ - - def test_annual(self): - self._check_freq('A', 1970) - - def 
test_monthly(self): - self._check_freq('M', '1970-01') - - def test_weekly(self): - self._check_freq('W-THU', '1970-01-01') - - def test_daily(self): - self._check_freq('D', '1970-01-01') - - def test_business_daily(self): - self._check_freq('B', '1970-01-01') - - def test_hourly(self): - self._check_freq('H', '1970-01-01') - - def test_minutely(self): - self._check_freq('T', '1970-01-01') - - def test_secondly(self): - self._check_freq('S', '1970-01-01') - - def test_millisecondly(self): - self._check_freq('L', '1970-01-01') - - def test_microsecondly(self): - self._check_freq('U', '1970-01-01') - - def test_nanosecondly(self): - self._check_freq('N', '1970-01-01') - - def _check_freq(self, freq, base_date): - rng = PeriodIndex(start=base_date, periods=10, freq=freq) - exp = np.arange(10, dtype=np.int64) - self.assert_numpy_array_equal(rng._values, exp) - self.assert_numpy_array_equal(rng.asi8, exp) - - def test_negone_ordinals(self): - freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S'] - - period = Period(ordinal=-1, freq='D') - for freq in freqs: - repr(period.asfreq(freq)) - - for freq in freqs: - period = Period(ordinal=-1, freq=freq) - repr(period) - self.assertEqual(period.year, 1969) - - period = Period(ordinal=-1, freq='B') - repr(period) - period = Period(ordinal=-1, freq='W') - repr(period) - - -class TestComparisons(tm.TestCase): - - def setUp(self): - self.january1 = Period('2000-01', 'M') - self.january2 = Period('2000-01', 'M') - self.february = Period('2000-02', 'M') - self.march = Period('2000-03', 'M') - self.day = Period('2012-01-01', 'D') - - def test_equal(self): - self.assertEqual(self.january1, self.january2) - - def test_equal_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 == self.day - - def test_notEqual(self): - self.assertNotEqual(self.january1, 1) - self.assertNotEqual(self.january1, self.february) - - def test_greater(self): - self.assertTrue(self.february > self.january1) - - def test_greater_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 > self.day - - def test_greater_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 > 1 - - def test_greaterEqual(self): - self.assertTrue(self.january1 >= self.january2) - - def test_greaterEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 >= self.day - - with tm.assertRaises(TypeError): - print(self.january1 >= 1) - - def test_smallerEqual(self): - self.assertTrue(self.january1 <= self.january2) - - def test_smallerEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 <= self.day - - def test_smallerEqual_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 <= 1 - - def test_smaller(self): - self.assertTrue(self.january1 < self.february) - - def test_smaller_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 < self.day - - def test_smaller_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 < 1 - - def test_sort(self): - periods = [self.march, self.january1, self.february] - correctPeriods = [self.january1, self.february, self.march] - self.assertEqual(sorted(periods), correctPeriods) - - def test_period_nat_comp(self): - p_nat = Period('NaT', freq='D') - p = Period('2011-01-01', freq='D') - - nat = pd.Timestamp('NaT') - t = pd.Timestamp('2011-01-01') - # confirm Period('NaT') work identical with Timestamp('NaT') - for left, right in [(p_nat, p), (p, p_nat), (p_nat, 
p_nat), (nat, t), - (t, nat), (nat, nat)]: - self.assertEqual(left < right, False) - self.assertEqual(left > right, False) - self.assertEqual(left == right, False) - self.assertEqual(left != right, True) - self.assertEqual(left <= right, False) - self.assertEqual(left >= right, False) - - def test_pi_pi_comp(self): - - for freq in ['M', '2M', '3M']: - base = PeriodIndex(['2011-01', '2011-02', - '2011-03', '2011-04'], freq=freq) - p = Period('2011-02', freq=freq) - - exp = np.array([False, True, False, False]) - self.assert_numpy_array_equal(base == p, exp) - self.assert_numpy_array_equal(p == base, exp) - - exp = np.array([True, False, True, True]) - self.assert_numpy_array_equal(base != p, exp) - self.assert_numpy_array_equal(p != base, exp) - - exp = np.array([False, False, True, True]) - self.assert_numpy_array_equal(base > p, exp) - self.assert_numpy_array_equal(p < base, exp) - - exp = np.array([True, False, False, False]) - self.assert_numpy_array_equal(base < p, exp) - self.assert_numpy_array_equal(p > base, exp) - - exp = np.array([False, True, True, True]) - self.assert_numpy_array_equal(base >= p, exp) - self.assert_numpy_array_equal(p <= base, exp) - - exp = np.array([True, True, False, False]) - self.assert_numpy_array_equal(base <= p, exp) - self.assert_numpy_array_equal(p >= base, exp) - - idx = PeriodIndex(['2011-02', '2011-01', '2011-03', - '2011-05'], freq=freq) - - exp = np.array([False, False, True, False]) - self.assert_numpy_array_equal(base == idx, exp) - - exp = np.array([True, True, False, True]) - self.assert_numpy_array_equal(base != idx, exp) - - exp = np.array([False, True, False, False]) - self.assert_numpy_array_equal(base > idx, exp) - - exp = np.array([True, False, False, True]) - self.assert_numpy_array_equal(base < idx, exp) - - exp = np.array([False, True, True, False]) - self.assert_numpy_array_equal(base >= idx, exp) - - exp = np.array([True, False, True, True]) - self.assert_numpy_array_equal(base <= idx, exp) - - # different base freq - msg = "Input has different freq=A-DEC from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='A') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') - base <= idx - - # different mult - msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='4M') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='4M') >= base - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') - base <= idx - - def test_pi_nat_comp(self): - for freq in ['M', '2M', '3M']: - idx1 = PeriodIndex( - ['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq) - - result = idx1 > Period('2011-02', freq=freq) - exp = np.array([False, False, False, True]) - self.assert_numpy_array_equal(result, exp) - result = Period('2011-02', freq=freq) < idx1 - self.assert_numpy_array_equal(result, exp) - - result = idx1 == Period('NaT', freq=freq) - exp = np.array([False, False, False, False]) - self.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) == idx1 - self.assert_numpy_array_equal(result, exp) - - result = idx1 != Period('NaT', freq=freq) - exp = np.array([True, True, True, True]) - 
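# Sketch of the comparison semantics under test: comparing a PeriodIndex
# with a Period of the same freq is elementwise, and NaT entries compare
# unequal to everything (illustrative names; assuming the pandas API of
# this era -- mixing freqs instead raises IncompatibleFrequency):
import pandas as pd

pidx = pd.PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq='M')
print(pidx > pd.Period('2011-02', freq='M'))   # [False False False  True]
print(pidx == pd.Period('NaT', freq='M'))      # [False False False False]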
self.assert_numpy_array_equal(result, exp) - result = Period('NaT', freq=freq) != idx1 - self.assert_numpy_array_equal(result, exp) - - idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq=freq) - result = idx1 < idx2 - exp = np.array([True, False, False, False]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 == idx2 - exp = np.array([False, False, False, False]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 != idx2 - exp = np.array([True, True, True, True]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 == idx1 - exp = np.array([True, True, False, True]) - self.assert_numpy_array_equal(result, exp) - - result = idx1 != idx1 - exp = np.array([False, False, True, False]) - self.assert_numpy_array_equal(result, exp) - - diff = PeriodIndex(['2011-02', '2011-01', '2011-04', - 'NaT'], freq='4M') - msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx1 > diff - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - idx1 == diff - - -class TestSeriesPeriod(tm.TestCase): - - def setUp(self): - self.series = Series(period_range('2000-01-01', periods=10, freq='D')) - - def test_auto_conversion(self): - series = Series(list(period_range('2000-01-01', periods=10, freq='D'))) - self.assertEqual(series.dtype, 'object') - - series = pd.Series([pd.Period('2011-01-01', freq='D'), - pd.Period('2011-02-01', freq='D')]) - self.assertEqual(series.dtype, 'object') - - def test_getitem(self): - self.assertEqual(self.series[1], pd.Period('2000-01-02', freq='D')) - - result = self.series[[2, 4]] - exp = pd.Series([pd.Period('2000-01-03', freq='D'), - pd.Period('2000-01-05', freq='D')], - index=[2, 4]) - self.assert_series_equal(result, exp) - self.assertEqual(result.dtype, 'object') - - def test_constructor_cant_cast_period(self): - with tm.assertRaises(TypeError): - Series(period_range('2000-01-01', periods=10, freq='D'), - dtype=float) - - def test_constructor_cast_object(self): - s = Series(period_range('1/1/2000', periods=10), dtype=object) - exp = Series(period_range('1/1/2000', periods=10)) - tm.assert_series_equal(s, exp) - - def test_isnull(self): - # GH 13737 - s = Series([pd.Period('2011-01', freq='M'), - pd.Period('NaT', freq='M')]) - tm.assert_series_equal(s.isnull(), Series([False, True])) - tm.assert_series_equal(s.notnull(), Series([True, False])) - - def test_fillna(self): - # GH 13737 - s = Series([pd.Period('2011-01', freq='M'), - pd.Period('NaT', freq='M')]) - - res = s.fillna(pd.Period('2012-01', freq='M')) - exp = Series([pd.Period('2011-01', freq='M'), - pd.Period('2012-01', freq='M')]) - tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'object') - - res = s.fillna('XXX') - exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) - tm.assert_series_equal(res, exp) - self.assertEqual(res.dtype, 'object') - - def test_dropna(self): - # GH 13737 - s = Series([pd.Period('2011-01', freq='M'), - pd.Period('NaT', freq='M')]) - tm.assert_series_equal(s.dropna(), - Series([pd.Period('2011-01', freq='M')])) - - def test_series_comparison_scalars(self): - val = pd.Period('2000-01-04', freq='D') - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - - val = self.series[5] - result = self.series > val - expected = pd.Series([x > val for x in self.series]) - tm.assert_series_equal(result, expected) - - def test_between(self): - left, right = self.series[[2, 7]] - result = 
self.series.between(left, right) - expected = (self.series >= left) & (self.series <= right) - tm.assert_series_equal(result, expected) - - # --------------------------------------------------------------------- - # NaT support - - """ - # ToDo: Enable when support period dtype - def test_NaT_scalar(self): - series = Series([0, 1000, 2000, iNaT], dtype='period[D]') - - val = series[3] - self.assertTrue(isnull(val)) - - series[2] = val - self.assertTrue(isnull(series[2])) - - def test_NaT_cast(self): - result = Series([np.nan]).astype('period[D]') - expected = Series([NaT]) - tm.assert_series_equal(result, expected) - """ - - def test_set_none_nan(self): - # currently Period is stored as object dtype, not as NaT - self.series[3] = None - self.assertIs(self.series[3], None) - - self.series[3:5] = None - self.assertIs(self.series[4], None) - - self.series[5] = np.nan - self.assertTrue(np.isnan(self.series[5])) - - self.series[5:7] = np.nan - self.assertTrue(np.isnan(self.series[6])) - - def test_intercept_astype_object(self): - expected = self.series.astype('object') - - df = DataFrame({'a': self.series, - 'b': np.random.randn(len(self.series))}) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)}) - - result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) - - def test_ops_series_timedelta(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Period('2015-01-02', freq='D'), - pd.Period('2015-01-03', freq='D')], name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - tm.assert_series_equal(s + pd.tseries.offsets.Day(), exp) - tm.assert_series_equal(pd.tseries.offsets.Day() + s, exp) - - def test_ops_series_period(self): - # GH 13043 - s = pd.Series([pd.Period('2015-01-01', freq='D'), - pd.Period('2015-01-02', freq='D')], name='xxx') - self.assertEqual(s.dtype, object) - - p = pd.Period('2015-01-10', freq='D') - # dtype will be object because of original dtype - exp = pd.Series([9, 8], name='xxx', dtype=object) - tm.assert_series_equal(p - s, exp) - tm.assert_series_equal(s - p, -exp) - - s2 = pd.Series([pd.Period('2015-01-05', freq='D'), - pd.Period('2015-01-04', freq='D')], name='xxx') - self.assertEqual(s2.dtype, object) - - exp = pd.Series([4, 2], name='xxx', dtype=object) - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - def test_comp_series_period_scalar(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - p = Period('2011-02', freq=freq) - - exp = pd.Series([False, True, False, False]) - tm.assert_series_equal(base == p, exp) - tm.assert_series_equal(p == base, exp) - - exp = pd.Series([True, False, True, True]) - tm.assert_series_equal(base != p, exp) - tm.assert_series_equal(p != base, exp) - - exp = pd.Series([False, False, True, True]) - tm.assert_series_equal(base > p, exp) - tm.assert_series_equal(p < base, exp) - - exp = pd.Series([True, False, False, False]) - tm.assert_series_equal(base < p, exp) - tm.assert_series_equal(p > base, exp) - - exp = pd.Series([False, True, True, True]) - tm.assert_series_equal(base >= p, exp) - tm.assert_series_equal(p <= base, exp) - - exp = pd.Series([True, True, False, False]) - 
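# Sketch of the Series behaviour covered by these assertions: a Series of
# Period objects (stored as object dtype here) compares elementwise against
# a Period scalar, and subtracting such a Series from a Period yields the
# integer number of periods between them (illustrative names; results as
# asserted by the tests in this patch for the pandas version it targets):
import pandas as pd

ser = pd.Series([pd.Period(x, freq='D')
                 for x in ['2015-01-01', '2015-01-02']])
print(ser > pd.Period('2015-01-01', freq='D'))   # [False, True]
print(pd.Period('2015-01-10', freq='D') - ser)   # [9, 8] day periods apart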
tm.assert_series_equal(base <= p, exp) - tm.assert_series_equal(p >= base, exp) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= Period('2011', freq='A') - - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - Period('2011', freq='A') >= base - - def test_comp_series_period_series(self): - # GH 13200 - for freq in ['M', '2M', '3M']: - base = Series([Period(x, freq=freq) for x in - ['2011-01', '2011-02', '2011-03', '2011-04']]) - - s = Series([Period(x, freq=freq) for x in - ['2011-02', '2011-01', '2011-03', '2011-05']]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp = Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) - - s2 = Series([Period(x, freq='A') for x in - ['2011', '2011', '2011', '2011']]) - - # different base freq - msg = "Input has different freq=A-DEC from Period" - with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): - base <= s2 - - def test_comp_series_period_object(self): - # GH 13200 - base = Series([Period('2011', freq='A'), Period('2011-02', freq='M'), - Period('2013', freq='A'), Period('2011-04', freq='M')]) - - s = Series([Period('2012', freq='A'), Period('2011-01', freq='M'), - Period('2013', freq='A'), Period('2011-05', freq='M')]) - - exp = Series([False, False, True, False]) - tm.assert_series_equal(base == s, exp) - - exp = Series([True, True, False, True]) - tm.assert_series_equal(base != s, exp) - - exp = Series([False, True, False, False]) - tm.assert_series_equal(base > s, exp) - - exp = Series([True, False, False, True]) - tm.assert_series_equal(base < s, exp) - - exp = Series([False, True, True, False]) - tm.assert_series_equal(base >= s, exp) - - exp = Series([True, False, True, True]) - tm.assert_series_equal(base <= s, exp) - - def test_ops_frame_period(self): - # GH 13043 - df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), - pd.Period('2015-02', freq='M')], - 'B': [pd.Period('2014-01', freq='M'), - pd.Period('2014-02', freq='M')]}) - self.assertEqual(df['A'].dtype, object) - self.assertEqual(df['B'].dtype, object) - - p = pd.Period('2015-03', freq='M') - # dtype will be object because of original dtype - exp = pd.DataFrame({'A': np.array([2, 1], dtype=object), - 'B': np.array([14, 13], dtype=object)}) - tm.assert_frame_equal(p - df, exp) - tm.assert_frame_equal(df - p, -exp) - - df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')], - 'B': [pd.Period('2015-05', freq='M'), - pd.Period('2015-06', freq='M')]}) - self.assertEqual(df2['A'].dtype, object) - self.assertEqual(df2['B'].dtype, object) - - exp = pd.DataFrame({'A': np.array([4, 4], dtype=object), - 'B': np.array([16, 16], dtype=object)}) - tm.assert_frame_equal(df2 - df, exp) - tm.assert_frame_equal(df - df2, -exp) - - -class TestPeriodField(tm.TestCase): - - def test_get_period_field_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) - - def test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field_arr, -1, - 
np.empty(1), 0) - - -class TestTslib(tm.TestCase): - def test_intraday_conversion_factors(self): - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('H'), False), 24) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('T'), False), 1440) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('S'), False), 86400) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('L'), False), 86400000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('U'), False), 86400000000) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('N'), False), 86400000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('T'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('H'), get_freq('S'), False), 3600) - self.assertEqual(period_asfreq(1, get_freq('H'), - get_freq('L'), False), 3600000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('U'), False), 3600000000) - self.assertEqual(period_asfreq(1, get_freq( - 'H'), get_freq('N'), False), 3600000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('S'), False), 60) - self.assertEqual(period_asfreq( - 1, get_freq('T'), get_freq('L'), False), 60000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('U'), False), 60000000) - self.assertEqual(period_asfreq(1, get_freq( - 'T'), get_freq('N'), False), 60000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('S'), get_freq('L'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('S'), - get_freq('U'), False), 1000000) - self.assertEqual(period_asfreq(1, get_freq( - 'S'), get_freq('N'), False), 1000000000) - - self.assertEqual(period_asfreq( - 1, get_freq('L'), get_freq('U'), False), 1000) - self.assertEqual(period_asfreq(1, get_freq('L'), - get_freq('N'), False), 1000000) - - self.assertEqual(period_asfreq( - 1, get_freq('U'), get_freq('N'), False), 1000) - - def test_period_ordinal_start_values(self): - # information for 1.1.1970 - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('A'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('M'))) - self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('D'))) - self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, - get_freq('B'))) - - def test_period_ordinal_week(self): - self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0, - get_freq('W'))) - - self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('W'))) - self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('W'))) - - def test_period_ordinal_business_day(self): - # Thursday - self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0, - get_freq('B'))) - # Friday - self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0, - get_freq('B'))) - # Saturday - self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0, - get_freq('B'))) - # Sunday - self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0, - get_freq('B'))) - # Monday - self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, - get_freq('B'))) - # Tuesday - self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, - get_freq('B'))) From d38d142aaac5ee046aabc32c1e6422f86c37dc41 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Feb 2017 18:45:04 -0500 Subject: [PATCH 
029/353] TST: more test_period reorg --- .../tests/indexes/period/test_construction.py | 25 +- pandas/tests/indexes/period/test_indexing.py | 317 +++++++ pandas/tests/indexes/period/test_ops.py | 492 +--------- pandas/tests/indexes/period/test_setops.py | 93 ++ pandas/tests/scalar/test_period.py | 898 +++--------------- pandas/tests/scalar/test_period_asfreq.py | 721 ++++++++++++++ 6 files changed, 1285 insertions(+), 1261 deletions(-) create mode 100644 pandas/tests/indexes/period/test_indexing.py create mode 100644 pandas/tests/scalar/test_period_asfreq.py diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index c1299c6abeda3..228615829b5b8 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -410,22 +410,9 @@ def test_constructor(self): self.assertTrue((i1 == i2).all()) self.assertEqual(i1.freq, i2.freq) - try: - PeriodIndex(start=start, end=end_intv) - raise AssertionError('Cannot allow mixed freq for start and end') - except ValueError: - pass - end_intv = Period('2005-05-01', 'B') i1 = PeriodIndex(start=start, end=end_intv) - try: - PeriodIndex(start=start) - raise AssertionError( - 'Must specify periods if missing start or end') - except ValueError: - pass - # infer freq from first element i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) self.assertEqual(len(i2), 2) @@ -441,6 +428,18 @@ def test_constructor(self): vals = np.array(vals) self.assertRaises(ValueError, PeriodIndex, vals) + def test_constructor_error(self): + start = Period('02-Apr-2005', 'B') + end_intv = Period('2006-12-31', ('w', 1)) + + msg = 'Start and end must have same freq' + with tm.assertRaisesRegexp(ValueError, msg): + PeriodIndex(start=start, end=end_intv) + + msg = 'Must specify 2 of start, end, periods' + with tm.assertRaisesRegexp(ValueError, msg): + PeriodIndex(start=start) + def test_recreate_from_data(self): for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']: org = PeriodIndex(start='2001/04/01', freq=o, periods=1) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py new file mode 100644 index 0000000000000..8d9e26406defc --- /dev/null +++ b/pandas/tests/indexes/period/test_indexing.py @@ -0,0 +1,317 @@ +from datetime import datetime + +import numpy as np +import pandas as pd +from pandas.util import testing as tm +from pandas.compat import lrange +from pandas import (PeriodIndex, Series, DatetimeIndex, + period_range, Period, tslib, _np_version_under1p9) + + +class TestGetItem(tm.TestCase): + + def setUp(self): + pass + + def test_getitem(self): + idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', + name='idx') + + for idx in [idx1]: + result = idx[0] + self.assertEqual(result, pd.Period('2011-01-01', freq='D')) + + result = idx[-1] + self.assertEqual(result, pd.Period('2011-01-31', freq='D')) + + result = idx[0:5] + expected = pd.period_range('2011-01-01', '2011-01-05', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx[0:10:2] + expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', + '2011-01-05', + '2011-01-07', '2011-01-09'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx[-20:-5:3] + expected = pd.PeriodIndex(['2011-01-12', '2011-01-15', + '2011-01-18', 
+ '2011-01-21', '2011-01-24'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx[4::-1] + expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03', + '2011-01-02', '2011-01-01'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + def test_getitem_index(self): + idx = period_range('2007-01', periods=10, freq='M', name='x') + + result = idx[[1, 3, 5]] + exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'], + freq='M', name='x') + tm.assert_index_equal(result, exp) + + result = idx[[True, True, False, False, False, + True, True, False, False, False]] + exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'], + freq='M', name='x') + tm.assert_index_equal(result, exp) + + def test_getitem_partial(self): + rng = period_range('2007-01', periods=50, freq='M') + ts = Series(np.random.randn(len(rng)), rng) + + self.assertRaises(KeyError, ts.__getitem__, '2006') + + result = ts['2008'] + self.assertTrue((result.index.year == 2008).all()) + + result = ts['2008':'2009'] + self.assertEqual(len(result), 24) + + result = ts['2008-1':'2009-12'] + self.assertEqual(len(result), 24) + + result = ts['2008Q1':'2009Q4'] + self.assertEqual(len(result), 24) + + result = ts[:'2009'] + self.assertEqual(len(result), 36) + + result = ts['2009':] + self.assertEqual(len(result), 50 - 24) + + exp = result + result = ts[24:] + tm.assert_series_equal(exp, result) + + ts = ts[10:].append(ts[10:]) + self.assertRaisesRegexp(KeyError, + "left slice bound for non-unique " + "label: '2008'", + ts.__getitem__, slice('2008', '2009')) + + def test_getitem_datetime(self): + rng = period_range(start='2012-01-01', periods=10, freq='W-MON') + ts = Series(lrange(len(rng)), index=rng) + + dt1 = datetime(2011, 10, 2) + dt4 = datetime(2012, 4, 20) + + rs = ts[dt1:dt4] + tm.assert_series_equal(rs, ts) + + def test_getitem_nat(self): + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) + self.assertIs(idx[1], tslib.NaT) + + s = pd.Series([0, 1, 2], index=idx) + self.assertEqual(s[pd.NaT], 1) + + s = pd.Series(idx, index=idx) + self.assertEqual(s[pd.Period('2011-01', freq='M')], + pd.Period('2011-01', freq='M')) + self.assertIs(s[pd.NaT], tslib.NaT) + + def test_getitem_list_periods(self): + # GH 7710 + rng = period_range(start='2012-01-01', periods=10, freq='D') + ts = Series(lrange(len(rng)), index=rng) + exp = ts.iloc[[1]] + tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp) + + def test_getitem_seconds(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', + periods=4000) + pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + if _np_version_under1p9: + with tm.assertRaises(ValueError): + idx[v] + else: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with tm.assertRaises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) + tm.assert_series_equal(s['2013/01/01 9H'], s[:3600]) + for d in ['2013/01/01', '2013/01', '2013']: + 
tm.assert_series_equal(s[d], s) + + def test_getitem_day(self): + # GH 6716 + # Confirm DatetimeIndex and PeriodIndex works identically + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', + '2013/02/01 09:00'] + for v in values: + + if _np_version_under1p9: + with tm.assertRaises(ValueError): + idx[v] + else: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with tm.assertRaises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s['2013/01'], s[0:31]) + tm.assert_series_equal(s['2013/02'], s[31:59]) + tm.assert_series_equal(s['2014'], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(KeyError): + s[v] + + +class TestIndexing(tm.TestCase): + + def test_get_loc_msg(self): + idx = period_range('2000-1-1', freq='A', periods=10) + bad_period = Period('2012', 'A') + self.assertRaises(KeyError, idx.get_loc, bad_period) + + try: + idx.get_loc(bad_period) + except KeyError as inst: + self.assertEqual(inst.args[0], bad_period) + + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + self.assertEqual(idx.get_loc(pd.NaT), 1) + self.assertEqual(idx.get_loc(None), 1) + self.assertEqual(idx.get_loc(float('nan')), 1) + self.assertEqual(idx.get_loc(np.nan), 1) + + def test_take(self): + # GH 10295 + idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', + name='idx') + + for idx in [idx1]: + result = idx.take([0]) + self.assertEqual(result, pd.Period('2011-01-01', freq='D')) + + result = idx.take([5]) + self.assertEqual(result, pd.Period('2011-01-06', freq='D')) + + result = idx.take([0, 1, 2]) + expected = pd.period_range('2011-01-01', '2011-01-03', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, 'D') + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', + '2011-01-05'], freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx.take([7, 4, 1]) + expected = pd.PeriodIndex(['2011-01-08', '2011-01-05', + '2011-01-02'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx.take([3, 2, 5]) + expected = PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + result = idx.take([-3, 2, 5]) + expected = PeriodIndex(['2011-01-29', '2011-01-03', '2011-01-06'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + self.assertEqual(result.freq, 'D') + + def test_take_misc(self): + index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', + name='idx') + expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), + datetime(2010, 1, 9), datetime(2010, 1, 13)], + freq='D', name='idx') + + taken1 = 
index.take([5, 6, 8, 12]) + taken2 = index[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + tm.assertIsInstance(taken, PeriodIndex) + self.assertEqual(taken.freq, index.freq) + self.assertEqual(taken.name, expected.name) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + name='xxx', freq='D') + result = idx.take(np.array([1, 0, -1])) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], + name='xxx', freq='D') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 70759e8659c25..82a881d7c65bc 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,14 +1,12 @@ import numpy as np -from datetime import timedelta, datetime +from datetime import timedelta import pandas as pd import pandas.tslib as tslib import pandas.util.testing as tm import pandas.tseries.period as period -from pandas.compat import lrange from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, - _np_version_under1p10, Index, Timedelta, offsets, - _np_version_under1p9) + _np_version_under1p10, Index, Timedelta, offsets) from pandas.tests.test_base import Ops @@ -285,57 +283,6 @@ def test_resolution(self): idx = pd.period_range(start='2013-04-01', periods=30, freq=freq) self.assertEqual(idx.resolution, expected) - def test_union(self): - # union - rng1 = pd.period_range('1/1/2000', freq='D', periods=5) - other1 = pd.period_range('1/6/2000', freq='D', periods=5) - expected1 = pd.period_range('1/1/2000', freq='D', periods=10) - - rng2 = pd.period_range('1/1/2000', freq='D', periods=5) - other2 = pd.period_range('1/4/2000', freq='D', periods=5) - expected2 = pd.period_range('1/1/2000', freq='D', periods=8) - - rng3 = pd.period_range('1/1/2000', freq='D', periods=5) - other3 = pd.PeriodIndex([], freq='D') - expected3 = pd.period_range('1/1/2000', freq='D', periods=5) - - rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) - other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) - expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00', - '2000-01-01 11:00', '2000-01-01 12:00', - '2000-01-01 13:00', '2000-01-02 09:00', - '2000-01-02 10:00', '2000-01-02 11:00', - '2000-01-02 12:00', '2000-01-02 13:00'], - freq='H') - - rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', - '2000-01-01 09:05'], freq='T') - other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05' - '2000-01-01 09:08'], - freq='T') - expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', - '2000-01-01 09:05', '2000-01-01 09:08'], - 
freq='T') - - rng6 = pd.period_range('2000-01-01', freq='M', periods=7) - other6 = pd.period_range('2000-04-01', freq='M', periods=7) - expected6 = pd.period_range('2000-01-01', freq='M', periods=10) - - rng7 = pd.period_range('2003-01-01', freq='A', periods=5) - other7 = pd.period_range('1998-01-01', freq='A', periods=8) - expected7 = pd.period_range('1998-01-01', freq='A', periods=10) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3), (rng4, other4, - expected4), - (rng5, other5, expected5), (rng6, other6, - expected6), - (rng7, other7, expected7)]: - - result_union = rng.union(other) - tm.assert_index_equal(result_union, expected) - def test_add_iadd(self): rng = pd.period_range('1/1/2000', freq='D', periods=5) other = pd.period_range('1/6/2000', freq='D', periods=5) @@ -432,48 +379,6 @@ def test_add_iadd(self): rng += 1 tm.assert_index_equal(rng, expected) - def test_difference(self): - # diff - rng1 = pd.period_range('1/1/2000', freq='D', periods=5) - other1 = pd.period_range('1/6/2000', freq='D', periods=5) - expected1 = pd.period_range('1/1/2000', freq='D', periods=5) - - rng2 = pd.period_range('1/1/2000', freq='D', periods=5) - other2 = pd.period_range('1/4/2000', freq='D', periods=5) - expected2 = pd.period_range('1/1/2000', freq='D', periods=3) - - rng3 = pd.period_range('1/1/2000', freq='D', periods=5) - other3 = pd.PeriodIndex([], freq='D') - expected3 = pd.period_range('1/1/2000', freq='D', periods=5) - - rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) - other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) - expected4 = rng4 - - rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', - '2000-01-01 09:05'], freq='T') - other5 = pd.PeriodIndex( - ['2000-01-01 09:01', '2000-01-01 09:05'], freq='T') - expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T') - - rng6 = pd.period_range('2000-01-01', freq='M', periods=7) - other6 = pd.period_range('2000-04-01', freq='M', periods=7) - expected6 = pd.period_range('2000-01-01', freq='M', periods=3) - - rng7 = pd.period_range('2003-01-01', freq='A', periods=5) - other7 = pd.period_range('1998-01-01', freq='A', periods=8) - expected7 = pd.period_range('2006-01-01', freq='A', periods=2) - - for rng, other, expected in [(rng1, other1, expected1), - (rng2, other2, expected2), - (rng3, other3, expected3), - (rng4, other4, expected4), - (rng5, other5, expected5), - (rng6, other6, expected6), - (rng7, other7, expected7), ]: - result_union = rng.difference(other) - tm.assert_index_equal(result_union, expected) - def test_sub(self): rng = period_range('2007-01', periods=50) @@ -833,98 +738,6 @@ def test_order(self): self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertEqual(ordered.freq, 'D') - def test_getitem(self): - idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', - name='idx') - - for idx in [idx1]: - result = idx[0] - self.assertEqual(result, pd.Period('2011-01-01', freq='D')) - - result = idx[-1] - self.assertEqual(result, pd.Period('2011-01-31', freq='D')) - - result = idx[0:5] - expected = pd.period_range('2011-01-01', '2011-01-05', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx[0:10:2] - expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', - '2011-01-05', - '2011-01-07', '2011-01-09'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - 
self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx[-20:-5:3] - expected = pd.PeriodIndex(['2011-01-12', '2011-01-15', - '2011-01-18', - '2011-01-21', '2011-01-24'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx[4::-1] - expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03', - '2011-01-02', '2011-01-01'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - def test_take(self): - # GH 10295 - idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', - name='idx') - - for idx in [idx1]: - result = idx.take([0]) - self.assertEqual(result, pd.Period('2011-01-01', freq='D')) - - result = idx.take([5]) - self.assertEqual(result, pd.Period('2011-01-06', freq='D')) - - result = idx.take([0, 1, 2]) - expected = pd.period_range('2011-01-01', '2011-01-03', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, 'D') - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', - '2011-01-05'], freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx.take([7, 4, 1]) - expected = pd.PeriodIndex(['2011-01-08', '2011-01-05', - '2011-01-02'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx.take([3, 2, 5]) - expected = PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - - result = idx.take([-3, 2, 5]) - expected = PeriodIndex(['2011-01-29', '2011-01-03', '2011-01-06'], - freq='D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - self.assertEqual(result.freq, 'D') - def test_nat_new(self): idx = pd.period_range('2011-01', freq='M', periods=5, name='x') @@ -1350,6 +1163,9 @@ def test_ops_series_period(self): tm.assert_series_equal(s2 - s, exp) tm.assert_series_equal(s - s2, -exp) + +class TestFramePeriod(tm.TestCase): + def test_ops_frame_period(self): # GH 13043 df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), @@ -1379,303 +1195,7 @@ def test_ops_frame_period(self): tm.assert_frame_equal(df - df2, -exp) -class TestPeriodIndex(tm.TestCase): - - def setUp(self): - pass - - def test_getitem_index(self): - idx = period_range('2007-01', periods=10, freq='M', name='x') - - result = idx[[1, 3, 5]] - exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - result = idx[[True, True, False, False, False, - True, True, False, False, False]] - exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'], - freq='M', name='x') - tm.assert_index_equal(result, exp) - - def test_getitem_partial(self): - rng = period_range('2007-01', periods=50, freq='M') - ts = Series(np.random.randn(len(rng)), rng) - - self.assertRaises(KeyError, ts.__getitem__, '2006') - - result = ts['2008'] - self.assertTrue((result.index.year == 2008).all()) - - result = ts['2008':'2009'] - self.assertEqual(len(result), 24) - - result = 
ts['2008-1':'2009-12'] - self.assertEqual(len(result), 24) - - result = ts['2008Q1':'2009Q4'] - self.assertEqual(len(result), 24) - - result = ts[:'2009'] - self.assertEqual(len(result), 36) - - result = ts['2009':] - self.assertEqual(len(result), 50 - 24) - - exp = result - result = ts[24:] - tm.assert_series_equal(exp, result) - - ts = ts[10:].append(ts[10:]) - self.assertRaisesRegexp(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) - - def test_getitem_datetime(self): - rng = period_range(start='2012-01-01', periods=10, freq='W-MON') - ts = Series(lrange(len(rng)), index=rng) - - dt1 = datetime(2011, 10, 2) - dt4 = datetime(2012, 4, 20) - - rs = ts[dt1:dt4] - tm.assert_series_equal(rs, ts) - - def test_getitem_nat(self): - idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') - self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) - self.assertIs(idx[1], tslib.NaT) - - s = pd.Series([0, 1, 2], index=idx) - self.assertEqual(s[pd.NaT], 1) - - s = pd.Series(idx, index=idx) - self.assertEqual(s[pd.Period('2011-01', freq='M')], - pd.Period('2011-01', freq='M')) - self.assertIs(s[pd.NaT], tslib.NaT) - - def test_getitem_list_periods(self): - # GH 7710 - rng = period_range(start='2012-01-01', periods=10, freq='D') - ts = Series(lrange(len(rng)), index=rng) - exp = ts.iloc[[1]] - tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp) - - def test_getitem_seconds(self): - # GH 6716 - didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', - periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) - tm.assert_series_equal(s['2013/01/01 9H'], s[:3600]) - for d in ['2013/01/01', '2013/01', '2013']: - tm.assert_series_equal(s[d], s) - - def test_getitem_day(self): - # GH 6716 - # Confirm DatetimeIndex and PeriodIndex works identically - didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', - '2013/02/01 09:00'] - for v in values: - - if _np_version_under1p9: - with tm.assertRaises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with tm.assertRaises(IndexError): - # idx[v] - continue - - s = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(s['2013/01'], s[0:31]) - tm.assert_series_equal(s['2013/02'], s[31:59]) - tm.assert_series_equal(s['2014'], s[365:]) - - invalid = ['2013/02/01 9H', '2013/02/01 09:00'] - for v in invalid: - with tm.assertRaises(KeyError): - s[v] - - def test_take(self): - index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', - name='idx') - expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), - datetime(2010, 1, 9), datetime(2010, 1, 13)], - freq='D', name='idx') - - taken1 = index.take([5, 6, 8, 12]) - taken2 = 
index[[5, 6, 8, 12]] - - for taken in [taken1, taken2]: - tm.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, PeriodIndex) - self.assertEqual(taken.freq, index.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'], - name='xxx', freq='D') - result = idx.take(np.array([1, 0, -1])) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'], - name='xxx', freq='D') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_get_loc_msg(self): - idx = period_range('2000-1-1', freq='A', periods=10) - bad_period = Period('2012', 'A') - self.assertRaises(KeyError, idx.get_loc, bad_period) - - try: - idx.get_loc(bad_period) - except KeyError as inst: - self.assertEqual(inst.args[0], bad_period) - - def test_get_loc_nat(self): - didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) - pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') - - # check DatetimeIndex compat - for idx in [didx, pidx]: - self.assertEqual(idx.get_loc(pd.NaT), 1) - self.assertEqual(idx.get_loc(None), 1) - self.assertEqual(idx.get_loc(float('nan')), 1) - self.assertEqual(idx.get_loc(np.nan), 1) - - -class TestComparisons(tm.TestCase): - - def setUp(self): - self.january1 = Period('2000-01', 'M') - self.january2 = Period('2000-01', 'M') - self.february = Period('2000-02', 'M') - self.march = Period('2000-03', 'M') - self.day = Period('2012-01-01', 'D') - - def test_equal(self): - self.assertEqual(self.january1, self.january2) - - def test_equal_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 == self.day - - def test_notEqual(self): - self.assertNotEqual(self.january1, 1) - self.assertNotEqual(self.january1, self.february) - - def test_greater(self): - self.assertTrue(self.february > self.january1) - - def test_greater_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 > self.day - - def test_greater_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 > 1 - - def test_greaterEqual(self): - self.assertTrue(self.january1 >= self.january2) - - def test_greaterEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 >= self.day - - with tm.assertRaises(TypeError): - print(self.january1 >= 1) - - def test_smallerEqual(self): - self.assertTrue(self.january1 <= self.january2) - - def test_smallerEqual_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 <= self.day - - def test_smallerEqual_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 <= 1 - - def test_smaller(self): - self.assertTrue(self.january1 < 
self.february) - - def test_smaller_Raises_Value(self): - with tm.assertRaises(period.IncompatibleFrequency): - self.january1 < self.day - - def test_smaller_Raises_Type(self): - with tm.assertRaises(TypeError): - self.january1 < 1 - - def test_sort(self): - periods = [self.march, self.january1, self.february] - correctPeriods = [self.january1, self.february, self.march] - self.assertEqual(sorted(periods), correctPeriods) - - def test_period_nat_comp(self): - p_nat = Period('NaT', freq='D') - p = Period('2011-01-01', freq='D') - - nat = pd.Timestamp('NaT') - t = pd.Timestamp('2011-01-01') - # confirm Period('NaT') work identical with Timestamp('NaT') - for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), - (t, nat), (nat, nat)]: - self.assertEqual(left < right, False) - self.assertEqual(left > right, False) - self.assertEqual(left == right, False) - self.assertEqual(left != right, True) - self.assertEqual(left <= right, False) - self.assertEqual(left >= right, False) +class TestPeriodIndexComparisons(tm.TestCase): def test_pi_pi_comp(self): diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 06e15f9175ed8..d4f06bae8bc32 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -43,6 +43,57 @@ def test_join_does_not_recur(self): tm.assert_index_equal(res, expected) def test_union(self): + # union + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=10) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = pd.period_range('1/1/2000', freq='D', periods=8) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = pd.PeriodIndex(['2000-01-01 09:00', '2000-01-01 10:00', + '2000-01-01 11:00', '2000-01-01 12:00', + '2000-01-01 13:00', '2000-01-02 09:00', + '2000-01-02 10:00', '2000-01-02 11:00', + '2000-01-02 12:00', '2000-01-02 13:00'], + freq='H') + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:05' + '2000-01-01 09:08'], + freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05', '2000-01-01 09:08'], + freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=10) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('1998-01-01', freq='A', periods=10) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), (rng4, other4, + expected4), + (rng5, other5, expected5), (rng6, other6, + expected6), + (rng7, other7, expected7)]: + + result_union = rng.union(other) + tm.assert_index_equal(result_union, expected) + + def test_union_misc(self): index = period_range('1/1/2000', '1/20/2000', freq='D') result = index[:-5].union(index[10:]) @@ -155,3 +206,45 @@ def 
test_intersection_cases(self): result = rng.intersection(rng[0:0]) self.assertEqual(len(result), 0) + + def test_difference(self): + # diff + rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + other1 = pd.period_range('1/6/2000', freq='D', periods=5) + expected1 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + other2 = pd.period_range('1/4/2000', freq='D', periods=5) + expected2 = pd.period_range('1/1/2000', freq='D', periods=3) + + rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + other3 = pd.PeriodIndex([], freq='D') + expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + + rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) + expected4 = rng4 + + rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + '2000-01-01 09:05'], freq='T') + other5 = pd.PeriodIndex( + ['2000-01-01 09:01', '2000-01-01 09:05'], freq='T') + expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T') + + rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + other6 = pd.period_range('2000-04-01', freq='M', periods=7) + expected6 = pd.period_range('2000-01-01', freq='M', periods=3) + + rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + other7 = pd.period_range('1998-01-01', freq='A', periods=8) + expected7 = pd.period_range('2006-01-01', freq='A', periods=2) + + for rng, other, expected in [(rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), ]: + result_union = rng.difference(other) + tm.assert_index_equal(result_union, expected) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index c94a7c62a6dc9..ffe00a4a62a0a 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -7,12 +7,28 @@ from pandas.compat import text_type, iteritems from pandas.compat.numpy import np_datetime64_compat from pandas import Period, Timestamp, tslib, offsets, _period -from pandas.tseries.frequencies import DAYS, MONTHS, _period_code_map +from pandas.tseries.frequencies import DAYS, MONTHS class TestPeriodProperties(tm.TestCase): "Test properties such as year, month, weekday, etc...." 
+ def test_is_leap_year(self): + # GH 13727 + for freq in ['A', 'M', 'D', 'H']: + p = Period('2000-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + self.assertIsInstance(p.is_leap_year, bool) + + p = Period('1999-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + + p = Period('2004-01-01 00:00:00', freq=freq) + self.assertTrue(p.is_leap_year) + + p = Period('2100-01-01 00:00:00', freq=freq) + self.assertFalse(p.is_leap_year) + def test_quarterly_negative_ordinals(self): p = Period(ordinal=-1, freq='Q-DEC') self.assertEqual(p.year, 1969) @@ -273,7 +289,7 @@ def test_timestamp_mult(self): self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) - def test_period_constructor(self): + def test_construction(self): i1 = Period('1/1/2005', freq='M') i2 = Period('Jan 2005') @@ -299,6 +315,41 @@ def test_period_constructor(self): self.assertEqual(i1, i2) self.assertEqual(i1, i3) + i1 = Period('1982', freq='min') + i2 = Period('1982', freq='MIN') + self.assertEqual(i1, i2) + i2 = Period('1982', freq=('Min', 1)) + self.assertEqual(i1, i2) + + i1 = Period(year=2005, month=3, day=1, freq='D') + i2 = Period('3/1/2005', freq='D') + self.assertEqual(i1, i2) + + i3 = Period(year=2005, month=3, day=1, freq='d') + self.assertEqual(i1, i3) + + i1 = Period('2007-01-01 09:00:00.001') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat( + '2007-01-01 09:00:00.001Z'), freq='L') + self.assertEqual(i1, expected) + + i1 = Period('2007-01-01 09:00:00.00101') + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') + self.assertEqual(i1, expected) + + expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), + freq='U') + self.assertEqual(i1, expected) + + self.assertRaises(ValueError, Period, ordinal=200701) + + self.assertRaises(ValueError, Period, '2007-1-1', freq='X') + + def test_construction_bday(self): + # Biz day construction, roll forward if non-weekday i1 = Period('3/10/12', freq='B') i2 = Period('3/10/12', freq='D') @@ -311,6 +362,12 @@ def test_period_constructor(self): i3 = Period('3/10/12', freq='b') self.assertEqual(i1, i3) + i1 = Period(year=2012, month=3, day=10, freq='B') + i2 = Period('3/12/12', freq='B') + self.assertEqual(i1, i2) + + def test_construction_quarter(self): + i1 = Period(year=2005, quarter=1, freq='Q') i2 = Period('1/1/2005', freq='Q') self.assertEqual(i1, i2) @@ -319,17 +376,6 @@ def test_period_constructor(self): i2 = Period('9/1/2005', freq='Q') self.assertEqual(i1, i2) - i1 = Period(year=2005, month=3, day=1, freq='D') - i2 = Period('3/1/2005', freq='D') - self.assertEqual(i1, i2) - - i3 = Period(year=2005, month=3, day=1, freq='d') - self.assertEqual(i1, i3) - - i1 = Period(year=2012, month=3, day=10, freq='B') - i2 = Period('3/12/12', freq='B') - self.assertEqual(i1, i2) - i1 = Period('2005Q1') i2 = Period(year=2005, quarter=1, freq='Q') i3 = Period('2005q1') @@ -356,11 +402,7 @@ def test_period_constructor(self): lower = Period('4q1984') self.assertEqual(i1, lower) - i1 = Period('1982', freq='min') - i2 = Period('1982', freq='MIN') - self.assertEqual(i1, i2) - i2 = Period('1982', freq=('Min', 1)) - self.assertEqual(i1, i2) + def test_construction_month(self): expected = Period('2007-01', freq='M') i1 = Period('200701', freq='M') @@ -389,26 +431,6 @@ def test_period_constructor(self): self.assertEqual(i1, i4) self.assertEqual(i1, i5) - i1 = 
Period('2007-01-01 09:00:00.001') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq='L') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat( - '2007-01-01 09:00:00.001Z'), freq='L') - self.assertEqual(i1, expected) - - i1 = Period('2007-01-01 09:00:00.00101') - expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq='U') - self.assertEqual(i1, expected) - - expected = Period(np_datetime64_compat('2007-01-01 09:00:00.00101Z'), - freq='U') - self.assertEqual(i1, expected) - - self.assertRaises(ValueError, Period, ordinal=200701) - - self.assertRaises(ValueError, Period, '2007-1-1', freq='X') - def test_period_constructor_offsets(self): self.assertEqual(Period('1/1/2005', freq=offsets.MonthEnd()), Period('1/1/2005', freq='M')) @@ -894,21 +916,6 @@ def test_constructor_infer_freq(self): p = Period('2007-01-01 07:10:15.123400') self.assertEqual(p.freq, 'U') - def test_asfreq_MS(self): - initial = Period("2013") - - self.assertEqual(initial.asfreq(freq="M", how="S"), - Period('2013-01', 'M')) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - initial.asfreq(freq="MS", how="S") - - with tm.assertRaisesRegexp(ValueError, msg): - pd.Period('2013-01', 'MS') - - self.assertTrue(_period_code_map.get("MS") is None) - def test_badinput(self): self.assertRaises(ValueError, Period, '-2000', 'A') self.assertRaises(tslib.DateParseError, Period, '0', 'A') @@ -945,722 +952,89 @@ def test_get_period_field_array_raises_on_out_of_range(self): np.empty(1), 0) -class TestFreqConversion(tm.TestCase): - "Test frequency conversion of date objects" - - def test_asfreq_corner(self): - val = Period(freq='A', year=2007) - result1 = val.asfreq('5t') - result2 = val.asfreq('t') - expected = Period('2007-12-31 23:59', freq='t') - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freqstr, '5T') - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freqstr, 'T') - - def test_conv_annual(self): - # frequency conversion tests: from Annual Frequency - - ival_A = Period(freq='A', year=2007) - - ival_AJAN = Period(freq="A-JAN", year=2007) - ival_AJUN = Period(freq="A-JUN", year=2007) - ival_ANOV = Period(freq="A-NOV", year=2007) - - ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) - ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) - ival_A_to_M_start = Period(freq='M', year=2007, month=1) - ival_A_to_M_end = Period(freq='M', year=2007, month=12) - ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) - ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) - ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) - ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - - ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_AJAN_to_D_start = Period(freq='D', 
year=2006, month=2, day=1) - ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) - ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) - ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) - - self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) - self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) - self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) - self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) - self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) - self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) - self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) - self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) - self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) - self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) - self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) - self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) - self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) - self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) - self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) - self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) - - self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) - self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) - - self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) - self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) - - self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) - self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) - - self.assertEqual(ival_A.asfreq('A'), ival_A) - - def test_conv_quarterly(self): - # frequency conversion tests: from Quarterly Frequency - - ival_Q = Period(freq='Q', year=2007, quarter=1) - ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) - - ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) - ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) - - ival_Q_to_A = Period(freq='A', year=2007) - ival_Q_to_M_start = Period(freq='M', year=2007, month=1) - ival_Q_to_M_end = Period(freq='M', year=2007, month=3) - ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) - ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) - ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) - ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) - ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, - minute=59, second=59) - - ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) - ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) - - ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) - ival_QEJUN_to_D_end = Period(freq='D', year=2006, 
month=9, day=30) - - self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) - self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) - - self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) - self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) - self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) - self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) - self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) - self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) - self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) - self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) - self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) - self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) - self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) - self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) - self.assertEqual(ival_Q.asfreq('S', 'S'), ival_Q_to_S_start) - self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) - - self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) - self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) - self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) - self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) - - self.assertEqual(ival_Q.asfreq('Q'), ival_Q) - - def test_conv_monthly(self): - # frequency conversion tests: from Monthly Frequency - - ival_M = Period(freq='M', year=2007, month=1) - ival_M_end_of_year = Period(freq='M', year=2007, month=12) - ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) - ival_M_to_A = Period(freq='A', year=2007) - ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) - ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) - ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) - ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) - ival_M_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) - ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_M_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, - minute=59, second=59) - - self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) - self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) - self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) - - self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) - self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) - self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) - self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) - self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) - self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) - self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) - self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) - self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) - self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) - self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) - 
self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) - - self.assertEqual(ival_M.asfreq('M'), ival_M) - - def test_conv_weekly(self): - # frequency conversion tests: from Weekly Frequency - ival_W = Period(freq='W', year=2007, month=1, day=1) - - ival_WSUN = Period(freq='W', year=2007, month=1, day=7) - ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) - ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) - ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) - ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) - ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) - ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) - - ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) - ival_WSAT_to_D_end = Period(freq='D', year=2007, month=1, day=6) - ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) - ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) - ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) - ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) - ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) - ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) - ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) - ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) - ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) - ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) - - ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) - ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) - ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) - ival_W_to_A = Period(freq='A', year=2007) - ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_W_to_M = Period(freq='M', year=2007, month=1) - - if Period(freq='D', year=2007, month=12, day=31).weekday == 6: - ival_W_to_A_end_of_year = Period(freq='A', year=2007) - else: - ival_W_to_A_end_of_year = Period(freq='A', year=2008) - - if Period(freq='D', year=2007, month=3, day=31).weekday == 6: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) - else: - ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) - - if Period(freq='D', year=2007, month=1, day=31).weekday == 6: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) - else: - ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) - - ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) - ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) - ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) - ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) - ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) - ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, - minute=59, second=59) - - self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) - self.assertEqual(ival_W_end_of_year.asfreq('A'), - ival_W_to_A_end_of_year) - 
self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) - self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), - ival_W_to_Q_end_of_quarter) - self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) - self.assertEqual(ival_W_end_of_month.asfreq('M'), - ival_W_to_M_end_of_month) - - self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) - self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) - - self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) - self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) - - self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) - self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) - self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) - self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) - self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) - self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) - self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) - self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) - self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) - self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) - self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) - self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) - self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) - self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) - - self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) - self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) - self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) - self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) - self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) - self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) - - self.assertEqual(ival_W.asfreq('W'), ival_W) +class TestComparisons(tm.TestCase): - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - ival_W.asfreq('WK') + def setUp(self): + self.january1 = Period('2000-01', 'M') + self.january2 = Period('2000-01', 'M') + self.february = Period('2000-02', 'M') + self.march = Period('2000-03', 'M') + self.day = Period('2012-01-01', 'D') - def test_conv_weekly_legacy(self): - # frequency conversion tests: from Weekly Frequency - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK', year=2007, month=1, day=1) + def test_equal(self): + self.assertEqual(self.january1, self.january2) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-SAT', year=2007, month=1, day=6) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-FRI', year=2007, month=1, day=5) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-THU', year=2007, month=1, day=4) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-WED', year=2007, month=1, day=3) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-TUE', year=2007, month=1, day=2) - with self.assertRaisesRegexp(ValueError, msg): - Period(freq='WK-MON', year=2007, month=1, day=1) - - def test_conv_business(self): - # frequency conversion tests: from Business Frequency" - - ival_B = Period(freq='B', year=2007, month=1, day=1) - ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31) - ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30) - ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31) - 
ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5) - - ival_B_to_A = Period(freq='A', year=2007) - ival_B_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_B_to_M = Period(freq='M', year=2007, month=1) - ival_B_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) - ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, - minute=59, second=59) - - self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) - self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) - self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) - self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) - self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) - - self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) - - self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) - self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) - self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) - self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) - self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) - self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) - - self.assertEqual(ival_B.asfreq('B'), ival_B) - - def test_conv_daily(self): - # frequency conversion tests: from Business Frequency" - - ival_D = Period(freq='D', year=2007, month=1, day=1) - ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31) - ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31) - ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31) - ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7) - - ival_D_friday = Period(freq='D', year=2007, month=1, day=5) - ival_D_saturday = Period(freq='D', year=2007, month=1, day=6) - ival_D_sunday = Period(freq='D', year=2007, month=1, day=7) - - # TODO: unused? 
- # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) - - ival_B_friday = Period(freq='B', year=2007, month=1, day=5) - ival_B_monday = Period(freq='B', year=2007, month=1, day=8) - - ival_D_to_A = Period(freq='A', year=2007) - - ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008) - ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007) - ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007) - - ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) - ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) - ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) - - ival_D_to_M = Period(freq='M', year=2007, month=1) - ival_D_to_W = Period(freq='W', year=2007, month=1, day=7) - - ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) - ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, - minute=59, second=59) - - self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) - - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), - ival_Deoq_to_AJAN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), - ival_Deoq_to_AJUN) - self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), - ival_Deoq_to_ADEC) - - self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) - self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) - self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) - self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) - self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) - self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) - self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) - - self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) - self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) - self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) - - self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) - self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) - self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) - self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) - self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) - self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) - - self.assertEqual(ival_D.asfreq('D'), ival_D) - - def test_conv_hourly(self): - # frequency conversion tests: from Hourly Frequency" - - ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, - hour=23) - ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, - hour=23) - ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, - hour=23) - ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, - hour=23) - ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, - hour=23) - ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, - hour=23) - - ival_H_to_A = Period(freq='A', year=2007) - ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) - 
ival_H_to_M = Period(freq='M', year=2007, month=1) - ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) - - ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=0) - ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=59) - ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=59, second=59) - - self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) - self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) - self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) - self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) - self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) - self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) - self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) - - self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) - self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) - self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) - self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) - - self.assertEqual(ival_H.asfreq('H'), ival_H) - - def test_conv_minutely(self): - # frequency conversion tests: from Minutely Frequency" - - ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, - hour=23, minute=59) - ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, - hour=23, minute=59) - ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, - hour=23, minute=59) - ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, - hour=23, minute=59) - ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, - hour=23, minute=59) - ival_T_end_of_hour = Period(freq='Min', year=2007, month=1, day=1, - hour=0, minute=59) - - ival_T_to_A = Period(freq='A', year=2007) - ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_T_to_M = Period(freq='M', year=2007, month=1) - ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - - ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=0) - ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, - minute=0, second=59) - - self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) - self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) - self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) - self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) - self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) - 
self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) - self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) - self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) - self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) - - self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) - self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) - - self.assertEqual(ival_T.asfreq('Min'), ival_T) - - def test_conv_secondly(self): - # frequency conversion tests: from Secondly Frequency" - - ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, - second=0) - ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, - hour=23, minute=59, second=59) - ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, - hour=23, minute=59, second=59) - ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, - hour=23, minute=59, second=59) - ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=59, second=59) - ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, - hour=0, minute=0, second=59) - - ival_S_to_A = Period(freq='A', year=2007) - ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) - ival_S_to_M = Period(freq='M', year=2007, month=1) - ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) - ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) - ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) - ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) - ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, - minute=0) - - self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) - self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) - self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) - self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) - self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) - self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) - self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) - self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) - self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) - - self.assertEqual(ival_S.asfreq('S'), ival_S) - - def test_asfreq_mult(self): - # normal freq to mult freq - p = Period(freq='A', year=2007) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq) - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='3A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - 
self.assertEqual(result.freq, expected.freq) - - # mult freq to normal freq - p = Period(freq='3A', year=2007) - # ordinal will change because how=E is the default - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq) - expected = Period('2009', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - # ordinal will not change - for freq in ['A', offsets.YearEnd()]: - result = p.asfreq(freq, how='S') - expected = Period('2007', freq='A') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2007-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - p = Period(freq='3A', year=2007) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq) - expected = Period('2009-12', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - for freq in ['2M', offsets.MonthEnd(2)]: - result = p.asfreq(freq, how='S') - expected = Period('2007-01', freq='2M') - - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - def test_asfreq_combined(self): - # normal freq to combined freq - p = Period('2007', freq='H') - - # ordinal will not change - expected = Period('2007', freq='25H') - for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): - result = p.asfreq(freq, how=how) - self.assertEqual(result, expected) - self.assertEqual(result.ordinal, expected.ordinal) - self.assertEqual(result.freq, expected.freq) - - # combined freq to normal freq - p1 = Period(freq='1D1H', year=2007) - p2 = Period(freq='1H1D', year=2007) - - # ordinal will change because how=E is the default - result1 = p1.asfreq('H') - result2 = p2.asfreq('H') - expected = Period('2007-01-02', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) - - # ordinal will not change - result1 = p1.asfreq('H', how='S') - result2 = p2.asfreq('H', how='S') - expected = Period('2007-01-01', freq='H') - self.assertEqual(result1, expected) - self.assertEqual(result1.ordinal, expected.ordinal) - self.assertEqual(result1.freq, expected.freq) - self.assertEqual(result2, expected) - self.assertEqual(result2.ordinal, expected.ordinal) - self.assertEqual(result2.freq, expected.freq) + def test_equal_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 == self.day - def test_is_leap_year(self): - # GH 13727 - for freq in ['A', 'M', 'D', 'H']: - p = Period('2000-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) - self.assertIsInstance(p.is_leap_year, bool) + def test_notEqual(self): + self.assertNotEqual(self.january1, 1) + 
self.assertNotEqual(self.january1, self.february) - p = Period('1999-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) + def test_greater(self): + self.assertTrue(self.february > self.january1) - p = Period('2004-01-01 00:00:00', freq=freq) - self.assertTrue(p.is_leap_year) + def test_greater_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 > self.day - p = Period('2100-01-01 00:00:00', freq=freq) - self.assertFalse(p.is_leap_year) + def test_greater_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 > 1 + + def test_greaterEqual(self): + self.assertTrue(self.january1 >= self.january2) + + def test_greaterEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 >= self.day + + with tm.assertRaises(TypeError): + print(self.january1 >= 1) + + def test_smallerEqual(self): + self.assertTrue(self.january1 <= self.january2) + + def test_smallerEqual_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 <= self.day + + def test_smallerEqual_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 <= 1 + + def test_smaller(self): + self.assertTrue(self.january1 < self.february) + + def test_smaller_Raises_Value(self): + with tm.assertRaises(period.IncompatibleFrequency): + self.january1 < self.day + + def test_smaller_Raises_Type(self): + with tm.assertRaises(TypeError): + self.january1 < 1 + + def test_sort(self): + periods = [self.march, self.january1, self.february] + correctPeriods = [self.january1, self.february, self.march] + self.assertEqual(sorted(periods), correctPeriods) + + def test_period_nat_comp(self): + p_nat = Period('NaT', freq='D') + p = Period('2011-01-01', freq='D') + + nat = pd.Timestamp('NaT') + t = pd.Timestamp('2011-01-01') + # confirm Period('NaT') work identical with Timestamp('NaT') + for left, right in [(p_nat, p), (p, p_nat), (p_nat, p_nat), (nat, t), + (t, nat), (nat, nat)]: + self.assertEqual(left < right, False) + self.assertEqual(left > right, False) + self.assertEqual(left == right, False) + self.assertEqual(left != right, True) + self.assertEqual(left <= right, False) + self.assertEqual(left >= right, False) class TestMethods(tm.TestCase): diff --git a/pandas/tests/scalar/test_period_asfreq.py b/pandas/tests/scalar/test_period_asfreq.py new file mode 100644 index 0000000000000..d311fef8a826d --- /dev/null +++ b/pandas/tests/scalar/test_period_asfreq.py @@ -0,0 +1,721 @@ +import pandas as pd +from pandas import Period, offsets +from pandas.util import testing as tm +from pandas.tseries.frequencies import _period_code_map + + +class TestFreqConversion(tm.TestCase): + "Test frequency conversion of date objects" + + def test_asfreq_corner(self): + val = Period(freq='A', year=2007) + result1 = val.asfreq('5t') + result2 = val.asfreq('t') + expected = Period('2007-12-31 23:59', freq='t') + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freqstr, '5T') + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freqstr, 'T') + + def test_conv_annual(self): + # frequency conversion tests: from Annual Frequency + + ival_A = Period(freq='A', year=2007) + + ival_AJAN = Period(freq="A-JAN", year=2007) + ival_AJUN = Period(freq="A-JUN", year=2007) + ival_ANOV = Period(freq="A-NOV", year=2007) + + ival_A_to_Q_start = Period(freq='Q', year=2007, quarter=1) + ival_A_to_Q_end = Period(freq='Q', year=2007, quarter=4) + ival_A_to_M_start = Period(freq='M', year=2007, 
month=1) + ival_A_to_M_end = Period(freq='M', year=2007, month=12) + ival_A_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_A_to_W_end = Period(freq='W', year=2007, month=12, day=31) + ival_A_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_A_to_B_end = Period(freq='B', year=2007, month=12, day=31) + ival_A_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_A_to_D_end = Period(freq='D', year=2007, month=12, day=31) + ival_A_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_A_to_H_end = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_A_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_A_to_T_end = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_A_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_A_to_S_end = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + + ival_AJAN_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_AJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_AJUN_to_D_end = Period(freq='D', year=2007, month=6, day=30) + ival_AJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_ANOV_to_D_end = Period(freq='D', year=2007, month=11, day=30) + ival_ANOV_to_D_start = Period(freq='D', year=2006, month=12, day=1) + + self.assertEqual(ival_A.asfreq('Q', 'S'), ival_A_to_Q_start) + self.assertEqual(ival_A.asfreq('Q', 'e'), ival_A_to_Q_end) + self.assertEqual(ival_A.asfreq('M', 's'), ival_A_to_M_start) + self.assertEqual(ival_A.asfreq('M', 'E'), ival_A_to_M_end) + self.assertEqual(ival_A.asfreq('W', 'S'), ival_A_to_W_start) + self.assertEqual(ival_A.asfreq('W', 'E'), ival_A_to_W_end) + self.assertEqual(ival_A.asfreq('B', 'S'), ival_A_to_B_start) + self.assertEqual(ival_A.asfreq('B', 'E'), ival_A_to_B_end) + self.assertEqual(ival_A.asfreq('D', 'S'), ival_A_to_D_start) + self.assertEqual(ival_A.asfreq('D', 'E'), ival_A_to_D_end) + self.assertEqual(ival_A.asfreq('H', 'S'), ival_A_to_H_start) + self.assertEqual(ival_A.asfreq('H', 'E'), ival_A_to_H_end) + self.assertEqual(ival_A.asfreq('min', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('min', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('T', 'S'), ival_A_to_T_start) + self.assertEqual(ival_A.asfreq('T', 'E'), ival_A_to_T_end) + self.assertEqual(ival_A.asfreq('S', 'S'), ival_A_to_S_start) + self.assertEqual(ival_A.asfreq('S', 'E'), ival_A_to_S_end) + + self.assertEqual(ival_AJAN.asfreq('D', 'S'), ival_AJAN_to_D_start) + self.assertEqual(ival_AJAN.asfreq('D', 'E'), ival_AJAN_to_D_end) + + self.assertEqual(ival_AJUN.asfreq('D', 'S'), ival_AJUN_to_D_start) + self.assertEqual(ival_AJUN.asfreq('D', 'E'), ival_AJUN_to_D_end) + + self.assertEqual(ival_ANOV.asfreq('D', 'S'), ival_ANOV_to_D_start) + self.assertEqual(ival_ANOV.asfreq('D', 'E'), ival_ANOV_to_D_end) + + self.assertEqual(ival_A.asfreq('A'), ival_A) + + def test_conv_quarterly(self): + # frequency conversion tests: from Quarterly Frequency + + ival_Q = Period(freq='Q', year=2007, quarter=1) + ival_Q_end_of_year = Period(freq='Q', year=2007, quarter=4) + + ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) + ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) + + ival_Q_to_A = Period(freq='A', year=2007) + ival_Q_to_M_start = Period(freq='M', year=2007, month=1) + ival_Q_to_M_end = Period(freq='M', year=2007, month=3) + ival_Q_to_W_start = Period(freq='W', year=2007, month=1, day=1) + 
ival_Q_to_W_end = Period(freq='W', year=2007, month=3, day=31) + ival_Q_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_Q_to_B_end = Period(freq='B', year=2007, month=3, day=30) + ival_Q_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_Q_to_D_end = Period(freq='D', year=2007, month=3, day=31) + ival_Q_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_Q_to_H_end = Period(freq='H', year=2007, month=3, day=31, hour=23) + ival_Q_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_Q_to_T_end = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_Q_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_Q_to_S_end = Period(freq='S', year=2007, month=3, day=31, hour=23, + minute=59, second=59) + + ival_QEJAN_to_D_start = Period(freq='D', year=2006, month=2, day=1) + ival_QEJAN_to_D_end = Period(freq='D', year=2006, month=4, day=30) + + ival_QEJUN_to_D_start = Period(freq='D', year=2006, month=7, day=1) + ival_QEJUN_to_D_end = Period(freq='D', year=2006, month=9, day=30) + + self.assertEqual(ival_Q.asfreq('A'), ival_Q_to_A) + self.assertEqual(ival_Q_end_of_year.asfreq('A'), ival_Q_to_A) + + self.assertEqual(ival_Q.asfreq('M', 'S'), ival_Q_to_M_start) + self.assertEqual(ival_Q.asfreq('M', 'E'), ival_Q_to_M_end) + self.assertEqual(ival_Q.asfreq('W', 'S'), ival_Q_to_W_start) + self.assertEqual(ival_Q.asfreq('W', 'E'), ival_Q_to_W_end) + self.assertEqual(ival_Q.asfreq('B', 'S'), ival_Q_to_B_start) + self.assertEqual(ival_Q.asfreq('B', 'E'), ival_Q_to_B_end) + self.assertEqual(ival_Q.asfreq('D', 'S'), ival_Q_to_D_start) + self.assertEqual(ival_Q.asfreq('D', 'E'), ival_Q_to_D_end) + self.assertEqual(ival_Q.asfreq('H', 'S'), ival_Q_to_H_start) + self.assertEqual(ival_Q.asfreq('H', 'E'), ival_Q_to_H_end) + self.assertEqual(ival_Q.asfreq('Min', 'S'), ival_Q_to_T_start) + self.assertEqual(ival_Q.asfreq('Min', 'E'), ival_Q_to_T_end) + self.assertEqual(ival_Q.asfreq('S', 'S'), ival_Q_to_S_start) + self.assertEqual(ival_Q.asfreq('S', 'E'), ival_Q_to_S_end) + + self.assertEqual(ival_QEJAN.asfreq('D', 'S'), ival_QEJAN_to_D_start) + self.assertEqual(ival_QEJAN.asfreq('D', 'E'), ival_QEJAN_to_D_end) + self.assertEqual(ival_QEJUN.asfreq('D', 'S'), ival_QEJUN_to_D_start) + self.assertEqual(ival_QEJUN.asfreq('D', 'E'), ival_QEJUN_to_D_end) + + self.assertEqual(ival_Q.asfreq('Q'), ival_Q) + + def test_conv_monthly(self): + # frequency conversion tests: from Monthly Frequency + + ival_M = Period(freq='M', year=2007, month=1) + ival_M_end_of_year = Period(freq='M', year=2007, month=12) + ival_M_end_of_quarter = Period(freq='M', year=2007, month=3) + ival_M_to_A = Period(freq='A', year=2007) + ival_M_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_M_to_W_start = Period(freq='W', year=2007, month=1, day=1) + ival_M_to_W_end = Period(freq='W', year=2007, month=1, day=31) + ival_M_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_M_to_B_end = Period(freq='B', year=2007, month=1, day=31) + ival_M_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_M_to_D_end = Period(freq='D', year=2007, month=1, day=31) + ival_M_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_M_to_H_end = Period(freq='H', year=2007, month=1, day=31, hour=23) + ival_M_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_M_to_T_end = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_M_to_S_start = 
Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_M_to_S_end = Period(freq='S', year=2007, month=1, day=31, hour=23, + minute=59, second=59) + + self.assertEqual(ival_M.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M_end_of_year.asfreq('A'), ival_M_to_A) + self.assertEqual(ival_M.asfreq('Q'), ival_M_to_Q) + self.assertEqual(ival_M_end_of_quarter.asfreq('Q'), ival_M_to_Q) + + self.assertEqual(ival_M.asfreq('W', 'S'), ival_M_to_W_start) + self.assertEqual(ival_M.asfreq('W', 'E'), ival_M_to_W_end) + self.assertEqual(ival_M.asfreq('B', 'S'), ival_M_to_B_start) + self.assertEqual(ival_M.asfreq('B', 'E'), ival_M_to_B_end) + self.assertEqual(ival_M.asfreq('D', 'S'), ival_M_to_D_start) + self.assertEqual(ival_M.asfreq('D', 'E'), ival_M_to_D_end) + self.assertEqual(ival_M.asfreq('H', 'S'), ival_M_to_H_start) + self.assertEqual(ival_M.asfreq('H', 'E'), ival_M_to_H_end) + self.assertEqual(ival_M.asfreq('Min', 'S'), ival_M_to_T_start) + self.assertEqual(ival_M.asfreq('Min', 'E'), ival_M_to_T_end) + self.assertEqual(ival_M.asfreq('S', 'S'), ival_M_to_S_start) + self.assertEqual(ival_M.asfreq('S', 'E'), ival_M_to_S_end) + + self.assertEqual(ival_M.asfreq('M'), ival_M) + + def test_conv_weekly(self): + # frequency conversion tests: from Weekly Frequency + ival_W = Period(freq='W', year=2007, month=1, day=1) + + ival_WSUN = Period(freq='W', year=2007, month=1, day=7) + ival_WSAT = Period(freq='W-SAT', year=2007, month=1, day=6) + ival_WFRI = Period(freq='W-FRI', year=2007, month=1, day=5) + ival_WTHU = Period(freq='W-THU', year=2007, month=1, day=4) + ival_WWED = Period(freq='W-WED', year=2007, month=1, day=3) + ival_WTUE = Period(freq='W-TUE', year=2007, month=1, day=2) + ival_WMON = Period(freq='W-MON', year=2007, month=1, day=1) + + ival_WSUN_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_WSUN_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_WSAT_to_D_start = Period(freq='D', year=2006, month=12, day=31) + ival_WSAT_to_D_end = Period(freq='D', year=2007, month=1, day=6) + ival_WFRI_to_D_start = Period(freq='D', year=2006, month=12, day=30) + ival_WFRI_to_D_end = Period(freq='D', year=2007, month=1, day=5) + ival_WTHU_to_D_start = Period(freq='D', year=2006, month=12, day=29) + ival_WTHU_to_D_end = Period(freq='D', year=2007, month=1, day=4) + ival_WWED_to_D_start = Period(freq='D', year=2006, month=12, day=28) + ival_WWED_to_D_end = Period(freq='D', year=2007, month=1, day=3) + ival_WTUE_to_D_start = Period(freq='D', year=2006, month=12, day=27) + ival_WTUE_to_D_end = Period(freq='D', year=2007, month=1, day=2) + ival_WMON_to_D_start = Period(freq='D', year=2006, month=12, day=26) + ival_WMON_to_D_end = Period(freq='D', year=2007, month=1, day=1) + + ival_W_end_of_year = Period(freq='W', year=2007, month=12, day=31) + ival_W_end_of_quarter = Period(freq='W', year=2007, month=3, day=31) + ival_W_end_of_month = Period(freq='W', year=2007, month=1, day=31) + ival_W_to_A = Period(freq='A', year=2007) + ival_W_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_W_to_M = Period(freq='M', year=2007, month=1) + + if Period(freq='D', year=2007, month=12, day=31).weekday == 6: + ival_W_to_A_end_of_year = Period(freq='A', year=2007) + else: + ival_W_to_A_end_of_year = Period(freq='A', year=2008) + + if Period(freq='D', year=2007, month=3, day=31).weekday == 6: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=1) + else: + ival_W_to_Q_end_of_quarter = Period(freq='Q', year=2007, quarter=2) + + if Period(freq='D', year=2007, month=1, 
day=31).weekday == 6: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=1) + else: + ival_W_to_M_end_of_month = Period(freq='M', year=2007, month=2) + + ival_W_to_B_start = Period(freq='B', year=2007, month=1, day=1) + ival_W_to_B_end = Period(freq='B', year=2007, month=1, day=5) + ival_W_to_D_start = Period(freq='D', year=2007, month=1, day=1) + ival_W_to_D_end = Period(freq='D', year=2007, month=1, day=7) + ival_W_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_W_to_H_end = Period(freq='H', year=2007, month=1, day=7, hour=23) + ival_W_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_W_to_T_end = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_W_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_W_to_S_end = Period(freq='S', year=2007, month=1, day=7, hour=23, + minute=59, second=59) + + self.assertEqual(ival_W.asfreq('A'), ival_W_to_A) + self.assertEqual(ival_W_end_of_year.asfreq('A'), + ival_W_to_A_end_of_year) + self.assertEqual(ival_W.asfreq('Q'), ival_W_to_Q) + self.assertEqual(ival_W_end_of_quarter.asfreq('Q'), + ival_W_to_Q_end_of_quarter) + self.assertEqual(ival_W.asfreq('M'), ival_W_to_M) + self.assertEqual(ival_W_end_of_month.asfreq('M'), + ival_W_to_M_end_of_month) + + self.assertEqual(ival_W.asfreq('B', 'S'), ival_W_to_B_start) + self.assertEqual(ival_W.asfreq('B', 'E'), ival_W_to_B_end) + + self.assertEqual(ival_W.asfreq('D', 'S'), ival_W_to_D_start) + self.assertEqual(ival_W.asfreq('D', 'E'), ival_W_to_D_end) + + self.assertEqual(ival_WSUN.asfreq('D', 'S'), ival_WSUN_to_D_start) + self.assertEqual(ival_WSUN.asfreq('D', 'E'), ival_WSUN_to_D_end) + self.assertEqual(ival_WSAT.asfreq('D', 'S'), ival_WSAT_to_D_start) + self.assertEqual(ival_WSAT.asfreq('D', 'E'), ival_WSAT_to_D_end) + self.assertEqual(ival_WFRI.asfreq('D', 'S'), ival_WFRI_to_D_start) + self.assertEqual(ival_WFRI.asfreq('D', 'E'), ival_WFRI_to_D_end) + self.assertEqual(ival_WTHU.asfreq('D', 'S'), ival_WTHU_to_D_start) + self.assertEqual(ival_WTHU.asfreq('D', 'E'), ival_WTHU_to_D_end) + self.assertEqual(ival_WWED.asfreq('D', 'S'), ival_WWED_to_D_start) + self.assertEqual(ival_WWED.asfreq('D', 'E'), ival_WWED_to_D_end) + self.assertEqual(ival_WTUE.asfreq('D', 'S'), ival_WTUE_to_D_start) + self.assertEqual(ival_WTUE.asfreq('D', 'E'), ival_WTUE_to_D_end) + self.assertEqual(ival_WMON.asfreq('D', 'S'), ival_WMON_to_D_start) + self.assertEqual(ival_WMON.asfreq('D', 'E'), ival_WMON_to_D_end) + + self.assertEqual(ival_W.asfreq('H', 'S'), ival_W_to_H_start) + self.assertEqual(ival_W.asfreq('H', 'E'), ival_W_to_H_end) + self.assertEqual(ival_W.asfreq('Min', 'S'), ival_W_to_T_start) + self.assertEqual(ival_W.asfreq('Min', 'E'), ival_W_to_T_end) + self.assertEqual(ival_W.asfreq('S', 'S'), ival_W_to_S_start) + self.assertEqual(ival_W.asfreq('S', 'E'), ival_W_to_S_end) + + self.assertEqual(ival_W.asfreq('W'), ival_W) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + ival_W.asfreq('WK') + + def test_conv_weekly_legacy(self): + # frequency conversion tests: from Weekly Frequency + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK', year=2007, month=1, day=1) + + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-SAT', year=2007, month=1, day=6) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-FRI', year=2007, month=1, day=5) + with 
self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-THU', year=2007, month=1, day=4) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-WED', year=2007, month=1, day=3) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-TUE', year=2007, month=1, day=2) + with self.assertRaisesRegexp(ValueError, msg): + Period(freq='WK-MON', year=2007, month=1, day=1) + + def test_conv_business(self): + # frequency conversion tests: from Business Frequency" + + ival_B = Period(freq='B', year=2007, month=1, day=1) + ival_B_end_of_year = Period(freq='B', year=2007, month=12, day=31) + ival_B_end_of_quarter = Period(freq='B', year=2007, month=3, day=30) + ival_B_end_of_month = Period(freq='B', year=2007, month=1, day=31) + ival_B_end_of_week = Period(freq='B', year=2007, month=1, day=5) + + ival_B_to_A = Period(freq='A', year=2007) + ival_B_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_B_to_M = Period(freq='M', year=2007, month=1) + ival_B_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_B_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_B_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_B_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) + ival_B_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_B_to_T_end = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_B_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_B_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, + minute=59, second=59) + + self.assertEqual(ival_B.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B_end_of_year.asfreq('A'), ival_B_to_A) + self.assertEqual(ival_B.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B_end_of_quarter.asfreq('Q'), ival_B_to_Q) + self.assertEqual(ival_B.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B_end_of_month.asfreq('M'), ival_B_to_M) + self.assertEqual(ival_B.asfreq('W'), ival_B_to_W) + self.assertEqual(ival_B_end_of_week.asfreq('W'), ival_B_to_W) + + self.assertEqual(ival_B.asfreq('D'), ival_B_to_D) + + self.assertEqual(ival_B.asfreq('H', 'S'), ival_B_to_H_start) + self.assertEqual(ival_B.asfreq('H', 'E'), ival_B_to_H_end) + self.assertEqual(ival_B.asfreq('Min', 'S'), ival_B_to_T_start) + self.assertEqual(ival_B.asfreq('Min', 'E'), ival_B_to_T_end) + self.assertEqual(ival_B.asfreq('S', 'S'), ival_B_to_S_start) + self.assertEqual(ival_B.asfreq('S', 'E'), ival_B_to_S_end) + + self.assertEqual(ival_B.asfreq('B'), ival_B) + + def test_conv_daily(self): + # frequency conversion tests: from Business Frequency" + + ival_D = Period(freq='D', year=2007, month=1, day=1) + ival_D_end_of_year = Period(freq='D', year=2007, month=12, day=31) + ival_D_end_of_quarter = Period(freq='D', year=2007, month=3, day=31) + ival_D_end_of_month = Period(freq='D', year=2007, month=1, day=31) + ival_D_end_of_week = Period(freq='D', year=2007, month=1, day=7) + + ival_D_friday = Period(freq='D', year=2007, month=1, day=5) + ival_D_saturday = Period(freq='D', year=2007, month=1, day=6) + ival_D_sunday = Period(freq='D', year=2007, month=1, day=7) + + # TODO: unused? 
+ # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) + + ival_B_friday = Period(freq='B', year=2007, month=1, day=5) + ival_B_monday = Period(freq='B', year=2007, month=1, day=8) + + ival_D_to_A = Period(freq='A', year=2007) + + ival_Deoq_to_AJAN = Period(freq='A-JAN', year=2008) + ival_Deoq_to_AJUN = Period(freq='A-JUN', year=2007) + ival_Deoq_to_ADEC = Period(freq='A-DEC', year=2007) + + ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) + ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) + ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) + + ival_D_to_M = Period(freq='M', year=2007, month=1) + ival_D_to_W = Period(freq='W', year=2007, month=1, day=7) + + ival_D_to_H_start = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_D_to_H_end = Period(freq='H', year=2007, month=1, day=1, hour=23) + ival_D_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_D_to_T_end = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_D_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_D_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=23, + minute=59, second=59) + + self.assertEqual(ival_D.asfreq('A'), ival_D_to_A) + + self.assertEqual(ival_D_end_of_quarter.asfreq('A-JAN'), + ival_Deoq_to_AJAN) + self.assertEqual(ival_D_end_of_quarter.asfreq('A-JUN'), + ival_Deoq_to_AJUN) + self.assertEqual(ival_D_end_of_quarter.asfreq('A-DEC'), + ival_Deoq_to_ADEC) + + self.assertEqual(ival_D_end_of_year.asfreq('A'), ival_D_to_A) + self.assertEqual(ival_D_end_of_quarter.asfreq('Q'), ival_D_to_QEDEC) + self.assertEqual(ival_D.asfreq("Q-JAN"), ival_D_to_QEJAN) + self.assertEqual(ival_D.asfreq("Q-JUN"), ival_D_to_QEJUN) + self.assertEqual(ival_D.asfreq("Q-DEC"), ival_D_to_QEDEC) + self.assertEqual(ival_D.asfreq('M'), ival_D_to_M) + self.assertEqual(ival_D_end_of_month.asfreq('M'), ival_D_to_M) + self.assertEqual(ival_D.asfreq('W'), ival_D_to_W) + self.assertEqual(ival_D_end_of_week.asfreq('W'), ival_D_to_W) + + self.assertEqual(ival_D_friday.asfreq('B'), ival_B_friday) + self.assertEqual(ival_D_saturday.asfreq('B', 'S'), ival_B_friday) + self.assertEqual(ival_D_saturday.asfreq('B', 'E'), ival_B_monday) + self.assertEqual(ival_D_sunday.asfreq('B', 'S'), ival_B_friday) + self.assertEqual(ival_D_sunday.asfreq('B', 'E'), ival_B_monday) + + self.assertEqual(ival_D.asfreq('H', 'S'), ival_D_to_H_start) + self.assertEqual(ival_D.asfreq('H', 'E'), ival_D_to_H_end) + self.assertEqual(ival_D.asfreq('Min', 'S'), ival_D_to_T_start) + self.assertEqual(ival_D.asfreq('Min', 'E'), ival_D_to_T_end) + self.assertEqual(ival_D.asfreq('S', 'S'), ival_D_to_S_start) + self.assertEqual(ival_D.asfreq('S', 'E'), ival_D_to_S_end) + + self.assertEqual(ival_D.asfreq('D'), ival_D) + + def test_conv_hourly(self): + # frequency conversion tests: from Hourly Frequency" + + ival_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_H_end_of_year = Period(freq='H', year=2007, month=12, day=31, + hour=23) + ival_H_end_of_quarter = Period(freq='H', year=2007, month=3, day=31, + hour=23) + ival_H_end_of_month = Period(freq='H', year=2007, month=1, day=31, + hour=23) + ival_H_end_of_week = Period(freq='H', year=2007, month=1, day=7, + hour=23) + ival_H_end_of_day = Period(freq='H', year=2007, month=1, day=1, + hour=23) + ival_H_end_of_bus = Period(freq='H', year=2007, month=1, day=1, + hour=23) + + ival_H_to_A = Period(freq='A', year=2007) + ival_H_to_Q = Period(freq='Q', year=2007, quarter=1) + 
ival_H_to_M = Period(freq='M', year=2007, month=1) + ival_H_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_H_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_H_to_B = Period(freq='B', year=2007, month=1, day=1) + + ival_H_to_T_start = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=0) + ival_H_to_T_end = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=59) + ival_H_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_H_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=59, second=59) + + self.assertEqual(ival_H.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H_end_of_year.asfreq('A'), ival_H_to_A) + self.assertEqual(ival_H.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H_end_of_quarter.asfreq('Q'), ival_H_to_Q) + self.assertEqual(ival_H.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H_end_of_month.asfreq('M'), ival_H_to_M) + self.assertEqual(ival_H.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H_end_of_week.asfreq('W'), ival_H_to_W) + self.assertEqual(ival_H.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H_end_of_day.asfreq('D'), ival_H_to_D) + self.assertEqual(ival_H.asfreq('B'), ival_H_to_B) + self.assertEqual(ival_H_end_of_bus.asfreq('B'), ival_H_to_B) + + self.assertEqual(ival_H.asfreq('Min', 'S'), ival_H_to_T_start) + self.assertEqual(ival_H.asfreq('Min', 'E'), ival_H_to_T_end) + self.assertEqual(ival_H.asfreq('S', 'S'), ival_H_to_S_start) + self.assertEqual(ival_H.asfreq('S', 'E'), ival_H_to_S_end) + + self.assertEqual(ival_H.asfreq('H'), ival_H) + + def test_conv_minutely(self): + # frequency conversion tests: from Minutely Frequency" + + ival_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + ival_T_end_of_year = Period(freq='Min', year=2007, month=12, day=31, + hour=23, minute=59) + ival_T_end_of_quarter = Period(freq='Min', year=2007, month=3, day=31, + hour=23, minute=59) + ival_T_end_of_month = Period(freq='Min', year=2007, month=1, day=31, + hour=23, minute=59) + ival_T_end_of_week = Period(freq='Min', year=2007, month=1, day=7, + hour=23, minute=59) + ival_T_end_of_day = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_bus = Period(freq='Min', year=2007, month=1, day=1, + hour=23, minute=59) + ival_T_end_of_hour = Period(freq='Min', year=2007, month=1, day=1, + hour=0, minute=59) + + ival_T_to_A = Period(freq='A', year=2007) + ival_T_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_T_to_M = Period(freq='M', year=2007, month=1) + ival_T_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_T_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_T_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_T_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + + ival_T_to_S_start = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=0) + ival_T_to_S_end = Period(freq='S', year=2007, month=1, day=1, hour=0, + minute=0, second=59) + + self.assertEqual(ival_T.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T_end_of_year.asfreq('A'), ival_T_to_A) + self.assertEqual(ival_T.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T_end_of_quarter.asfreq('Q'), ival_T_to_Q) + self.assertEqual(ival_T.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T_end_of_month.asfreq('M'), ival_T_to_M) + self.assertEqual(ival_T.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T_end_of_week.asfreq('W'), ival_T_to_W) + self.assertEqual(ival_T.asfreq('D'), ival_T_to_D) + 
self.assertEqual(ival_T_end_of_day.asfreq('D'), ival_T_to_D) + self.assertEqual(ival_T.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T_end_of_bus.asfreq('B'), ival_T_to_B) + self.assertEqual(ival_T.asfreq('H'), ival_T_to_H) + self.assertEqual(ival_T_end_of_hour.asfreq('H'), ival_T_to_H) + + self.assertEqual(ival_T.asfreq('S', 'S'), ival_T_to_S_start) + self.assertEqual(ival_T.asfreq('S', 'E'), ival_T_to_S_end) + + self.assertEqual(ival_T.asfreq('Min'), ival_T) + + def test_conv_secondly(self): + # frequency conversion tests: from Secondly Frequency" + + ival_S = Period(freq='S', year=2007, month=1, day=1, hour=0, minute=0, + second=0) + ival_S_end_of_year = Period(freq='S', year=2007, month=12, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_quarter = Period(freq='S', year=2007, month=3, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_month = Period(freq='S', year=2007, month=1, day=31, + hour=23, minute=59, second=59) + ival_S_end_of_week = Period(freq='S', year=2007, month=1, day=7, + hour=23, minute=59, second=59) + ival_S_end_of_day = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_bus = Period(freq='S', year=2007, month=1, day=1, + hour=23, minute=59, second=59) + ival_S_end_of_hour = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=59, second=59) + ival_S_end_of_minute = Period(freq='S', year=2007, month=1, day=1, + hour=0, minute=0, second=59) + + ival_S_to_A = Period(freq='A', year=2007) + ival_S_to_Q = Period(freq='Q', year=2007, quarter=1) + ival_S_to_M = Period(freq='M', year=2007, month=1) + ival_S_to_W = Period(freq='W', year=2007, month=1, day=7) + ival_S_to_D = Period(freq='D', year=2007, month=1, day=1) + ival_S_to_B = Period(freq='B', year=2007, month=1, day=1) + ival_S_to_H = Period(freq='H', year=2007, month=1, day=1, hour=0) + ival_S_to_T = Period(freq='Min', year=2007, month=1, day=1, hour=0, + minute=0) + + self.assertEqual(ival_S.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S_end_of_year.asfreq('A'), ival_S_to_A) + self.assertEqual(ival_S.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S_end_of_quarter.asfreq('Q'), ival_S_to_Q) + self.assertEqual(ival_S.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S_end_of_month.asfreq('M'), ival_S_to_M) + self.assertEqual(ival_S.asfreq('W'), ival_S_to_W) + self.assertEqual(ival_S_end_of_week.asfreq('W'), ival_S_to_W) + self.assertEqual(ival_S.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S_end_of_day.asfreq('D'), ival_S_to_D) + self.assertEqual(ival_S.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S_end_of_bus.asfreq('B'), ival_S_to_B) + self.assertEqual(ival_S.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S_end_of_hour.asfreq('H'), ival_S_to_H) + self.assertEqual(ival_S.asfreq('Min'), ival_S_to_T) + self.assertEqual(ival_S_end_of_minute.asfreq('Min'), ival_S_to_T) + + self.assertEqual(ival_S.asfreq('S'), ival_S) + + def test_asfreq_mult(self): + # normal freq to mult freq + p = Period(freq='A', year=2007) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq) + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['3A', offsets.YearEnd(3)]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='3A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + 
self.assertEqual(result.freq, expected.freq) + + # mult freq to normal freq + p = Period(freq='3A', year=2007) + # ordinal will change because how=E is the default + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq) + expected = Period('2009', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + # ordinal will not change + for freq in ['A', offsets.YearEnd()]: + result = p.asfreq(freq, how='S') + expected = Period('2007', freq='A') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2007-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + p = Period(freq='3A', year=2007) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period('2009-12', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + for freq in ['2M', offsets.MonthEnd(2)]: + result = p.asfreq(freq, how='S') + expected = Period('2007-01', freq='2M') + + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + def test_asfreq_combined(self): + # normal freq to combined freq + p = Period('2007', freq='H') + + # ordinal will not change + expected = Period('2007', freq='25H') + for freq, how in zip(['1D1H', '1H1D'], ['E', 'S']): + result = p.asfreq(freq, how=how) + self.assertEqual(result, expected) + self.assertEqual(result.ordinal, expected.ordinal) + self.assertEqual(result.freq, expected.freq) + + # combined freq to normal freq + p1 = Period(freq='1D1H', year=2007) + p2 = Period(freq='1H1D', year=2007) + + # ordinal will change because how=E is the default + result1 = p1.asfreq('H') + result2 = p2.asfreq('H') + expected = Period('2007-01-02', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + # ordinal will not change + result1 = p1.asfreq('H', how='S') + result2 = p2.asfreq('H', how='S') + expected = Period('2007-01-01', freq='H') + self.assertEqual(result1, expected) + self.assertEqual(result1.ordinal, expected.ordinal) + self.assertEqual(result1.freq, expected.freq) + self.assertEqual(result2, expected) + self.assertEqual(result2.ordinal, expected.ordinal) + self.assertEqual(result2.freq, expected.freq) + + def test_asfreq_MS(self): + initial = Period("2013") + + self.assertEqual(initial.asfreq(freq="M", how="S"), + Period('2013-01', 'M')) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + initial.asfreq(freq="MS", how="S") + + with tm.assertRaisesRegexp(ValueError, msg): + pd.Period('2013-01', 'MS') + + self.assertTrue(_period_code_map.get("MS") is 
None) From c1bd201b270f162ec40229b79493524bcf4734ac Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Feb 2017 11:42:15 -0500 Subject: [PATCH 030/353] DEPR: remove statsmodels as a dependency remove pd.ols, pd.fama_macbeth from top-level namespace xref #11898 closes https://github.com/pandas-dev/pandas2/issues/26 previously deprecated in 0.18.0 Author: Jeff Reback Closes #15353 from jreback/stats and squashes the following commits: 9563740 [Jeff Reback] DEPR: remove statsmodels as a dependency --- ci/requirements-2.7.pip | 1 - ci/requirements-2.7_COMPAT.run | 1 - ci/requirements-2.7_LOCALE.run | 1 - ci/requirements-2.7_SLOW.run | 1 - ci/requirements-3.4_SLOW.run | 1 - doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/api/tests/test_api.py | 4 +- pandas/stats/api.py | 4 - pandas/stats/common.py | 45 - pandas/stats/fama_macbeth.py | 241 ---- pandas/stats/interface.py | 143 --- pandas/stats/math.py | 130 --- pandas/stats/misc.py | 389 ------- pandas/stats/ols.py | 1377 ----------------------- pandas/stats/plm.py | 863 -------------- pandas/stats/tests/__init__.py | 0 pandas/stats/tests/common.py | 162 --- pandas/stats/tests/test_fama_macbeth.py | 68 -- pandas/stats/tests/test_math.py | 59 - pandas/stats/tests/test_ols.py | 968 ---------------- pandas/stats/tests/test_var.py | 94 -- pandas/stats/var.py | 605 ---------- pandas/util/print_versions.py | 1 - setup.py | 1 - 24 files changed, 3 insertions(+), 5158 deletions(-) delete mode 100644 pandas/stats/common.py delete mode 100644 pandas/stats/fama_macbeth.py delete mode 100644 pandas/stats/interface.py delete mode 100644 pandas/stats/math.py delete mode 100644 pandas/stats/misc.py delete mode 100644 pandas/stats/ols.py delete mode 100644 pandas/stats/plm.py delete mode 100644 pandas/stats/tests/__init__.py delete mode 100644 pandas/stats/tests/common.py delete mode 100644 pandas/stats/tests/test_fama_macbeth.py delete mode 100644 pandas/stats/tests/test_math.py delete mode 100644 pandas/stats/tests/test_ols.py delete mode 100644 pandas/stats/tests/test_var.py delete mode 100644 pandas/stats/var.py diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index d7266fe88fb32..d16b932c8be4f 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,4 +1,3 @@ -statsmodels blosc httplib2 google-api-python-client==1.2 diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index 32d71beb24388..d27b6a72c2d15 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -4,7 +4,6 @@ pytz=2013b scipy=0.11.0 xlwt=0.7.5 xlrd=0.9.2 -statsmodels=0.4.3 bottleneck=0.8.0 numexpr=2.2.2 pytables=3.0.0 diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 9bb37ee10f8db..1a9b42d832b0b 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -13,5 +13,4 @@ html5lib=1.0b2 lxml=3.2.1 scipy=0.11.0 beautiful-soup=4.2.1 -statsmodels=0.4.3 bigquery=2.0.17 diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index 630d22636f284..c2d2a14285ad6 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -4,7 +4,6 @@ numpy=1.8.2 matplotlib=1.3.1 scipy patsy -statsmodels xlwt openpyxl xlsxwriter diff --git a/ci/requirements-3.4_SLOW.run b/ci/requirements-3.4_SLOW.run index 215f840381ada..39018439a1223 100644 --- a/ci/requirements-3.4_SLOW.run +++ b/ci/requirements-3.4_SLOW.run @@ -17,5 +17,4 @@ sqlalchemy bottleneck pymysql psycopg2 -statsmodels jinja2=2.8 diff --git a/doc/source/whatsnew/v0.20.0.txt 
b/doc/source/whatsnew/v0.20.0.txt index 9afcf85c929a7..3fb6f7b0b9a91 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -396,7 +396,7 @@ Removal of prior version deprecations/changes - The ``pandas.io.ga`` module with a ``google-analytics`` interface is removed (:issue:`11308`). Similar functionality can be found in the `Google2Pandas `__ package. - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) - +- ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index 02165d82d4232..a53f6103b408b 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -42,7 +42,7 @@ class TestPDApi(Base, tm.TestCase): 'json', 'lib', 'index', 'parser'] # these are already deprecated; awaiting removal - deprecated_modules = ['ols', 'stats', 'datetools'] + deprecated_modules = ['stats', 'datetools'] # misc misc = ['IndexSlice', 'NaT'] @@ -109,7 +109,7 @@ class TestPDApi(Base, tm.TestCase): 'expanding_max', 'expanding_mean', 'expanding_median', 'expanding_min', 'expanding_quantile', 'expanding_skew', 'expanding_std', 'expanding_sum', - 'expanding_var', 'fama_macbeth', 'rolling_apply', + 'expanding_var', 'rolling_apply', 'rolling_corr', 'rolling_count', 'rolling_cov', 'rolling_kurt', 'rolling_max', 'rolling_mean', 'rolling_median', 'rolling_min', 'rolling_quantile', diff --git a/pandas/stats/api.py b/pandas/stats/api.py index fd81b875faa91..2a11456d4f9e5 100644 --- a/pandas/stats/api.py +++ b/pandas/stats/api.py @@ -2,10 +2,6 @@ Common namespace of statistical functions """ -# pylint: disable-msg=W0611,W0614,W0401 - # flake8: noqa from pandas.stats.moments import * -from pandas.stats.interface import ols -from pandas.stats.fama_macbeth import fama_macbeth diff --git a/pandas/stats/common.py b/pandas/stats/common.py deleted file mode 100644 index be3b842e93cc8..0000000000000 --- a/pandas/stats/common.py +++ /dev/null @@ -1,45 +0,0 @@ - -_WINDOW_TYPES = { - 0: 'full_sample', - 1: 'rolling', - 2: 'expanding' -} -# also allow 'rolling' as key -_WINDOW_TYPES.update((v, v) for k, v in list(_WINDOW_TYPES.items())) -_ADDITIONAL_CLUSTER_TYPES = set(("entity", "time")) - - -def _get_cluster_type(cluster_type): - # this was previous behavior - if cluster_type is None: - return cluster_type - try: - return _get_window_type(cluster_type) - except ValueError: - final_type = str(cluster_type).lower().replace("_", " ") - if final_type in _ADDITIONAL_CLUSTER_TYPES: - return final_type - raise ValueError('Unrecognized cluster type: %s' % cluster_type) - - -def _get_window_type(window_type): - # e.g., 0, 1, 2 - final_type = _WINDOW_TYPES.get(window_type) - # e.g., 'full_sample' - final_type = final_type or _WINDOW_TYPES.get( - str(window_type).lower().replace(" ", "_")) - if final_type is None: - raise ValueError('Unrecognized window type: %s' % window_type) - return final_type - - -def banner(text, width=80): - """ - - """ - toFill = width - len(text) - - left = toFill // 2 - right = toFill - left - - return '%s%s%s' % ('-' * left, text, '-' * right) diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py deleted file mode 100644 index d564f9cb6c425..0000000000000 --- a/pandas/stats/fama_macbeth.py 
+++ /dev/null @@ -1,241 +0,0 @@ -from pandas.core.base import StringMixin -from pandas.compat import StringIO, range - -import numpy as np - -from pandas.core.api import Series, DataFrame -import pandas.stats.common as common -from pandas.util.decorators import cache_readonly - -# flake8: noqa - - -def fama_macbeth(**kwargs): - """Runs Fama-MacBeth regression. - - Parameters - ---------- - Takes the same arguments as a panel OLS, in addition to: - - nw_lags_beta: int - Newey-West adjusts the betas by the given lags - """ - window_type = kwargs.get('window_type') - if window_type is None: - klass = FamaMacBeth - else: - klass = MovingFamaMacBeth - - return klass(**kwargs) - - -class FamaMacBeth(StringMixin): - - def __init__(self, y, x, intercept=True, nw_lags=None, - nw_lags_beta=None, - entity_effects=False, time_effects=False, x_effects=None, - cluster=None, dropped_dummies=None, verbose=False): - import warnings - warnings.warn("The pandas.stats.fama_macbeth module is deprecated and will be " - "removed in a future version. We refer to external packages " - "like statsmodels, see here: " - "http://www.statsmodels.org/stable/index.html", - FutureWarning, stacklevel=4) - - if dropped_dummies is None: - dropped_dummies = {} - self._nw_lags_beta = nw_lags_beta - - from pandas.stats.plm import MovingPanelOLS - self._ols_result = MovingPanelOLS( - y=y, x=x, window_type='rolling', window=1, - intercept=intercept, - nw_lags=nw_lags, entity_effects=entity_effects, - time_effects=time_effects, x_effects=x_effects, cluster=cluster, - dropped_dummies=dropped_dummies, verbose=verbose) - - self._cols = self._ols_result._x.columns - - @cache_readonly - def _beta_raw(self): - return self._ols_result._beta_raw - - @cache_readonly - def _stats(self): - return _calc_t_stat(self._beta_raw, self._nw_lags_beta) - - @cache_readonly - def _mean_beta_raw(self): - return self._stats[0] - - @cache_readonly - def _std_beta_raw(self): - return self._stats[1] - - @cache_readonly - def _t_stat_raw(self): - return self._stats[2] - - def _make_result(self, result): - return Series(result, index=self._cols) - - @cache_readonly - def mean_beta(self): - return self._make_result(self._mean_beta_raw) - - @cache_readonly - def std_beta(self): - return self._make_result(self._std_beta_raw) - - @cache_readonly - def t_stat(self): - return self._make_result(self._t_stat_raw) - - @cache_readonly - def _results(self): - return { - 'mean_beta': self._mean_beta_raw, - 'std_beta': self._std_beta_raw, - 't_stat': self._t_stat_raw, - } - - @cache_readonly - def _coef_table(self): - buffer = StringIO() - buffer.write('%13s %13s %13s %13s %13s %13s\n' % - ('Variable', 'Beta', 'Std Err', 't-stat', 'CI 2.5%', 'CI 97.5%')) - template = '%13s %13.4f %13.4f %13.2f %13.4f %13.4f\n' - - for i, name in enumerate(self._cols): - if i and not (i % 5): - buffer.write('\n' + common.banner('')) - - mean_beta = self._results['mean_beta'][i] - std_beta = self._results['std_beta'][i] - t_stat = self._results['t_stat'][i] - ci1 = mean_beta - 1.96 * std_beta - ci2 = mean_beta + 1.96 * std_beta - - values = '(%s)' % name, mean_beta, std_beta, t_stat, ci1, ci2 - - buffer.write(template % values) - - if self._nw_lags_beta is not None: - buffer.write('\n') - buffer.write('*** The Std Err, t-stat are Newey-West ' - 'adjusted with Lags %5d\n' % self._nw_lags_beta) - - return buffer.getvalue() - - def __unicode__(self): - return self.summary - - @cache_readonly - def summary(self): - template = """ -----------------------Summary of Fama-MacBeth 
Analysis------------------------- - -Formula: Y ~ %(formulaRHS)s -# betas : %(nu)3d - -----------------------Summary of Estimated Coefficients------------------------ -%(coefTable)s ---------------------------------End of Summary--------------------------------- -""" - params = { - 'formulaRHS': ' + '.join(self._cols), - 'nu': len(self._beta_raw), - 'coefTable': self._coef_table, - } - - return template % params - - -class MovingFamaMacBeth(FamaMacBeth): - - def __init__(self, y, x, window_type='rolling', window=10, - intercept=True, nw_lags=None, nw_lags_beta=None, - entity_effects=False, time_effects=False, x_effects=None, - cluster=None, dropped_dummies=None, verbose=False): - if dropped_dummies is None: - dropped_dummies = {} - self._window_type = common._get_window_type(window_type) - self._window = window - - FamaMacBeth.__init__( - self, y=y, x=x, intercept=intercept, - nw_lags=nw_lags, nw_lags_beta=nw_lags_beta, - entity_effects=entity_effects, time_effects=time_effects, - x_effects=x_effects, cluster=cluster, - dropped_dummies=dropped_dummies, verbose=verbose) - - self._index = self._ols_result._index - self._T = len(self._index) - - @property - def _is_rolling(self): - return self._window_type == 'rolling' - - def _calc_stats(self): - mean_betas = [] - std_betas = [] - t_stats = [] - - # XXX - - mask = self._ols_result._rolling_ols_call[2] - obs_total = mask.astype(int).cumsum() - - start = self._window - 1 - betas = self._beta_raw - for i in range(start, self._T): - if self._is_rolling: - begin = i - start - else: - begin = 0 - - B = betas[max(obs_total[begin] - 1, 0): obs_total[i]] - mean_beta, std_beta, t_stat = _calc_t_stat(B, self._nw_lags_beta) - mean_betas.append(mean_beta) - std_betas.append(std_beta) - t_stats.append(t_stat) - - return np.array([mean_betas, std_betas, t_stats]) - - _stats = cache_readonly(_calc_stats) - - def _make_result(self, result): - return DataFrame(result, index=self._result_index, columns=self._cols) - - @cache_readonly - def _result_index(self): - mask = self._ols_result._rolling_ols_call[2] - # HACK XXX - return self._index[mask.cumsum() >= self._window] - - @cache_readonly - def _results(self): - return { - 'mean_beta': self._mean_beta_raw[-1], - 'std_beta': self._std_beta_raw[-1], - 't_stat': self._t_stat_raw[-1], - } - - -def _calc_t_stat(beta, nw_lags_beta): - N = len(beta) - B = beta - beta.mean(0) - C = np.dot(B.T, B) / N - - if nw_lags_beta is not None: - for i in range(nw_lags_beta + 1): - - cov = np.dot(B[i:].T, B[:(N - i)]) / N - weight = i / (nw_lags_beta + 1) - C += 2 * (1 - weight) * cov - - mean_beta = beta.mean(0) - std_beta = np.sqrt(np.diag(C)) / np.sqrt(N) - t_stat = mean_beta / std_beta - - return mean_beta, std_beta, t_stat diff --git a/pandas/stats/interface.py b/pandas/stats/interface.py deleted file mode 100644 index caf468b4f85fe..0000000000000 --- a/pandas/stats/interface.py +++ /dev/null @@ -1,143 +0,0 @@ -from pandas.core.api import Series, DataFrame, Panel, MultiIndex -from pandas.stats.ols import OLS, MovingOLS -from pandas.stats.plm import PanelOLS, MovingPanelOLS, NonPooledPanelOLS -import pandas.stats.common as common - - -def ols(**kwargs): - """Returns the appropriate OLS object depending on whether you need - simple or panel OLS, and a full-sample or rolling/expanding OLS. 
- - Will be a normal linear regression or a (pooled) panel regression depending - on the type of the inputs: - - y : Series, x : DataFrame -> OLS - y : Series, x : dict of DataFrame -> OLS - y : DataFrame, x : DataFrame -> PanelOLS - y : DataFrame, x : dict of DataFrame/Panel -> PanelOLS - y : Series with MultiIndex, x : Panel/DataFrame + MultiIndex -> PanelOLS - - Parameters - ---------- - y: Series or DataFrame - See above for types - x: Series, DataFrame, dict of Series, dict of DataFrame, Panel - weights : Series or ndarray - The weights are presumed to be (proportional to) the inverse of the - variance of the observations. That is, if the variables are to be - transformed by 1/sqrt(W) you must supply weights = 1/W - intercept: bool - True if you want an intercept. Defaults to True. - nw_lags: None or int - Number of Newey-West lags. Defaults to None. - nw_overlap: bool - Whether there are overlaps in the NW lags. Defaults to False. - window_type: {'full sample', 'rolling', 'expanding'} - 'full sample' by default - window: int - size of window (for rolling/expanding OLS). If window passed and no - explicit window_type, 'rolling" will be used as the window_type - - Panel OLS options: - pool: bool - Whether to run pooled panel regression. Defaults to true. - entity_effects: bool - Whether to account for entity fixed effects. Defaults to false. - time_effects: bool - Whether to account for time fixed effects. Defaults to false. - x_effects: list - List of x's to account for fixed effects. Defaults to none. - dropped_dummies: dict - Key is the name of the variable for the fixed effect. - Value is the value of that variable for which we drop the dummy. - - For entity fixed effects, key equals 'entity'. - - By default, the first dummy is dropped if no dummy is specified. - cluster: {'time', 'entity'} - cluster variances - - Examples - -------- - # Run simple OLS. - result = ols(y=y, x=x) - - # Run rolling simple OLS with window of size 10. - result = ols(y=y, x=x, window_type='rolling', window=10) - print(result.beta) - - result = ols(y=y, x=x, nw_lags=1) - - # Set up LHS and RHS for data across all items - y = A - x = {'B' : B, 'C' : C} - - # Run panel OLS. - result = ols(y=y, x=x) - - # Run expanding panel OLS with window 10 and entity clustering. - result = ols(y=y, x=x, cluster='entity', window_type='expanding', - window=10) - - Returns - ------- - The appropriate OLS object, which allows you to obtain betas and various - statistics, such as std err, t-stat, etc. 
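As the deprecation notices in these modules suggest, the closest statsmodels analogue to a full-sample call such as result = ols(y=y, x=x) is, roughly (a hedged sketch assuming the same y Series and x DataFrame as in the examples above, with statsmodels installed):

    import statsmodels.api as sm

    X = sm.add_constant(x)             # mirrors intercept=True
    res = sm.OLS(y, X).fit()
    res.params                         # roughly result.beta
    res.bse                            # roughly result.std_err
    res.tvalues, res.pvalues           # roughly result.t_stat, result.p_value
    res.rsquared, res.rsquared_adj     # roughly result.r2, result.r2_adj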
- """ - - if (kwargs.get('cluster') is not None and - kwargs.get('nw_lags') is not None): - raise ValueError( - 'Pandas OLS does not work with Newey-West correction ' - 'and clustering.') - - pool = kwargs.get('pool') - if 'pool' in kwargs: - del kwargs['pool'] - - window_type = kwargs.get('window_type') - window = kwargs.get('window') - - if window_type is None: - if window is None: - window_type = 'full_sample' - else: - window_type = 'rolling' - else: - window_type = common._get_window_type(window_type) - - if window_type != 'full_sample': - kwargs['window_type'] = common._get_window_type(window_type) - - y = kwargs.get('y') - x = kwargs.get('x') - - panel = False - if isinstance(y, DataFrame) or (isinstance(y, Series) and - isinstance(y.index, MultiIndex)): - panel = True - if isinstance(x, Panel): - panel = True - - if window_type == 'full_sample': - for rolling_field in ('window_type', 'window', 'min_periods'): - if rolling_field in kwargs: - del kwargs[rolling_field] - - if panel: - if pool is False: - klass = NonPooledPanelOLS - else: - klass = PanelOLS - else: - klass = OLS - else: - if panel: - if pool is False: - klass = NonPooledPanelOLS - else: - klass = MovingPanelOLS - else: - klass = MovingOLS - - return klass(**kwargs) diff --git a/pandas/stats/math.py b/pandas/stats/math.py deleted file mode 100644 index 505415bebf89e..0000000000000 --- a/pandas/stats/math.py +++ /dev/null @@ -1,130 +0,0 @@ -# pylint: disable-msg=E1103 -# pylint: disable-msg=W0212 - -from __future__ import division - -from pandas.compat import range -import numpy as np -import numpy.linalg as linalg - - -def rank(X, cond=1.0e-12): - """ - Return the rank of a matrix X based on its generalized inverse, - not the SVD. - """ - X = np.asarray(X) - if len(X.shape) == 2: - import scipy.linalg as SL - D = SL.svdvals(X) - result = np.add.reduce(np.greater(D / D.max(), cond)) - return int(result.astype(np.int32)) - else: - return int(not np.alltrue(np.equal(X, 0.))) - - -def solve(a, b): - """Returns the solution of A X = B.""" - try: - return linalg.solve(a, b) - except linalg.LinAlgError: - return np.dot(linalg.pinv(a), b) - - -def inv(a): - """Returns the inverse of A.""" - try: - return np.linalg.inv(a) - except linalg.LinAlgError: - return np.linalg.pinv(a) - - -def is_psd(m): - eigvals = linalg.eigvals(m) - return np.isreal(eigvals).all() and (eigvals >= 0).all() - - -def newey_west(m, max_lags, nobs, df, nw_overlap=False): - """ - Compute Newey-West adjusted covariance matrix, taking into account - specified number of leads / lags - - Parameters - ---------- - m : (N x K) - max_lags : int - nobs : int - Number of observations in model - df : int - Degrees of freedom in explanatory variables - nw_overlap : boolean, default False - Assume data is overlapping - - Returns - ------- - ndarray (K x K) - - Reference - --------- - Newey, W. K. & West, K. D. (1987) A Simple, Positive - Semi-definite, Heteroskedasticity and Autocorrelation Consistent - Covariance Matrix, Econometrica, vol. 55(3), 703-708 - """ - Xeps = np.dot(m.T, m) - for lag in range(1, max_lags + 1): - auto_cov = np.dot(m[:-lag].T, m[lag:]) - weight = lag / (max_lags + 1) - if nw_overlap: - weight = 0 - bb = auto_cov + auto_cov.T - dd = (1 - weight) * bb - Xeps += dd - - Xeps *= nobs / (nobs - df) - - if nw_overlap and not is_psd(Xeps): - new_max_lags = int(np.ceil(max_lags * 1.5)) -# print('nw_overlap is True and newey_west generated a non positive ' -# 'semidefinite matrix, so using newey_west with max_lags of %d.' 
-# % new_max_lags) - return newey_west(m, new_max_lags, nobs, df) - - return Xeps - - -def calc_F(R, r, beta, var_beta, nobs, df): - """ - Computes the standard F-test statistic for linear restriction - hypothesis testing - - Parameters - ---------- - R: ndarray (N x N) - Restriction matrix - r: ndarray (N x 1) - Restriction vector - beta: ndarray (N x 1) - Estimated model coefficients - var_beta: ndarray (N x N) - Variance covariance matrix of regressors - nobs: int - Number of observations in model - df: int - Model degrees of freedom - - Returns - ------- - F value, (q, df_resid), p value - """ - from scipy.stats import f - - hyp = np.dot(R, beta.reshape(len(beta), 1)) - r - RSR = np.dot(R, np.dot(var_beta, R.T)) - - q = len(r) - - F = np.dot(hyp.T, np.dot(inv(RSR), hyp)).squeeze() / q - - p_value = 1 - f.cdf(F, q, nobs - df) - - return F, (q, nobs - df), p_value diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py deleted file mode 100644 index 1a077dcb6f9a1..0000000000000 --- a/pandas/stats/misc.py +++ /dev/null @@ -1,389 +0,0 @@ -from numpy import NaN -from pandas import compat -import numpy as np - -from pandas.core.api import Series, DataFrame -from pandas.core.series import remove_na -from pandas.compat import zip, lrange -import pandas.core.common as com - - -def zscore(series): - return (series - series.mean()) / np.std(series, ddof=0) - - -def correl_ts(frame1, frame2): - """ - Pairwise correlation of columns of two DataFrame objects - - Parameters - ---------- - - Returns - ------- - y : Series - """ - results = {} - for col, series in compat.iteritems(frame1): - if col in frame2: - other = frame2[col] - - idx1 = series.valid().index - idx2 = other.valid().index - - common_index = idx1.intersection(idx2) - - seriesStand = zscore(series.reindex(common_index)) - otherStand = zscore(other.reindex(common_index)) - results[col] = (seriesStand * otherStand).mean() - - return Series(results) - - -def correl_xs(frame1, frame2): - return correl_ts(frame1.T, frame2.T) - - -def percentileofscore(a, score, kind='rank'): - """The percentile rank of a score relative to a list of scores. - - A `percentileofscore` of, for example, 80% means that 80% of the - scores in `a` are below the given score. In the case of gaps or - ties, the exact definition depends on the optional keyword, `kind`. - - Parameters - ---------- - a: array like - Array of scores to which `score` is compared. - score: int or float - Score that is compared to the elements in `a`. - kind: {'rank', 'weak', 'strict', 'mean'}, optional - This optional parameter specifies the interpretation of the - resulting score: - - - "rank": Average percentage ranking of score. In case of - multiple matches, average the percentage rankings of - all matching scores. - - "weak": This kind corresponds to the definition of a cumulative - distribution function. A percentileofscore of 80% - means that 80% of values are less than or equal - to the provided score. - - "strict": Similar to "weak", except that only values that are - strictly less than the given score are counted. - - "mean": The average of the "weak" and "strict" scores, often used in - testing. See - - http://en.wikipedia.org/wiki/Percentile_rank - - Returns - ------- - pcos : float - Percentile-position of score (0-100) relative to `a`. 
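The newey_west helper removed from pandas/stats/math.py above applies Bartlett-kernel weights to the lagged auto-covariances of the moment matrix. A compact sketch of the same sum, assuming a hypothetical (nobs x K) array m and lag count L, and omitting the nobs / (nobs - df) rescaling:

    import numpy as np

    def newey_west_sketch(m, L):
        S = m.T.dot(m)                                      # lag-0 term
        for lag in range(1, L + 1):
            gamma = m[:-lag].T.dot(m[lag:])                 # lag-l auto-covariance
            S += (1 - lag / (L + 1)) * (gamma + gamma.T)    # Bartlett weight
        return S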
- - Examples - -------- - Three-quarters of the given values lie below a given score: - - >>> percentileofscore([1, 2, 3, 4], 3) - 75.0 - - With multiple matches, note how the scores of the two matches, 0.6 - and 0.8 respectively, are averaged: - - >>> percentileofscore([1, 2, 3, 3, 4], 3) - 70.0 - - Only 2/5 values are strictly less than 3: - - >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='strict') - 40.0 - - But 4/5 values are less than or equal to 3: - - >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='weak') - 80.0 - - The average between the weak and the strict scores is - - >>> percentileofscore([1, 2, 3, 3, 4], 3, kind='mean') - 60.0 - - """ - a = np.array(a) - n = len(a) - - if kind == 'rank': - if not(np.any(a == score)): - a = np.append(a, score) - a_len = np.array(lrange(len(a))) - else: - a_len = np.array(lrange(len(a))) + 1.0 - - a = np.sort(a) - idx = [a == score] - pct = (np.mean(a_len[idx]) / n) * 100.0 - return pct - - elif kind == 'strict': - return sum(a < score) / float(n) * 100 - elif kind == 'weak': - return sum(a <= score) / float(n) * 100 - elif kind == 'mean': - return (sum(a < score) + sum(a <= score)) * 50 / float(n) - else: - raise ValueError("kind can only be 'rank', 'strict', 'weak' or 'mean'") - - -def percentileRank(frame, column=None, kind='mean'): - """ - Return score at percentile for each point in time (cross-section) - - Parameters - ---------- - frame: DataFrame - column: string or Series, optional - Column name or specific Series to compute percentiles for. - If not provided, percentiles are computed for all values at each - point in time. Note that this can take a LONG time. - kind: {'rank', 'weak', 'strict', 'mean'}, optional - This optional parameter specifies the interpretation of the - resulting score: - - - "rank": Average percentage ranking of score. In case of - multiple matches, average the percentage rankings of - all matching scores. - - "weak": This kind corresponds to the definition of a cumulative - distribution function. A percentileofscore of 80% - means that 80% of values are less than or equal - to the provided score. - - "strict": Similar to "weak", except that only values that are - strictly less than the given score are counted. - - "mean": The average of the "weak" and "strict" scores, often used in - testing. 
See - - http://en.wikipedia.org/wiki/Percentile_rank - - Returns - ------- - TimeSeries or DataFrame, depending on input - """ - fun = lambda xs, score: percentileofscore(remove_na(xs), - score, kind=kind) - - results = {} - framet = frame.T - if column is not None: - if isinstance(column, Series): - for date, xs in compat.iteritems(frame.T): - results[date] = fun(xs, column.get(date, NaN)) - else: - for date, xs in compat.iteritems(frame.T): - results[date] = fun(xs, xs[column]) - results = Series(results) - else: - for column in frame.columns: - for date, xs in compat.iteritems(framet): - results.setdefault(date, {})[column] = fun(xs, xs[column]) - results = DataFrame(results).T - return results - - -def bucket(series, k, by=None): - """ - Produce DataFrame representing quantiles of a Series - - Parameters - ---------- - series : Series - k : int - number of quantiles - by : Series or same-length array - bucket by value - - Returns - ------- - DataFrame - """ - if by is None: - by = series - else: - by = by.reindex(series.index) - - split = _split_quantile(by, k) - mat = np.empty((len(series), k), dtype=float) * np.NaN - - for i, v in enumerate(split): - mat[:, i][v] = series.take(v) - - return DataFrame(mat, index=series.index, columns=np.arange(k) + 1) - - -def _split_quantile(arr, k): - arr = np.asarray(arr) - mask = np.isfinite(arr) - order = arr[mask].argsort() - n = len(arr) - - return np.array_split(np.arange(n)[mask].take(order), k) - - -def bucketcat(series, cats): - """ - Produce DataFrame representing quantiles of a Series - - Parameters - ---------- - series : Series - cat : Series or same-length array - bucket by category; mutually exclusive with 'by' - - Returns - ------- - DataFrame - """ - if not isinstance(series, Series): - series = Series(series, index=np.arange(len(series))) - - cats = np.asarray(cats) - - unique_labels = np.unique(cats) - unique_labels = unique_labels[com.notnull(unique_labels)] - - # group by - data = {} - - for label in unique_labels: - data[label] = series[cats == label] - - return DataFrame(data, columns=unique_labels) - - -def bucketpanel(series, bins=None, by=None, cat=None): - """ - Bucket data by two Series to create summary panel - - Parameters - ---------- - series : Series - bins : tuple (length-2) - e.g. 
(2, 2) - by : tuple of Series - bucket by value - cat : tuple of Series - bucket by category; mutually exclusive with 'by' - - Returns - ------- - DataFrame - """ - use_by = by is not None - use_cat = cat is not None - - if use_by and use_cat: - raise Exception('must specify by or cat, but not both') - elif use_by: - if len(by) != 2: - raise Exception('must provide two bucketing series') - - xby, yby = by - xbins, ybins = bins - - return _bucketpanel_by(series, xby, yby, xbins, ybins) - - elif use_cat: - xcat, ycat = cat - return _bucketpanel_cat(series, xcat, ycat) - else: - raise Exception('must specify either values or categories ' - 'to bucket by') - - -def _bucketpanel_by(series, xby, yby, xbins, ybins): - xby = xby.reindex(series.index) - yby = yby.reindex(series.index) - - xlabels = _bucket_labels(xby.reindex(series.index), xbins) - ylabels = _bucket_labels(yby.reindex(series.index), ybins) - - labels = _uniquify(xlabels, ylabels, xbins, ybins) - - mask = com.isnull(labels) - labels[mask] = -1 - - unique_labels = np.unique(labels) - bucketed = bucketcat(series, labels) - - _ulist = list(labels) - index_map = dict((x, _ulist.index(x)) for x in unique_labels) - - def relabel(key): - pos = index_map[key] - - xlab = xlabels[pos] - ylab = ylabels[pos] - - return '%sx%s' % (int(xlab) if com.notnull(xlab) else 'NULL', - int(ylab) if com.notnull(ylab) else 'NULL') - - return bucketed.rename(columns=relabel) - - -def _bucketpanel_cat(series, xcat, ycat): - xlabels, xmapping = _intern(xcat) - ylabels, ymapping = _intern(ycat) - - shift = 10 ** (np.ceil(np.log10(ylabels.max()))) - labels = xlabels * shift + ylabels - - sorter = labels.argsort() - sorted_labels = labels.take(sorter) - sorted_xlabels = xlabels.take(sorter) - sorted_ylabels = ylabels.take(sorter) - - unique_labels = np.unique(labels) - unique_labels = unique_labels[com.notnull(unique_labels)] - - locs = sorted_labels.searchsorted(unique_labels) - xkeys = sorted_xlabels.take(locs) - ykeys = sorted_ylabels.take(locs) - - stringified = ['(%s, %s)' % arg - for arg in zip(xmapping.take(xkeys), ymapping.take(ykeys))] - - result = bucketcat(series, labels) - result.columns = stringified - - return result - - -def _intern(values): - # assumed no NaN values - values = np.asarray(values) - - uniqued = np.unique(values) - labels = uniqued.searchsorted(values) - return labels, uniqued - - -def _uniquify(xlabels, ylabels, xbins, ybins): - # encode the stuff, create unique label - shifter = 10 ** max(xbins, ybins) - _xpiece = xlabels * shifter - _ypiece = ylabels - - return _xpiece + _ypiece - - -def _bucket_labels(series, k): - arr = np.asarray(series) - mask = np.isfinite(arr) - order = arr[mask].argsort() - n = len(series) - - split = np.array_split(np.arange(n)[mask].take(order), k) - - mat = np.empty(n, dtype=float) * np.NaN - for i, v in enumerate(split): - mat[v] = i - - return mat + 1 diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py deleted file mode 100644 index 96ec70d59488a..0000000000000 --- a/pandas/stats/ols.py +++ /dev/null @@ -1,1377 +0,0 @@ -""" -Ordinary least squares regression -""" - -# pylint: disable-msg=W0201 - -# flake8: noqa - -from pandas.compat import zip, range, StringIO -from itertools import starmap -from pandas import compat -import numpy as np - -from pandas.core.api import DataFrame, Series, isnull -from pandas.core.base import StringMixin -from pandas.types.common import _ensure_float64 -from pandas.core.index import MultiIndex -from pandas.core.panel import Panel -from pandas.util.decorators import 
cache_readonly - -import pandas.stats.common as scom -import pandas.stats.math as math -import pandas.stats.moments as moments - -_FP_ERR = 1e-8 - - -class OLS(StringMixin): - """ - Runs a full sample ordinary least squares regression. - - Parameters - ---------- - y : Series - x : Series, DataFrame, dict of Series - intercept : bool - True if you want an intercept. - weights : array-like, optional - 1d array of weights. If you supply 1/W then the variables are pre- - multiplied by 1/sqrt(W). If no weights are supplied the default value - is 1 and WLS reults are the same as OLS. - nw_lags : None or int - Number of Newey-West lags. - nw_overlap : boolean, default False - Assume data is overlapping when computing Newey-West estimator - - """ - _panel_model = False - - def __init__(self, y, x, intercept=True, weights=None, nw_lags=None, - nw_overlap=False): - import warnings - warnings.warn("The pandas.stats.ols module is deprecated and will be " - "removed in a future version. We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/regression.html", - FutureWarning, stacklevel=4) - - try: - import statsmodels.api as sm - except ImportError: - import scikits.statsmodels.api as sm - - self._x_orig = x - self._y_orig = y - self._weights_orig = weights - self._intercept = intercept - self._nw_lags = nw_lags - self._nw_overlap = nw_overlap - - (self._y, self._x, self._weights, self._x_filtered, - self._index, self._time_has_obs) = self._prepare_data() - - if self._weights is not None: - self._x_trans = self._x.mul(np.sqrt(self._weights), axis=0) - self._y_trans = self._y * np.sqrt(self._weights) - self.sm_ols = sm.WLS(self._y.get_values(), - self._x.get_values(), - weights=self._weights.values).fit() - else: - self._x_trans = self._x - self._y_trans = self._y - self.sm_ols = sm.OLS(self._y.get_values(), - self._x.get_values()).fit() - - def _prepare_data(self): - """ - Cleans the input for single OLS. - - Parameters - ---------- - lhs: Series - Dependent variable in the regression. - rhs: dict, whose values are Series, DataFrame, or dict - Explanatory variables of the regression. - - Returns - ------- - Series, DataFrame - Cleaned lhs and rhs - """ - (filt_lhs, filt_rhs, filt_weights, - pre_filt_rhs, index, valid) = _filter_data(self._y_orig, self._x_orig, - self._weights_orig) - if self._intercept: - filt_rhs['intercept'] = 1. - pre_filt_rhs['intercept'] = 1. - - if hasattr(filt_weights, 'to_dense'): - filt_weights = filt_weights.to_dense() - - return (filt_lhs, filt_rhs, filt_weights, - pre_filt_rhs, index, valid) - - @property - def nobs(self): - return self._nobs - - @property - def _nobs(self): - return len(self._y) - - @property - def nw_lags(self): - return self._nw_lags - - @property - def x(self): - """Returns the filtered x used in the regression.""" - return self._x - - @property - def y(self): - """Returns the filtered y used in the regression.""" - return self._y - - @cache_readonly - def _beta_raw(self): - """Runs the regression and returns the beta.""" - return self.sm_ols.params - - @cache_readonly - def beta(self): - """Returns the betas in Series form.""" - return Series(self._beta_raw, index=self._x.columns) - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - return math.rank(self._x.values) - - @cache_readonly - def df(self): - """Returns the degrees of freedom. - - This equals the rank of the X matrix. 
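When weights are passed, the constructor above scales both sides by sqrt(weights) (see _x_trans / _y_trans) and delegates the fit to statsmodels WLS; for the point estimates the two are equivalent. A minimal sketch of that equivalence, using hypothetical synthetic data:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.RandomState(0)
    X = sm.add_constant(rng.normal(size=(50, 2)))
    y = rng.normal(size=50)
    w = rng.uniform(0.5, 2.0, size=50)              # hypothetical inverse-variance weights

    wls = sm.WLS(y, X, weights=w).fit()
    scaled = sm.OLS(y * np.sqrt(w), X * np.sqrt(w)[:, None]).fit()
    np.allclose(wls.params, scaled.params)          # True: identical coefficients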
- """ - return self._df_raw - - @cache_readonly - def _df_model_raw(self): - """Returns the raw model degrees of freedom.""" - return self.sm_ols.df_model - - @cache_readonly - def df_model(self): - """Returns the degrees of freedom of the model.""" - return self._df_model_raw - - @cache_readonly - def _df_resid_raw(self): - """Returns the raw residual degrees of freedom.""" - return self.sm_ols.df_resid - - @cache_readonly - def df_resid(self): - """Returns the degrees of freedom of the residuals.""" - return self._df_resid_raw - - @cache_readonly - def _f_stat_raw(self): - """Returns the raw f-stat value.""" - from scipy.stats import f - - cols = self._x.columns - - if self._nw_lags is None: - F = self._r2_raw / (self._r2_raw - self._r2_adj_raw) - - q = len(cols) - if 'intercept' in cols: - q -= 1 - - shape = q, self.df_resid - p_value = 1 - f.cdf(F, shape[0], shape[1]) - return F, shape, p_value - - k = len(cols) - R = np.eye(k) - r = np.zeros((k, 1)) - - try: - intercept = cols.get_loc('intercept') - R = np.concatenate((R[0: intercept], R[intercept + 1:])) - r = np.concatenate((r[0: intercept], r[intercept + 1:])) - except KeyError: - # no intercept - pass - - return math.calc_F(R, r, self._beta_raw, self._var_beta_raw, - self._nobs, self.df) - - @cache_readonly - def f_stat(self): - """Returns the f-stat value.""" - return f_stat_to_dict(self._f_stat_raw) - - def f_test(self, hypothesis): - """Runs the F test, given a joint hypothesis. The hypothesis is - represented by a collection of equations, in the form - - A*x_1+B*x_2=C - - You must provide the coefficients even if they're 1. No spaces. - - The equations can be passed as either a single string or a - list of strings. - - Examples - -------- - o = ols(...) - o.f_test('1*x1+2*x2=0,1*x3=0') - o.f_test(['1*x1+2*x2=0','1*x3=0']) - """ - - x_names = self._x.columns - - R = [] - r = [] - - if isinstance(hypothesis, str): - eqs = hypothesis.split(',') - elif isinstance(hypothesis, list): - eqs = hypothesis - else: # pragma: no cover - raise Exception('hypothesis must be either string or list') - for equation in eqs: - row = np.zeros(len(x_names)) - lhs, rhs = equation.split('=') - for s in lhs.split('+'): - ss = s.split('*') - coeff = float(ss[0]) - x_name = ss[1] - - if x_name not in x_names: - raise Exception('no coefficient named %s' % x_name) - idx = x_names.get_loc(x_name) - row[idx] = coeff - rhs = float(rhs) - - R.append(row) - r.append(rhs) - - R = np.array(R) - q = len(r) - r = np.array(r).reshape(q, 1) - - result = math.calc_F(R, r, self._beta_raw, self._var_beta_raw, - self._nobs, self.df) - - return f_stat_to_dict(result) - - @cache_readonly - def _p_value_raw(self): - """Returns the raw p values.""" - from scipy.stats import t - - return 2 * t.sf(np.fabs(self._t_stat_raw), - self._df_resid_raw) - - @cache_readonly - def p_value(self): - """Returns the p values.""" - return Series(self._p_value_raw, index=self.beta.index) - - @cache_readonly - def _r2_raw(self): - """Returns the raw r-squared values.""" - if self._use_centered_tss: - return 1 - self.sm_ols.ssr / self.sm_ols.centered_tss - else: - return 1 - self.sm_ols.ssr / self.sm_ols.uncentered_tss - - @property - def _use_centered_tss(self): - # has_intercept = np.abs(self._resid_raw.sum()) < _FP_ERR - return self._intercept - - @cache_readonly - def r2(self): - """Returns the r-squared values.""" - return self._r2_raw - - @cache_readonly - def _r2_adj_raw(self): - """Returns the raw r-squared adjusted values.""" - return self.sm_ols.rsquared_adj - - @cache_readonly - 
def r2_adj(self): - """Returns the r-squared adjusted values.""" - return self._r2_adj_raw - - @cache_readonly - def _resid_raw(self): - """Returns the raw residuals.""" - return self.sm_ols.resid - - @cache_readonly - def resid(self): - """Returns the residuals.""" - return Series(self._resid_raw, index=self._x.index) - - @cache_readonly - def _rmse_raw(self): - """Returns the raw rmse values.""" - return np.sqrt(self.sm_ols.mse_resid) - - @cache_readonly - def rmse(self): - """Returns the rmse value.""" - return self._rmse_raw - - @cache_readonly - def _std_err_raw(self): - """Returns the raw standard err values.""" - return np.sqrt(np.diag(self._var_beta_raw)) - - @cache_readonly - def std_err(self): - """Returns the standard err values of the betas.""" - return Series(self._std_err_raw, index=self.beta.index) - - @cache_readonly - def _t_stat_raw(self): - """Returns the raw t-stat value.""" - return self._beta_raw / self._std_err_raw - - @cache_readonly - def t_stat(self): - """Returns the t-stat values of the betas.""" - return Series(self._t_stat_raw, index=self.beta.index) - - @cache_readonly - def _var_beta_raw(self): - """ - Returns the raw covariance of beta. - """ - x = self._x.values - y = self._y.values - - xx = np.dot(x.T, x) - - if self._nw_lags is None: - return math.inv(xx) * (self._rmse_raw ** 2) - else: - resid = y - np.dot(x, self._beta_raw) - m = (x.T * resid).T - - xeps = math.newey_west(m, self._nw_lags, self._nobs, self._df_raw, - self._nw_overlap) - - xx_inv = math.inv(xx) - return np.dot(xx_inv, np.dot(xeps, xx_inv)) - - @cache_readonly - def var_beta(self): - """Returns the variance-covariance matrix of beta.""" - return DataFrame(self._var_beta_raw, index=self.beta.index, - columns=self.beta.index) - - @cache_readonly - def _y_fitted_raw(self): - """Returns the raw fitted y values.""" - if self._weights is None: - X = self._x_filtered.values - else: - # XXX - return self.sm_ols.fittedvalues - - b = self._beta_raw - return np.dot(X, b) - - @cache_readonly - def y_fitted(self): - """Returns the fitted y values. This equals BX.""" - if self._weights is None: - index = self._x_filtered.index - orig_index = index - else: - index = self._y.index - orig_index = self._y_orig.index - - result = Series(self._y_fitted_raw, index=index) - return result.reindex(orig_index) - - @cache_readonly - def _y_predict_raw(self): - """Returns the raw predicted y values.""" - return self._y_fitted_raw - - @cache_readonly - def y_predict(self): - """Returns the predicted y values. - - For in-sample, this is same as y_fitted.""" - return self.y_fitted - - def predict(self, beta=None, x=None, fill_value=None, - fill_method=None, axis=0): - """ - Parameters - ---------- - beta : Series - x : Series or DataFrame - fill_value : scalar or dict, default None - fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - axis : {0, 1}, default 0 - See DataFrame.fillna for more details - - Notes - ----- - 1. If both fill_value and fill_method are None then NaNs are dropped - (this is the default behavior) - 2. 
An intercept will be automatically added to the new_y_values if - the model was fitted using an intercept - - Returns - ------- - Series of predicted values - """ - if beta is None and x is None: - return self.y_predict - - if beta is None: - beta = self.beta - else: - beta = beta.reindex(self.beta.index) - if isnull(beta).any(): - raise ValueError('Must supply betas for same variables') - - if x is None: - x = self._x - orig_x = x - else: - orig_x = x - if fill_value is None and fill_method is None: - x = x.dropna(how='any') - else: - x = x.fillna(value=fill_value, method=fill_method, axis=axis) - if isinstance(x, Series): - x = DataFrame({'x': x}) - if self._intercept: - x['intercept'] = 1. - - x = x.reindex(columns=self._x.columns) - - rs = np.dot(x.values, beta.values) - return Series(rs, x.index).reindex(orig_x.index) - - RESULT_FIELDS = ['r2', 'r2_adj', 'df', 'df_model', 'df_resid', 'rmse', - 'f_stat', 'beta', 'std_err', 't_stat', 'p_value', 'nobs'] - - @cache_readonly - def _results(self): - results = {} - for result in self.RESULT_FIELDS: - results[result] = getattr(self, result) - - return results - - @cache_readonly - def _coef_table(self): - buf = StringIO() - - buf.write('%14s %10s %10s %10s %10s %10s %10s\n' % - ('Variable', 'Coef', 'Std Err', 't-stat', - 'p-value', 'CI 2.5%', 'CI 97.5%')) - buf.write(scom.banner('')) - coef_template = '\n%14s %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f' - - results = self._results - - beta = results['beta'] - - for i, name in enumerate(beta.index): - if i and not (i % 5): - buf.write('\n' + scom.banner('')) - - std_err = results['std_err'][name] - CI1 = beta[name] - 1.96 * std_err - CI2 = beta[name] + 1.96 * std_err - - t_stat = results['t_stat'][name] - p_value = results['p_value'][name] - - line = coef_template % (name, - beta[name], std_err, t_stat, p_value, CI1, CI2) - - buf.write(line) - - if self.nw_lags is not None: - buf.write('\n') - buf.write('*** The calculations are Newey-West ' - 'adjusted with lags %5d\n' % self.nw_lags) - - return buf.getvalue() - - @cache_readonly - def summary_as_matrix(self): - """Returns the formatted results of the OLS as a DataFrame.""" - results = self._results - beta = results['beta'] - data = {'beta': results['beta'], - 't-stat': results['t_stat'], - 'p-value': results['p_value'], - 'std err': results['std_err']} - return DataFrame(data, beta.index).T - - @cache_readonly - def summary(self): - """ - This returns the formatted result of the OLS computation - """ - template = """ -%(bannerTop)s - -Formula: Y ~ %(formula)s - -Number of Observations: %(nobs)d -Number of Degrees of Freedom: %(df)d - -R-squared: %(r2)10.4f -Adj R-squared: %(r2_adj)10.4f - -Rmse: %(rmse)10.4f - -F-stat %(f_stat_shape)s: %(f_stat)10.4f, p-value: %(f_stat_p_value)10.4f - -Degrees of Freedom: model %(df_model)d, resid %(df_resid)d - -%(bannerCoef)s -%(coef_table)s -%(bannerEnd)s -""" - coef_table = self._coef_table - - results = self._results - - f_stat = results['f_stat'] - - bracketed = ['<%s>' % str(c) for c in results['beta'].index] - - formula = StringIO() - formula.write(bracketed[0]) - tot = len(bracketed[0]) - line = 1 - for coef in bracketed[1:]: - tot = tot + len(coef) + 3 - - if tot // (68 * line): - formula.write('\n' + ' ' * 12) - line += 1 - - formula.write(' + ' + coef) - - params = { - 'bannerTop': scom.banner('Summary of Regression Analysis'), - 'bannerCoef': scom.banner('Summary of Estimated Coefficients'), - 'bannerEnd': scom.banner('End of Summary'), - 'formula': formula.getvalue(), - 'r2': results['r2'], - 
'r2_adj': results['r2_adj'], - 'nobs': results['nobs'], - 'df': results['df'], - 'df_model': results['df_model'], - 'df_resid': results['df_resid'], - 'coef_table': coef_table, - 'rmse': results['rmse'], - 'f_stat': f_stat['f-stat'], - 'f_stat_shape': '(%d, %d)' % (f_stat['DF X'], f_stat['DF Resid']), - 'f_stat_p_value': f_stat['p-value'], - } - - return template % params - - def __unicode__(self): - return self.summary - - @cache_readonly - def _time_obs_count(self): - # XXX - return self._time_has_obs.astype(int) - - @property - def _total_times(self): - return self._time_has_obs.sum() - - -class MovingOLS(OLS): - """ - Runs a rolling/expanding simple OLS. - - Parameters - ---------- - y : Series - x : Series, DataFrame, or dict of Series - weights : array-like, optional - 1d array of weights. If None, equivalent to an unweighted OLS. - window_type : {'full sample', 'rolling', 'expanding'} - Default expanding - window : int - size of window (for rolling/expanding OLS) - min_periods : int - Threshold of non-null data points to require. - If None, defaults to size of window for window_type='rolling' and 1 - otherwise - intercept : bool - True if you want an intercept. - nw_lags : None or int - Number of Newey-West lags. - nw_overlap : boolean, default False - Assume data is overlapping when computing Newey-West estimator - - """ - - def __init__(self, y, x, weights=None, window_type='expanding', - window=None, min_periods=None, intercept=True, - nw_lags=None, nw_overlap=False): - - self._args = dict(intercept=intercept, nw_lags=nw_lags, - nw_overlap=nw_overlap) - - OLS.__init__(self, y=y, x=x, weights=weights, **self._args) - - self._set_window(window_type, window, min_periods) - - def _set_window(self, window_type, window, min_periods): - self._window_type = scom._get_window_type(window_type) - - if self._is_rolling: - if window is None: - raise AssertionError("Must specify window.") - if min_periods is None: - min_periods = window - else: - window = len(self._x) - if min_periods is None: - min_periods = 1 - - self._window = int(window) - self._min_periods = min_periods - -#------------------------------------------------------------------------------ -# "Public" results - - @cache_readonly - def beta(self): - """Returns the betas in Series/DataFrame form.""" - return DataFrame(self._beta_raw, - index=self._result_index, - columns=self._x.columns) - - @cache_readonly - def rank(self): - return Series(self._rank_raw, index=self._result_index) - - @cache_readonly - def df(self): - """Returns the degrees of freedom.""" - return Series(self._df_raw, index=self._result_index) - - @cache_readonly - def df_model(self): - """Returns the model degrees of freedom.""" - return Series(self._df_model_raw, index=self._result_index) - - @cache_readonly - def df_resid(self): - """Returns the residual degrees of freedom.""" - return Series(self._df_resid_raw, index=self._result_index) - - @cache_readonly - def f_stat(self): - """Returns the f-stat value.""" - f_stat_dicts = dict((date, f_stat_to_dict(f_stat)) - for date, f_stat in zip(self.beta.index, - self._f_stat_raw)) - - return DataFrame(f_stat_dicts).T - - def f_test(self, hypothesis): - raise NotImplementedError('must use full sample') - - @cache_readonly - def forecast_mean(self): - return Series(self._forecast_mean_raw, index=self._result_index) - - @cache_readonly - def forecast_vol(self): - return Series(self._forecast_vol_raw, index=self._result_index) - - @cache_readonly - def p_value(self): - """Returns the p values.""" - cols = 
self.beta.columns - return DataFrame(self._p_value_raw, columns=cols, - index=self._result_index) - - @cache_readonly - def r2(self): - """Returns the r-squared values.""" - return Series(self._r2_raw, index=self._result_index) - - @cache_readonly - def resid(self): - """Returns the residuals.""" - return Series(self._resid_raw[self._valid_obs_labels], - index=self._result_index) - - @cache_readonly - def r2_adj(self): - """Returns the r-squared adjusted values.""" - index = self.r2.index - - return Series(self._r2_adj_raw, index=index) - - @cache_readonly - def rmse(self): - """Returns the rmse values.""" - return Series(self._rmse_raw, index=self._result_index) - - @cache_readonly - def std_err(self): - """Returns the standard err values.""" - return DataFrame(self._std_err_raw, columns=self.beta.columns, - index=self._result_index) - - @cache_readonly - def t_stat(self): - """Returns the t-stat value.""" - return DataFrame(self._t_stat_raw, columns=self.beta.columns, - index=self._result_index) - - @cache_readonly - def var_beta(self): - """Returns the covariance of beta.""" - result = {} - result_index = self._result_index - for i in range(len(self._var_beta_raw)): - dm = DataFrame(self._var_beta_raw[i], columns=self.beta.columns, - index=self.beta.columns) - result[result_index[i]] = dm - - return Panel.from_dict(result, intersect=False) - - @cache_readonly - def y_fitted(self): - """Returns the fitted y values.""" - return Series(self._y_fitted_raw[self._valid_obs_labels], - index=self._result_index) - - @cache_readonly - def y_predict(self): - """Returns the predicted y values.""" - return Series(self._y_predict_raw[self._valid_obs_labels], - index=self._result_index) - -#------------------------------------------------------------------------------ -# "raw" attributes, calculations - - @property - def _is_rolling(self): - return self._window_type == 'rolling' - - @cache_readonly - def _beta_raw(self): - """Runs the regression and returns the beta.""" - beta, indices, mask = self._rolling_ols_call - - return beta[indices] - - @cache_readonly - def _result_index(self): - return self._index[self._valid_indices] - - @property - def _valid_indices(self): - return self._rolling_ols_call[1] - - @cache_readonly - def _rolling_ols_call(self): - return self._calc_betas(self._x_trans, self._y_trans) - - def _calc_betas(self, x, y): - N = len(self._index) - K = len(self._x.columns) - - betas = np.empty((N, K), dtype=float) - betas[:] = np.NaN - - valid = self._time_has_obs - enough = self._enough_obs - window = self._window - - # Use transformed (demeaned) Y, X variables - cum_xx = self._cum_xx(x) - cum_xy = self._cum_xy(x, y) - - for i in range(N): - if not valid[i] or not enough[i]: - continue - - xx = cum_xx[i] - xy = cum_xy[i] - if self._is_rolling and i >= window: - xx = xx - cum_xx[i - window] - xy = xy - cum_xy[i - window] - - betas[i] = math.solve(xx, xy) - - mask = ~np.isnan(betas).any(axis=1) - have_betas = np.arange(N)[mask] - - return betas, have_betas, mask - - def _rolling_rank(self): - dates = self._index - window = self._window - - ranks = np.empty(len(dates), dtype=float) - ranks[:] = np.NaN - for i, date in enumerate(dates): - if self._is_rolling and i >= window: - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - x_slice = self._x.truncate(before=prior_date, after=date).values - - if len(x_slice) == 0: - continue - - ranks[i] = math.rank(x_slice) - - return ranks - - def _cum_xx(self, x): - dates = self._index - K = len(x.columns) - valid = 
self._time_has_obs - cum_xx = [] - - slicer = lambda df, dt: df.truncate(dt, dt).values - if not self._panel_model: - _get_index = x.index.get_loc - - def slicer(df, dt): - i = _get_index(dt) - return df.values[i:i + 1, :] - - last = np.zeros((K, K)) - - for i, date in enumerate(dates): - if not valid[i]: - cum_xx.append(last) - continue - - x_slice = slicer(x, date) - xx = last = last + np.dot(x_slice.T, x_slice) - cum_xx.append(xx) - - return cum_xx - - def _cum_xy(self, x, y): - dates = self._index - valid = self._time_has_obs - cum_xy = [] - - x_slicer = lambda df, dt: df.truncate(dt, dt).values - if not self._panel_model: - _get_index = x.index.get_loc - - def x_slicer(df, dt): - i = _get_index(dt) - return df.values[i:i + 1] - - _y_get_index = y.index.get_loc - _values = y.values - if isinstance(y.index, MultiIndex): - def y_slicer(df, dt): - loc = _y_get_index(dt) - return _values[loc] - else: - def y_slicer(df, dt): - i = _y_get_index(dt) - return _values[i:i + 1] - - last = np.zeros(len(x.columns)) - for i, date in enumerate(dates): - if not valid[i]: - cum_xy.append(last) - continue - - x_slice = x_slicer(x, date) - y_slice = y_slicer(y, date) - - xy = last = last + np.dot(x_slice.T, y_slice) - cum_xy.append(xy) - - return cum_xy - - @cache_readonly - def _rank_raw(self): - rank = self._rolling_rank() - return rank[self._valid_indices] - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - return self._rank_raw - - @cache_readonly - def _df_model_raw(self): - """Returns the raw model degrees of freedom.""" - return self._df_raw - 1 - - @cache_readonly - def _df_resid_raw(self): - """Returns the raw residual degrees of freedom.""" - return self._nobs - self._df_raw - - @cache_readonly - def _f_stat_raw(self): - """Returns the raw f-stat value.""" - from scipy.stats import f - - items = self.beta.columns - nobs = self._nobs - df = self._df_raw - df_resid = nobs - df - - # var_beta has not been newey-west adjusted - if self._nw_lags is None: - F = self._r2_raw / (self._r2_raw - self._r2_adj_raw) - - q = len(items) - if 'intercept' in items: - q -= 1 - - def get_result_simple(Fst, d): - return Fst, (q, d), 1 - f.cdf(Fst, q, d) - - # Compute the P-value for each pair - result = starmap(get_result_simple, zip(F, df_resid)) - - return list(result) - - K = len(items) - R = np.eye(K) - r = np.zeros((K, 1)) - - try: - intercept = items.get_loc('intercept') - R = np.concatenate((R[0: intercept], R[intercept + 1:])) - r = np.concatenate((r[0: intercept], r[intercept + 1:])) - except KeyError: - # no intercept - pass - - def get_result(beta, vcov, n, d): - return math.calc_F(R, r, beta, vcov, n, d) - - results = starmap(get_result, - zip(self._beta_raw, self._var_beta_raw, nobs, df)) - - return list(results) - - @cache_readonly - def _p_value_raw(self): - """Returns the raw p values.""" - from scipy.stats import t - - result = [2 * t.sf(a, b) - for a, b in zip(np.fabs(self._t_stat_raw), - self._df_resid_raw)] - - return np.array(result) - - @cache_readonly - def _resid_stats(self): - uncentered_sst = [] - sst = [] - sse = [] - - Yreg = self._y - Y = self._y_trans - X = self._x_trans - weights = self._weights - - dates = self._index - window = self._window - for n, index in enumerate(self._valid_indices): - if self._is_rolling and index >= window: - prior_date = dates[index - window + 1] - else: - prior_date = dates[0] - - date = dates[index] - beta = self._beta_raw[n] - - X_slice = X.truncate(before=prior_date, after=date).values - Y_slice = 
_y_converter(Y.truncate(before=prior_date, after=date)) - - resid = Y_slice - np.dot(X_slice, beta) - - if weights is not None: - Y_slice = _y_converter(Yreg.truncate(before=prior_date, - after=date)) - weights_slice = weights.truncate(prior_date, date) - demeaned = Y_slice - np.average(Y_slice, weights=weights_slice) - SS_total = (weights_slice * demeaned ** 2).sum() - else: - SS_total = ((Y_slice - Y_slice.mean()) ** 2).sum() - - SS_err = (resid ** 2).sum() - SST_uncentered = (Y_slice ** 2).sum() - - sse.append(SS_err) - sst.append(SS_total) - uncentered_sst.append(SST_uncentered) - - return { - 'sse': np.array(sse), - 'centered_tss': np.array(sst), - 'uncentered_tss': np.array(uncentered_sst), - } - - @cache_readonly - def _rmse_raw(self): - """Returns the raw rmse values.""" - return np.sqrt(self._resid_stats['sse'] / self._df_resid_raw) - - @cache_readonly - def _r2_raw(self): - rs = self._resid_stats - - if self._use_centered_tss: - return 1 - rs['sse'] / rs['centered_tss'] - else: - return 1 - rs['sse'] / rs['uncentered_tss'] - - @cache_readonly - def _r2_adj_raw(self): - """Returns the raw r-squared adjusted values.""" - nobs = self._nobs - factors = (nobs - 1) / (nobs - self._df_raw) - return 1 - (1 - self._r2_raw) * factors - - @cache_readonly - def _resid_raw(self): - """Returns the raw residuals.""" - return (self._y.values - self._y_fitted_raw) - - @cache_readonly - def _std_err_raw(self): - """Returns the raw standard err values.""" - results = [] - for i in range(len(self._var_beta_raw)): - results.append(np.sqrt(np.diag(self._var_beta_raw[i]))) - - return np.array(results) - - @cache_readonly - def _t_stat_raw(self): - """Returns the raw t-stat value.""" - return self._beta_raw / self._std_err_raw - - @cache_readonly - def _var_beta_raw(self): - """Returns the raw covariance of beta.""" - x = self._x_trans - y = self._y_trans - dates = self._index - nobs = self._nobs - rmse = self._rmse_raw - beta = self._beta_raw - df = self._df_raw - window = self._window - cum_xx = self._cum_xx(self._x) - - results = [] - for n, i in enumerate(self._valid_indices): - xx = cum_xx[i] - date = dates[i] - - if self._is_rolling and i >= window: - xx = xx - cum_xx[i - window] - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - x_slice = x.truncate(before=prior_date, after=date) - y_slice = y.truncate(before=prior_date, after=date) - xv = x_slice.values - yv = np.asarray(y_slice) - - if self._nw_lags is None: - result = math.inv(xx) * (rmse[n] ** 2) - else: - resid = yv - np.dot(xv, beta[n]) - m = (xv.T * resid).T - - xeps = math.newey_west(m, self._nw_lags, nobs[n], df[n], - self._nw_overlap) - - xx_inv = math.inv(xx) - result = np.dot(xx_inv, np.dot(xeps, xx_inv)) - - results.append(result) - - return np.array(results) - - @cache_readonly - def _forecast_mean_raw(self): - """Returns the raw covariance of beta.""" - nobs = self._nobs - window = self._window - - # x should be ones - dummy = DataFrame(index=self._y.index) - dummy['y'] = 1 - - cum_xy = self._cum_xy(dummy, self._y) - - results = [] - for n, i in enumerate(self._valid_indices): - sumy = cum_xy[i] - - if self._is_rolling and i >= window: - sumy = sumy - cum_xy[i - window] - - results.append(sumy[0] / nobs[n]) - - return np.array(results) - - @cache_readonly - def _forecast_vol_raw(self): - """Returns the raw covariance of beta.""" - beta = self._beta_raw - window = self._window - dates = self._index - x = self._x - - results = [] - for n, i in enumerate(self._valid_indices): - date = dates[i] - if 
self._is_rolling and i >= window: - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - x_slice = x.truncate(prior_date, date).values - x_demeaned = x_slice - x_slice.mean(0) - x_cov = np.dot(x_demeaned.T, x_demeaned) / (len(x_slice) - 1) - - B = beta[n] - result = np.dot(B, np.dot(x_cov, B)) - results.append(np.sqrt(result)) - - return np.array(results) - - @cache_readonly - def _y_fitted_raw(self): - """Returns the raw fitted y values.""" - return (self._x.values * self._beta_matrix(lag=0)).sum(1) - - @cache_readonly - def _y_predict_raw(self): - """Returns the raw predicted y values.""" - return (self._x.values * self._beta_matrix(lag=1)).sum(1) - - @cache_readonly - def _results(self): - results = {} - for result in self.RESULT_FIELDS: - value = getattr(self, result) - if isinstance(value, Series): - value = value[self.beta.index[-1]] - elif isinstance(value, DataFrame): - value = value.xs(self.beta.index[-1]) - else: # pragma: no cover - raise Exception('Problem retrieving %s' % result) - results[result] = value - - return results - - @cache_readonly - def _window_time_obs(self): - window_obs = (Series(self._time_obs_count > 0) - .rolling(self._window, min_periods=1) - .sum() - .values - ) - - window_obs[np.isnan(window_obs)] = 0 - return window_obs.astype(int) - - @cache_readonly - def _nobs_raw(self): - if self._is_rolling: - window = self._window - else: - # expanding case - window = len(self._index) - - result = Series(self._time_obs_count).rolling( - window, min_periods=1).sum().values - - return result.astype(int) - - def _beta_matrix(self, lag=0): - if lag < 0: - raise AssertionError("'lag' must be greater than or equal to 0, " - "input was {0}".format(lag)) - - betas = self._beta_raw - - labels = np.arange(len(self._y)) - lag - indexer = self._valid_obs_labels.searchsorted(labels, side='left') - indexer[indexer == len(betas)] = len(betas) - 1 - - beta_matrix = betas[indexer] - beta_matrix[labels < self._valid_obs_labels[0]] = np.NaN - - return beta_matrix - - @cache_readonly - def _valid_obs_labels(self): - dates = self._index[self._valid_indices] - return self._y.index.searchsorted(dates) - - @cache_readonly - def _nobs(self): - return self._nobs_raw[self._valid_indices] - - @property - def nobs(self): - return Series(self._nobs, index=self._result_index) - - @cache_readonly - def _enough_obs(self): - # XXX: what's the best way to determine where to start? - return self._nobs_raw >= max(self._min_periods, - len(self._x.columns) + 1) - - -def _safe_update(d, other): - """ - Combine dictionaries with non-overlapping keys - """ - for k, v in compat.iteritems(other): - if k in d: - raise Exception('Duplicate regressor: %s' % k) - - d[k] = v - - -def _filter_data(lhs, rhs, weights=None): - """ - Cleans the input for single OLS. - - Parameters - ---------- - lhs : Series - Dependent variable in the regression. - rhs : dict, whose values are Series, DataFrame, or dict - Explanatory variables of the regression. - weights : array-like, optional - 1d array of weights. If None, equivalent to an unweighted OLS. 
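MovingOLS._calc_betas above avoids refitting from scratch at every date: it accumulates the cross-products X'X and X'y over time (_cum_xx / _cum_xy) and, in the rolling case, subtracts the cumulative values at the trailing edge of the window before solving. A minimal standalone sketch of that idea, assuming hypothetical dense arrays with one observation per period and no missing data:

    import numpy as np

    rng = np.random.RandomState(0)
    T, K, window = 200, 3, 60
    X = rng.normal(size=(T, K))
    y = rng.normal(size=T)

    cum_xx = np.cumsum(X[:, :, None] * X[:, None, :], axis=0)    # running X'X, shape (T, K, K)
    cum_xy = np.cumsum(X * y[:, None], axis=0)                    # running X'y, shape (T, K)

    betas = np.full((T, K), np.nan)
    for i in range(window - 1, T):
        xx, xy = cum_xx[i].copy(), cum_xy[i].copy()
        if i >= window:                        # drop everything before the window start
            xx -= cum_xx[i - window]
            xy -= cum_xy[i - window]
        betas[i] = np.linalg.solve(xx, xy)     # same role as math.solve in the code above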
- - Returns - ------- - Series, DataFrame - Cleaned lhs and rhs - """ - if not isinstance(lhs, Series): - if len(lhs) != len(rhs): - raise AssertionError("length of lhs must equal length of rhs") - lhs = Series(lhs, index=rhs.index) - - rhs = _combine_rhs(rhs) - lhs = DataFrame({'__y__': lhs}, dtype=float) - pre_filt_rhs = rhs.dropna(how='any') - - combined = rhs.join(lhs, how='outer') - if weights is not None: - combined['__weights__'] = weights - - valid = (combined.count(1) == len(combined.columns)).values - index = combined.index - combined = combined[valid] - - if weights is not None: - filt_weights = combined.pop('__weights__') - else: - filt_weights = None - - filt_lhs = combined.pop('__y__') - filt_rhs = combined - - if hasattr(filt_weights, 'to_dense'): - filt_weights = filt_weights.to_dense() - - return (filt_lhs.to_dense(), filt_rhs.to_dense(), filt_weights, - pre_filt_rhs.to_dense(), index, valid) - - -def _combine_rhs(rhs): - """ - Glue input X variables together while checking for potential - duplicates - """ - series = {} - - if isinstance(rhs, Series): - series['x'] = rhs - elif isinstance(rhs, DataFrame): - series = rhs.copy() - elif isinstance(rhs, dict): - for name, value in compat.iteritems(rhs): - if isinstance(value, Series): - _safe_update(series, {name: value}) - elif isinstance(value, (dict, DataFrame)): - _safe_update(series, value) - else: # pragma: no cover - raise Exception('Invalid RHS data type: %s' % type(value)) - else: # pragma: no cover - raise Exception('Invalid RHS type: %s' % type(rhs)) - - if not isinstance(series, DataFrame): - series = DataFrame(series, dtype=float) - - return series - -# A little kludge so we can use this method for both -# MovingOLS and MovingPanelOLS - - -def _y_converter(y): - y = y.values.squeeze() - if y.ndim == 0: # pragma: no cover - return np.array([y]) - else: - return y - - -def f_stat_to_dict(result): - f_stat, shape, p_value = result - - result = {} - result['f-stat'] = f_stat - result['DF X'] = shape[0] - result['DF Resid'] = shape[1] - result['p-value'] = p_value - - return result diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py deleted file mode 100644 index 806dc289f843a..0000000000000 --- a/pandas/stats/plm.py +++ /dev/null @@ -1,863 +0,0 @@ -""" -Linear regression objects for panel data -""" - -# pylint: disable-msg=W0231 -# pylint: disable-msg=E1101,E1103 - -# flake8: noqa - -from __future__ import division -from pandas.compat import range -from pandas import compat -import warnings - -import numpy as np - -from pandas.core.panel import Panel -from pandas.core.frame import DataFrame -from pandas.core.reshape import get_dummies -from pandas.core.series import Series -from pandas.stats.ols import OLS, MovingOLS -import pandas.stats.common as com -import pandas.stats.math as math -from pandas.util.decorators import cache_readonly - - -class PanelOLS(OLS): - """Implements panel OLS. - - See ols function docs - """ - _panel_model = True - - def __init__(self, y, x, weights=None, intercept=True, nw_lags=None, - entity_effects=False, time_effects=False, x_effects=None, - cluster=None, dropped_dummies=None, verbose=False, - nw_overlap=False): - import warnings - warnings.warn("The pandas.stats.plm module is deprecated and will be " - "removed in a future version. 
We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/mixed_linear.html", - FutureWarning, stacklevel=4) - self._x_orig = x - self._y_orig = y - self._weights = weights - - self._intercept = intercept - self._nw_lags = nw_lags - self._nw_overlap = nw_overlap - self._entity_effects = entity_effects - self._time_effects = time_effects - self._x_effects = x_effects - self._dropped_dummies = dropped_dummies or {} - self._cluster = com._get_cluster_type(cluster) - self._verbose = verbose - - (self._x, self._x_trans, - self._x_filtered, self._y, - self._y_trans) = self._prepare_data() - - self._index = self._x.index.levels[0] - - self._T = len(self._index) - - def log(self, msg): - if self._verbose: # pragma: no cover - print(msg) - - def _prepare_data(self): - """Cleans and stacks input data into DataFrame objects - - If time effects is True, then we turn off intercepts and omit an item - from every (entity and x) fixed effect. - - Otherwise: - - If we have an intercept, we omit an item from every fixed effect. - - Else, we omit an item from every fixed effect except one of them. - - The categorical variables will get dropped from x. - """ - (x, x_filtered, y, weights, cat_mapping) = self._filter_data() - - self.log('Adding dummies to X variables') - x = self._add_dummies(x, cat_mapping) - - self.log('Adding dummies to filtered X variables') - x_filtered = self._add_dummies(x_filtered, cat_mapping) - - if self._x_effects: - x = x.drop(self._x_effects, axis=1) - x_filtered = x_filtered.drop(self._x_effects, axis=1) - - if self._time_effects: - x_regressor = x.sub(x.mean(level=0), level=0) - - unstacked_y = y.unstack() - y_regressor = unstacked_y.sub(unstacked_y.mean(1), axis=0).stack() - y_regressor.index = y.index - - elif self._intercept: - # only add intercept when no time effects - self.log('Adding intercept') - x = x_regressor = add_intercept(x) - x_filtered = add_intercept(x_filtered) - y_regressor = y - else: - self.log('No intercept added') - x_regressor = x - y_regressor = y - - if weights is not None: - if not y_regressor.index.equals(weights.index): - raise AssertionError("y_regressor and weights must have the " - "same index") - if not x_regressor.index.equals(weights.index): - raise AssertionError("x_regressor and weights must have the " - "same index") - - rt_weights = np.sqrt(weights) - y_regressor = y_regressor * rt_weights - x_regressor = x_regressor.mul(rt_weights, axis=0) - - return x, x_regressor, x_filtered, y, y_regressor - - def _filter_data(self): - """ - - """ - data = self._x_orig - cat_mapping = {} - - if isinstance(data, DataFrame): - data = data.to_panel() - else: - if isinstance(data, Panel): - data = data.copy() - - data, cat_mapping = self._convert_x(data) - - if not isinstance(data, Panel): - data = Panel.from_dict(data, intersect=True) - - x_names = data.items - - if self._weights is not None: - data['__weights__'] = self._weights - - # Filter x's without y (so we can make a prediction) - filtered = data.to_frame() - - # Filter all data together using to_frame - - # convert to DataFrame - y = self._y_orig - if isinstance(y, Series): - y = y.unstack() - - data['__y__'] = y - data_long = data.to_frame() - - x_filt = filtered.filter(x_names) - x = data_long.filter(x_names) - y = data_long['__y__'] - - if self._weights is not None and not self._weights.empty: - weights = data_long['__weights__'] - else: - weights = None - - return x, x_filt, y, weights, cat_mapping - - def _convert_x(self, x): - # 
Converts non-numeric data in x to floats. x_converted is the - # DataFrame with converted values, and x_conversion is a dict that - # provides the reverse mapping. For example, if 'A' was converted to 0 - # for x named 'variety', then x_conversion['variety'][0] is 'A'. - x_converted = {} - cat_mapping = {} - # x can be either a dict or a Panel, but in Python 3, dicts don't have - # .iteritems - iteritems = getattr(x, 'iteritems', x.items) - for key, df in iteritems(): - if not isinstance(df, DataFrame): - raise AssertionError("all input items must be DataFrames, " - "at least one is of " - "type {0}".format(type(df))) - - if _is_numeric(df): - x_converted[key] = df - else: - try: - df = df.astype(float) - except (TypeError, ValueError): - values = df.values - distinct_values = sorted(set(values.flat)) - cat_mapping[key] = dict(enumerate(distinct_values)) - new_values = np.searchsorted(distinct_values, values) - x_converted[key] = DataFrame(new_values, index=df.index, - columns=df.columns) - - if len(cat_mapping) == 0: - x_converted = x - - return x_converted, cat_mapping - - def _add_dummies(self, panel, mapping): - """ - Add entity and / or categorical dummies to input X DataFrame - - Returns - ------- - DataFrame - """ - panel = self._add_entity_effects(panel) - panel = self._add_categorical_dummies(panel, mapping) - - return panel - - def _add_entity_effects(self, panel): - """ - Add entity dummies to panel - - Returns - ------- - DataFrame - """ - from pandas.core.reshape import make_axis_dummies - - if not self._entity_effects: - return panel - - self.log('-- Adding entity fixed effect dummies') - - dummies = make_axis_dummies(panel, 'minor') - - if not self._use_all_dummies: - if 'entity' in self._dropped_dummies: - to_exclude = str(self._dropped_dummies.get('entity')) - else: - to_exclude = dummies.columns[0] - - if to_exclude not in dummies.columns: - raise Exception('%s not in %s' % (to_exclude, - dummies.columns)) - - self.log('-- Excluding dummy for entity: %s' % to_exclude) - - dummies = dummies.filter(dummies.columns.difference([to_exclude])) - - dummies = dummies.add_prefix('FE_') - panel = panel.join(dummies) - - return panel - - def _add_categorical_dummies(self, panel, cat_mappings): - """ - Add categorical dummies to panel - - Returns - ------- - DataFrame - """ - if not self._x_effects: - return panel - - dropped_dummy = (self._entity_effects and not self._use_all_dummies) - - for effect in self._x_effects: - self.log('-- Adding fixed effect dummies for %s' % effect) - - dummies = get_dummies(panel[effect]) - - val_map = cat_mappings.get(effect) - if val_map: - val_map = dict((v, k) for k, v in compat.iteritems(val_map)) - - if dropped_dummy or not self._use_all_dummies: - if effect in self._dropped_dummies: - to_exclude = mapped_name = self._dropped_dummies.get( - effect) - - if val_map: - mapped_name = val_map[to_exclude] - else: - to_exclude = mapped_name = dummies.columns[0] - - if mapped_name not in dummies.columns: # pragma: no cover - raise Exception('%s not in %s' % (to_exclude, - dummies.columns)) - - self.log( - '-- Excluding dummy for %s: %s' % (effect, to_exclude)) - - dummies = dummies.filter( - dummies.columns.difference([mapped_name])) - dropped_dummy = True - - dummies = _convertDummies(dummies, cat_mappings.get(effect)) - dummies = dummies.add_prefix('%s_' % effect) - panel = panel.join(dummies) - - return panel - - @property - def _use_all_dummies(self): - """ - In the case of using an intercept or including time fixed - effects, completely partitioning 
the sample would make the X - not full rank. - """ - return (not self._intercept and not self._time_effects) - - @cache_readonly - def _beta_raw(self): - """Runs the regression and returns the beta.""" - X = self._x_trans.values - Y = self._y_trans.values.squeeze() - - beta, _, _, _ = np.linalg.lstsq(X, Y) - - return beta - - @cache_readonly - def beta(self): - return Series(self._beta_raw, index=self._x.columns) - - @cache_readonly - def _df_model_raw(self): - """Returns the raw model degrees of freedom.""" - return self._df_raw - 1 - - @cache_readonly - def _df_resid_raw(self): - """Returns the raw residual degrees of freedom.""" - return self._nobs - self._df_raw - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - df = math.rank(self._x_trans.values) - if self._time_effects: - df += self._total_times - - return df - - @cache_readonly - def _r2_raw(self): - Y = self._y_trans.values.squeeze() - X = self._x_trans.values - - resid = Y - np.dot(X, self._beta_raw) - - SSE = (resid ** 2).sum() - - if self._use_centered_tss: - SST = ((Y - np.mean(Y)) ** 2).sum() - else: - SST = (Y ** 2).sum() - - return 1 - SSE / SST - - @property - def _use_centered_tss(self): - # has_intercept = np.abs(self._resid_raw.sum()) < _FP_ERR - return self._intercept or self._entity_effects or self._time_effects - - @cache_readonly - def _r2_adj_raw(self): - """Returns the raw r-squared adjusted values.""" - nobs = self._nobs - factors = (nobs - 1) / (nobs - self._df_raw) - return 1 - (1 - self._r2_raw) * factors - - @cache_readonly - def _resid_raw(self): - Y = self._y.values.squeeze() - X = self._x.values - return Y - np.dot(X, self._beta_raw) - - @cache_readonly - def resid(self): - return self._unstack_vector(self._resid_raw) - - @cache_readonly - def _rmse_raw(self): - """Returns the raw rmse values.""" - # X = self._x.values - # Y = self._y.values.squeeze() - - X = self._x_trans.values - Y = self._y_trans.values.squeeze() - - resid = Y - np.dot(X, self._beta_raw) - ss = (resid ** 2).sum() - return np.sqrt(ss / (self._nobs - self._df_raw)) - - @cache_readonly - def _var_beta_raw(self): - cluster_axis = None - if self._cluster == 'time': - cluster_axis = 0 - elif self._cluster == 'entity': - cluster_axis = 1 - - x = self._x - y = self._y - - if self._time_effects: - xx = _xx_time_effects(x, y) - else: - xx = np.dot(x.values.T, x.values) - - return _var_beta_panel(y, x, self._beta_raw, xx, - self._rmse_raw, cluster_axis, self._nw_lags, - self._nobs, self._df_raw, self._nw_overlap) - - @cache_readonly - def _y_fitted_raw(self): - """Returns the raw fitted y values.""" - return np.dot(self._x.values, self._beta_raw) - - @cache_readonly - def y_fitted(self): - return self._unstack_vector(self._y_fitted_raw, index=self._x.index) - - def _unstack_vector(self, vec, index=None): - if index is None: - index = self._y_trans.index - panel = DataFrame(vec, index=index, columns=['dummy']) - return panel.to_panel()['dummy'] - - def _unstack_y(self, vec): - unstacked = self._unstack_vector(vec) - return unstacked.reindex(self.beta.index) - - @cache_readonly - def _time_obs_count(self): - return self._y_trans.count(level=0).values - - @cache_readonly - def _time_has_obs(self): - return self._time_obs_count > 0 - - @property - def _nobs(self): - return len(self._y) - - -def _convertDummies(dummies, mapping): - # cleans up the names of the generated dummies - new_items = [] - for item in dummies.columns: - if not mapping: - var = str(item) - if isinstance(item, float): - var = '%g' % item - - 
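# Illustrative sketch, not from the deleted pandas.stats sources (function and
# argument names here are invented): the _r2_raw / _rmse_raw bookkeeping above
# is plain least-squares arithmetic -- R^2 = 1 - SSE/SST, with SST centered
# only when an intercept or fixed effects are present (_use_centered_tss), and
# RMSE = sqrt(SSE / (nobs - df)).
import numpy as np

def r2_and_rmse(y, X, beta, df, centered=True):
    resid = y - X.dot(beta)
    sse = (resid ** 2).sum()
    # centered total sum of squares when the model has an intercept/effects
    sst = ((y - y.mean()) ** 2).sum() if centered else (y ** 2).sum()
    r2 = 1.0 - sse / sst
    rmse = np.sqrt(sse / (len(y) - df))
    return r2, rmse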
new_items.append(var) - else: - # renames the dummies if a conversion dict is provided - new_items.append(mapping[int(item)]) - - dummies = DataFrame(dummies.values, index=dummies.index, - columns=new_items) - - return dummies - - -def _is_numeric(df): - for col in df: - if df[col].dtype.name == 'object': - return False - - return True - - -def add_intercept(panel, name='intercept'): - """ - Add column of ones to input panel - - Parameters - ---------- - panel: Panel / DataFrame - name: string, default 'intercept'] - - Returns - ------- - New object (same type as input) - """ - panel = panel.copy() - panel[name] = 1. - - return panel.consolidate() - - -class MovingPanelOLS(MovingOLS, PanelOLS): - """Implements rolling/expanding panel OLS. - - See ols function docs - """ - _panel_model = True - - def __init__(self, y, x, weights=None, - window_type='expanding', window=None, - min_periods=None, - min_obs=None, - intercept=True, - nw_lags=None, nw_overlap=False, - entity_effects=False, - time_effects=False, - x_effects=None, - cluster=None, - dropped_dummies=None, - verbose=False): - - self._args = dict(intercept=intercept, - nw_lags=nw_lags, - nw_overlap=nw_overlap, - entity_effects=entity_effects, - time_effects=time_effects, - x_effects=x_effects, - cluster=cluster, - dropped_dummies=dropped_dummies, - verbose=verbose) - - PanelOLS.__init__(self, y=y, x=x, weights=weights, - **self._args) - - self._set_window(window_type, window, min_periods) - - if min_obs is None: - min_obs = len(self._x.columns) + 1 - - self._min_obs = min_obs - - @cache_readonly - def resid(self): - return self._unstack_y(self._resid_raw) - - @cache_readonly - def y_fitted(self): - return self._unstack_y(self._y_fitted_raw) - - @cache_readonly - def y_predict(self): - """Returns the predicted y values.""" - return self._unstack_y(self._y_predict_raw) - - def lagged_y_predict(self, lag=1): - """ - Compute forecast Y value lagging coefficient by input number - of time periods - - Parameters - ---------- - lag : int - - Returns - ------- - DataFrame - """ - x = self._x.values - betas = self._beta_matrix(lag=lag) - return self._unstack_y((betas * x).sum(1)) - - @cache_readonly - def _rolling_ols_call(self): - return self._calc_betas(self._x_trans, self._y_trans) - - @cache_readonly - def _df_raw(self): - """Returns the degrees of freedom.""" - df = self._rolling_rank() - - if self._time_effects: - df += self._window_time_obs - - return df[self._valid_indices] - - @cache_readonly - def _var_beta_raw(self): - """Returns the raw covariance of beta.""" - x = self._x - y = self._y - - dates = x.index.levels[0] - - cluster_axis = None - if self._cluster == 'time': - cluster_axis = 0 - elif self._cluster == 'entity': - cluster_axis = 1 - - nobs = self._nobs - rmse = self._rmse_raw - beta = self._beta_raw - df = self._df_raw - window = self._window - - if not self._time_effects: - # Non-transformed X - cum_xx = self._cum_xx(x) - - results = [] - for n, i in enumerate(self._valid_indices): - if self._is_rolling and i >= window: - prior_date = dates[i - window + 1] - else: - prior_date = dates[0] - - date = dates[i] - - x_slice = x.truncate(prior_date, date) - y_slice = y.truncate(prior_date, date) - - if self._time_effects: - xx = _xx_time_effects(x_slice, y_slice) - else: - xx = cum_xx[i] - if self._is_rolling and i >= window: - xx = xx - cum_xx[i - window] - - result = _var_beta_panel(y_slice, x_slice, beta[n], xx, rmse[n], - cluster_axis, self._nw_lags, - nobs[n], df[n], self._nw_overlap) - - results.append(result) - - return 
np.array(results) - - @cache_readonly - def _resid_raw(self): - beta_matrix = self._beta_matrix(lag=0) - - Y = self._y.values.squeeze() - X = self._x.values - resid = Y - (X * beta_matrix).sum(1) - - return resid - - @cache_readonly - def _y_fitted_raw(self): - x = self._x.values - betas = self._beta_matrix(lag=0) - return (betas * x).sum(1) - - @cache_readonly - def _y_predict_raw(self): - """Returns the raw predicted y values.""" - x = self._x.values - betas = self._beta_matrix(lag=1) - return (betas * x).sum(1) - - def _beta_matrix(self, lag=0): - if lag < 0: - raise AssertionError("'lag' must be greater than or equal to 0, " - "input was {0}".format(lag)) - - index = self._y_trans.index - major_labels = index.labels[0] - labels = major_labels - lag - indexer = self._valid_indices.searchsorted(labels, side='left') - - beta_matrix = self._beta_raw[indexer] - beta_matrix[labels < self._valid_indices[0]] = np.NaN - - return beta_matrix - - @cache_readonly - def _enough_obs(self): - # XXX: what's the best way to determine where to start? - # TODO: write unit tests for this - - rank_threshold = len(self._x.columns) + 1 - if self._min_obs < rank_threshold: # pragma: no cover - warnings.warn('min_obs is smaller than rank of X matrix') - - enough_observations = self._nobs_raw >= self._min_obs - enough_time_periods = self._window_time_obs >= self._min_periods - return enough_time_periods & enough_observations - - -def create_ols_dict(attr): - def attr_getter(self): - d = {} - for k, v in compat.iteritems(self.results): - result = getattr(v, attr) - d[k] = result - - return d - - return attr_getter - - -def create_ols_attr(attr): - return property(create_ols_dict(attr)) - - -class NonPooledPanelOLS(object): - """Implements non-pooled panel OLS. - - Parameters - ---------- - y : DataFrame - x : Series, DataFrame, or dict of Series - intercept : bool - True if you want an intercept. - nw_lags : None or int - Number of Newey-West lags. - window_type : {'full_sample', 'rolling', 'expanding'} - 'full_sample' by default - window : int - size of window (for rolling/expanding OLS) - """ - - ATTRIBUTES = [ - 'beta', - 'df', - 'df_model', - 'df_resid', - 'f_stat', - 'p_value', - 'r2', - 'r2_adj', - 'resid', - 'rmse', - 'std_err', - 'summary_as_matrix', - 't_stat', - 'var_beta', - 'x', - 'y', - 'y_fitted', - 'y_predict' - ] - - def __init__(self, y, x, window_type='full_sample', window=None, - min_periods=None, intercept=True, nw_lags=None, - nw_overlap=False): - - import warnings - warnings.warn("The pandas.stats.plm module is deprecated and will be " - "removed in a future version. 
We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/mixed_linear.html", - FutureWarning, stacklevel=4) - - for attr in self.ATTRIBUTES: - setattr(self.__class__, attr, create_ols_attr(attr)) - - results = {} - - for entity in y: - entity_y = y[entity] - - entity_x = {} - for x_var in x: - entity_x[x_var] = x[x_var][entity] - - from pandas.stats.interface import ols - results[entity] = ols(y=entity_y, - x=entity_x, - window_type=window_type, - window=window, - min_periods=min_periods, - intercept=intercept, - nw_lags=nw_lags, - nw_overlap=nw_overlap) - - self.results = results - - -def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, - nw_lags, nobs, df, nw_overlap): - xx_inv = math.inv(xx) - - yv = y.values - - if cluster_axis is None: - if nw_lags is None: - return xx_inv * (rmse ** 2) - else: - resid = yv - np.dot(x.values, beta) - m = (x.values.T * resid).T - - xeps = math.newey_west(m, nw_lags, nobs, df, nw_overlap) - - return np.dot(xx_inv, np.dot(xeps, xx_inv)) - else: - Xb = np.dot(x.values, beta).reshape((len(x.values), 1)) - resid = DataFrame(yv[:, None] - Xb, index=y.index, columns=['resid']) - - if cluster_axis == 1: - x = x.swaplevel(0, 1).sort_index(level=0) - resid = resid.swaplevel(0, 1).sort_index(level=0) - - m = _group_agg(x.values * resid.values, x.index._bounds, - lambda x: np.sum(x, axis=0)) - - if nw_lags is None: - nw_lags = 0 - - xox = 0 - for i in range(len(x.index.levels[0])): - xox += math.newey_west(m[i: i + 1], nw_lags, - nobs, df, nw_overlap) - - return np.dot(xx_inv, np.dot(xox, xx_inv)) - - -def _group_agg(values, bounds, f): - """ - R-style aggregator - - Parameters - ---------- - values : N-length or N x K ndarray - bounds : B-length ndarray - f : ndarray aggregation function - - Returns - ------- - ndarray with same length as bounds array - """ - if values.ndim == 1: - N = len(values) - result = np.empty(len(bounds), dtype=float) - elif values.ndim == 2: - N, K = values.shape - result = np.empty((len(bounds), K), dtype=float) - - testagg = f(values[:min(1, len(values))]) - if isinstance(testagg, np.ndarray) and testagg.ndim == 2: - raise AssertionError('Function must reduce') - - for i, left_bound in enumerate(bounds): - if i == len(bounds) - 1: - right_bound = N - else: - right_bound = bounds[i + 1] - - result[i] = f(values[left_bound:right_bound]) - - return result - - -def _xx_time_effects(x, y): - """ - Returns X'X - (X'T) (T'T)^-1 (T'X) - """ - # X'X - xx = np.dot(x.values.T, x.values) - xt = x.sum(level=0).values - - count = y.unstack().count(1).values - selector = count > 0 - - # X'X - (T'T)^-1 (T'X) - xt = xt[selector] - count = count[selector] - - return xx - np.dot(xt.T / count, xt) diff --git a/pandas/stats/tests/__init__.py b/pandas/stats/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/stats/tests/common.py b/pandas/stats/tests/common.py deleted file mode 100644 index 0ce4b20a4b719..0000000000000 --- a/pandas/stats/tests/common.py +++ /dev/null @@ -1,162 +0,0 @@ -# pylint: disable-msg=W0611,W0402 -# flake8: noqa - -from datetime import datetime -import string -import nose - -import numpy as np - -from pandas import DataFrame, bdate_range -from pandas.util.testing import assert_almost_equal # imported in other tests -import pandas.util.testing as tm - -N = 100 -K = 4 - -start = datetime(2007, 1, 1) -DATE_RANGE = bdate_range(start, periods=N) - -COLS = ['Col' + c for c in string.ascii_uppercase[:K]] - - -def makeDataFrame(): - 
data = DataFrame(np.random.randn(N, K), - columns=COLS, - index=DATE_RANGE) - - return data - - -def getBasicDatasets(): - A = makeDataFrame() - B = makeDataFrame() - C = makeDataFrame() - - return A, B, C - - -def check_for_scipy(): - try: - import scipy - except ImportError: - raise nose.SkipTest('no scipy') - - -def check_for_statsmodels(): - _have_statsmodels = True - try: - import statsmodels.api as sm - except ImportError: - try: - import scikits.statsmodels.api as sm - except ImportError: - raise nose.SkipTest('no statsmodels') - - -class BaseTest(tm.TestCase): - - def setUp(self): - check_for_scipy() - check_for_statsmodels() - - self.A, self.B, self.C = getBasicDatasets() - - self.createData1() - self.createData2() - self.createData3() - - def createData1(self): - date = datetime(2007, 1, 1) - date2 = datetime(2007, 1, 15) - date3 = datetime(2007, 1, 22) - - A = self.A.copy() - B = self.B.copy() - C = self.C.copy() - - A['ColA'][date] = np.NaN - B['ColA'][date] = np.NaN - C['ColA'][date] = np.NaN - C['ColA'][date2] = np.NaN - - # truncate data to save time - A = A[:30] - B = B[:30] - C = C[:30] - - self.panel_y = A - self.panel_x = {'B': B, 'C': C} - - self.series_panel_y = A.filter(['ColA']) - self.series_panel_x = {'B': B.filter(['ColA']), - 'C': C.filter(['ColA'])} - self.series_y = A['ColA'] - self.series_x = {'B': B['ColA'], - 'C': C['ColA']} - - def createData2(self): - y_data = [[1, np.NaN], - [2, 3], - [4, 5]] - y_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2), - datetime(2000, 1, 3)] - y_cols = ['A', 'B'] - self.panel_y2 = DataFrame(np.array(y_data), index=y_index, - columns=y_cols) - - x1_data = [[6, np.NaN], - [7, 8], - [9, 30], - [11, 12]] - x1_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2), - datetime(2000, 1, 3), - datetime(2000, 1, 4)] - x1_cols = ['A', 'B'] - x1 = DataFrame(np.array(x1_data), index=x1_index, - columns=x1_cols) - - x2_data = [[13, 14, np.NaN], - [15, np.NaN, np.NaN], - [16, 17, 48], - [19, 20, 21], - [22, 23, 24]] - x2_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2), - datetime(2000, 1, 3), - datetime(2000, 1, 4), - datetime(2000, 1, 5)] - x2_cols = ['C', 'A', 'B'] - x2 = DataFrame(np.array(x2_data), index=x2_index, - columns=x2_cols) - - self.panel_x2 = {'x1': x1, 'x2': x2} - - def createData3(self): - y_data = [[1, 2], - [3, 4]] - y_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2)] - y_cols = ['A', 'B'] - self.panel_y3 = DataFrame(np.array(y_data), index=y_index, - columns=y_cols) - - x1_data = [['A', 'B'], - ['C', 'A']] - x1_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2)] - x1_cols = ['A', 'B'] - x1 = DataFrame(np.array(x1_data), index=x1_index, - columns=x1_cols) - - x2_data = [['foo', 'bar'], - ['baz', 'foo']] - x2_index = [datetime(2000, 1, 1), - datetime(2000, 1, 2)] - x2_cols = ['A', 'B'] - x2 = DataFrame(np.array(x2_data), index=x2_index, - columns=x2_cols) - - self.panel_x3 = {'x1': x1, 'x2': x2} diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py deleted file mode 100644 index 0c9fcf775ad2d..0000000000000 --- a/pandas/stats/tests/test_fama_macbeth.py +++ /dev/null @@ -1,68 +0,0 @@ -# flake8: noqa - -from pandas import DataFrame, Panel -from pandas.stats.api import fama_macbeth -from .common import assert_almost_equal, BaseTest - -from pandas.compat import range -from pandas import compat -import pandas.util.testing as tm -import numpy as np - - -class TestFamaMacBeth(BaseTest): - - def testFamaMacBethRolling(self): - # self.checkFamaMacBethExtended('rolling', 
self.panel_x, self.panel_y, - # nw_lags_beta=2) - - # df = DataFrame(np.random.randn(50, 10)) - x = dict((k, DataFrame(np.random.randn(50, 10))) for k in 'abcdefg') - x = Panel.from_dict(x) - y = (DataFrame(np.random.randn(50, 10)) + - DataFrame(0.01 * np.random.randn(50, 10))) - self.checkFamaMacBethExtended('rolling', x, y, nw_lags_beta=2) - self.checkFamaMacBethExtended('expanding', x, y, nw_lags_beta=2) - - def checkFamaMacBethExtended(self, window_type, x, y, **kwds): - window = 25 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = fama_macbeth(y=y, x=x, window_type=window_type, window=window, - **kwds) - self._check_stuff_works(result) - - index = result._index - time = len(index) - - for i in range(time - window + 1): - if window_type == 'rolling': - start = index[i] - else: - start = index[0] - - end = index[i + window - 1] - - x2 = {} - for k, v in x.iteritems(): - x2[k] = v.truncate(start, end) - y2 = y.truncate(start, end) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - reference = fama_macbeth(y=y2, x=x2, **kwds) - # reference._stats is tuple - assert_almost_equal(reference._stats, result._stats[:, i], - check_dtype=False) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - static = fama_macbeth(y=y2, x=x2, **kwds) - self._check_stuff_works(static) - - def _check_stuff_works(self, result): - # does it work? - attrs = ['mean_beta', 'std_beta', 't_stat'] - for attr in attrs: - getattr(result, attr) - - # does it work? - result.summary diff --git a/pandas/stats/tests/test_math.py b/pandas/stats/tests/test_math.py deleted file mode 100644 index 3f89dbcd20065..0000000000000 --- a/pandas/stats/tests/test_math.py +++ /dev/null @@ -1,59 +0,0 @@ -import nose - -from datetime import datetime -from numpy.random import randn -import numpy as np - -from pandas.core.api import Series, DataFrame, date_range -import pandas.util.testing as tm -import pandas.stats.math as pmath -from pandas import ols - -N, K = 100, 10 - -_have_statsmodels = True -try: - import statsmodels.api as sm -except ImportError: - try: - import scikits.statsmodels.api as sm # noqa - except ImportError: - _have_statsmodels = False - - -class TestMath(tm.TestCase): - - _nan_locs = np.arange(20, 40) - _inf_locs = np.array([]) - - def setUp(self): - arr = randn(N) - arr[self._nan_locs] = np.NaN - - self.arr = arr - self.rng = date_range(datetime(2009, 1, 1), periods=N) - - self.series = Series(arr.copy(), index=self.rng) - - self.frame = DataFrame(randn(N, K), index=self.rng, - columns=np.arange(K)) - - def test_rank_1d(self): - self.assertEqual(1, pmath.rank(self.series)) - self.assertEqual(0, pmath.rank(Series(0, self.series.index))) - - def test_solve_rect(self): - if not _have_statsmodels: - raise nose.SkipTest("no statsmodels") - - b = Series(np.random.randn(N), self.frame.index) - result = pmath.solve(self.frame, b) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = ols(y=b, x=self.frame, intercept=False).beta - self.assertTrue(np.allclose(result, expected)) - - def test_inv_illformed(self): - singular = DataFrame(np.array([[1, 1], [2, 2]])) - rs = pmath.inv(singular) - expected = np.array([[0.1, 0.2], [0.1, 0.2]]) - self.assertTrue(np.allclose(rs, expected)) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py deleted file mode 100644 index b90c51366c86f..0000000000000 --- a/pandas/stats/tests/test_ols.py +++ /dev/null @@ -1,968 +0,0 @@ -""" -Unit test suite for OLS 
and PanelOLS classes -""" - -# pylint: disable-msg=W0212 - -# flake8: noqa - -from __future__ import division - -from datetime import datetime -from pandas import compat -from distutils.version import LooseVersion -import nose -import numpy as np - -from pandas import date_range, bdate_range -from pandas.core.panel import Panel -from pandas import DataFrame, Index, Series, notnull, offsets -from pandas.stats.api import ols -from pandas.stats.ols import _filter_data -from pandas.stats.plm import NonPooledPanelOLS, PanelOLS -from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, assertRaisesRegexp, slow) -import pandas.util.testing as tm -import pandas.compat as compat -from pandas.stats.tests.common import BaseTest - -_have_statsmodels = True -try: - import statsmodels.api as sm -except ImportError: - try: - import scikits.statsmodels.api as sm - except ImportError: - _have_statsmodels = False - - -def _check_repr(obj): - repr(obj) - str(obj) - - -def _compare_ols_results(model1, model2): - tm.assertIsInstance(model1, type(model2)) - - if hasattr(model1, '_window_type'): - _compare_moving_ols(model1, model2) - else: - _compare_fullsample_ols(model1, model2) - - -def _compare_fullsample_ols(model1, model2): - assert_series_equal(model1.beta, model2.beta) - - -def _compare_moving_ols(model1, model2): - assert_frame_equal(model1.beta, model2.beta) - - -class TestOLS(BaseTest): - - # TODO: Add tests for OLS y predict - # TODO: Right now we just check for consistency between full-sample and - # rolling/expanding results of the panel OLS. We should also cross-check - # with trusted implementations of panel OLS (e.g. R). - # TODO: Add tests for non pooled OLS. - - @classmethod - def setUpClass(cls): - super(TestOLS, cls).setUpClass() - try: - import matplotlib as mpl - mpl.use('Agg', warn=False) - except ImportError: - pass - - if not _have_statsmodels: - raise nose.SkipTest("no statsmodels") - - def testOLSWithDatasets_ccard(self): - self.checkDataSet(sm.datasets.ccard.load(), skip_moving=True) - self.checkDataSet(sm.datasets.cpunish.load(), skip_moving=True) - self.checkDataSet(sm.datasets.longley.load(), skip_moving=True) - self.checkDataSet(sm.datasets.stackloss.load(), skip_moving=True) - - @slow - def testOLSWithDatasets_copper(self): - self.checkDataSet(sm.datasets.copper.load()) - - @slow - def testOLSWithDatasets_scotland(self): - self.checkDataSet(sm.datasets.scotland.load()) - - # degenerate case fails on some platforms - # self.checkDataSet(datasets.ccard.load(), 39, 49) # one col in X all - # 0s - - def testWLS(self): - # WLS centered SS changed (fixed) in 0.5.0 - sm_version = sm.version.version - if sm_version < LooseVersion('0.5.0'): - raise nose.SkipTest("WLS centered SS not fixed in statsmodels" - " version {0}".format(sm_version)) - - X = DataFrame(np.random.randn(30, 4), columns=['A', 'B', 'C', 'D']) - Y = Series(np.random.randn(30)) - weights = X.std(1) - - self._check_wls(X, Y, weights) - - weights.loc[[5, 15]] = np.nan - Y[[2, 21]] = np.nan - self._check_wls(X, Y, weights) - - def _check_wls(self, x, y, weights): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, weights=1 / weights) - - combined = x.copy() - combined['__y__'] = y - combined['__weights__'] = weights - combined = combined.dropna() - - endog = combined.pop('__y__').values - aweights = combined.pop('__weights__').values - exog = sm.add_constant(combined.values, prepend=False) - - sm_result = sm.WLS(endog, exog, weights=1 / 
aweights).fit() - - assert_almost_equal(sm_result.params, result._beta_raw) - assert_almost_equal(sm_result.resid, result._resid_raw) - - self.checkMovingOLS('rolling', x, y, weights=weights) - self.checkMovingOLS('expanding', x, y, weights=weights) - - def checkDataSet(self, dataset, start=None, end=None, skip_moving=False): - exog = dataset.exog[start: end] - endog = dataset.endog[start: end] - x = DataFrame(exog, index=np.arange(exog.shape[0]), - columns=np.arange(exog.shape[1])) - y = Series(endog, index=np.arange(len(endog))) - - self.checkOLS(exog, endog, x, y) - - if not skip_moving: - self.checkMovingOLS('rolling', x, y) - self.checkMovingOLS('rolling', x, y, nw_lags=0) - self.checkMovingOLS('expanding', x, y, nw_lags=0) - self.checkMovingOLS('rolling', x, y, nw_lags=1) - self.checkMovingOLS('expanding', x, y, nw_lags=1) - self.checkMovingOLS('expanding', x, y, nw_lags=1, nw_overlap=True) - - def checkOLS(self, exog, endog, x, y): - reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x) - - # check that sparse version is the same - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - sparse_result = ols(y=y.to_sparse(), x=x.to_sparse()) - _compare_ols_results(result, sparse_result) - - assert_almost_equal(reference.params, result._beta_raw) - assert_almost_equal(reference.df_model, result._df_model_raw) - assert_almost_equal(reference.df_resid, result._df_resid_raw) - assert_almost_equal(reference.fvalue, result._f_stat_raw[0]) - assert_almost_equal(reference.pvalues, result._p_value_raw) - assert_almost_equal(reference.rsquared, result._r2_raw) - assert_almost_equal(reference.rsquared_adj, result._r2_adj_raw) - assert_almost_equal(reference.resid, result._resid_raw) - assert_almost_equal(reference.bse, result._std_err_raw) - assert_almost_equal(reference.tvalues, result._t_stat_raw) - assert_almost_equal(reference.cov_params(), result._var_beta_raw) - assert_almost_equal(reference.fittedvalues, result._y_fitted_raw) - - _check_non_raw_results(result) - - def checkMovingOLS(self, window_type, x, y, weights=None, **kwds): - window = np.linalg.matrix_rank(x.values) * 2 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - moving = ols(y=y, x=x, weights=weights, window_type=window_type, - window=window, **kwds) - - # check that sparse version is the same - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(), - weights=weights, - window_type=window_type, - window=window, **kwds) - _compare_ols_results(moving, sparse_moving) - - index = moving._index - - for n, i in enumerate(moving._valid_indices): - if window_type == 'rolling' and i >= window: - prior_date = index[i - window + 1] - else: - prior_date = index[0] - - date = index[i] - - x_iter = {} - for k, v in compat.iteritems(x): - x_iter[k] = v.truncate(before=prior_date, after=date) - y_iter = y.truncate(before=prior_date, after=date) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - static = ols(y=y_iter, x=x_iter, weights=weights, **kwds) - - self.compare(static, moving, event_index=i, - result_index=n) - - _check_non_raw_results(moving) - - FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', 'p_value', - 'r2', 'r2_adj', 'rmse', 'std_err', 't_stat', - 'var_beta'] - - def compare(self, static, moving, event_index=None, - result_index=None): - - index = moving._index - 
- # Check resid if we have a time index specified - if event_index is not None: - ref = static._resid_raw[-1] - - label = index[event_index] - - res = moving.resid[label] - - assert_almost_equal(ref, res) - - ref = static._y_fitted_raw[-1] - res = moving.y_fitted[label] - - assert_almost_equal(ref, res) - - # Check y_fitted - - for field in self.FIELDS: - attr = '_%s_raw' % field - - ref = getattr(static, attr) - res = getattr(moving, attr) - - if result_index is not None: - res = res[result_index] - - assert_almost_equal(ref, res) - - def test_ols_object_dtype(self): - df = DataFrame(np.random.randn(20, 2), dtype=object) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=df[0], x=df[1]) - summary = repr(model) - - -class TestOLSMisc(tm.TestCase): - - """ - For test coverage with faux data - """ - @classmethod - def setUpClass(cls): - super(TestOLSMisc, cls).setUpClass() - if not _have_statsmodels: - raise nose.SkipTest("no statsmodels") - - def test_f_test(self): - x = tm.makeTimeDataFrame() - y = x.pop('A') - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - - hyp = '1*B+1*C+1*D=0' - result = model.f_test(hyp) - - hyp = ['1*B=0', - '1*C=0', - '1*D=0'] - result = model.f_test(hyp) - assert_almost_equal(result['f-stat'], model.f_stat['f-stat']) - - self.assertRaises(Exception, model.f_test, '1*A=0') - - def test_r2_no_intercept(self): - y = tm.makeTimeSeries() - x = tm.makeTimeDataFrame() - - x_with = x.copy() - x_with['intercept'] = 1. - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model2 = ols(y=y, x=x_with, intercept=False) - assert_series_equal(model1.beta, model2.beta) - - # TODO: can we infer whether the intercept is there... - self.assertNotEqual(model1.r2, model2.r2) - - # rolling - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x, window=20) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model2 = ols(y=y, x=x_with, window=20, intercept=False) - assert_frame_equal(model1.beta, model2.beta) - self.assertTrue((model1.r2 != model2.r2).all()) - - def test_summary_many_terms(self): - x = DataFrame(np.random.randn(100, 20)) - y = np.random.randn(100) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - model.summary - - def test_y_predict(self): - y = tm.makeTimeSeries() - x = tm.makeTimeDataFrame() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x) - assert_series_equal(model1.y_predict, model1.y_fitted) - assert_almost_equal(model1._y_predict_raw, model1._y_fitted_raw) - - def test_predict(self): - y = tm.makeTimeSeries() - x = tm.makeTimeDataFrame() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model1 = ols(y=y, x=x) - assert_series_equal(model1.predict(), model1.y_predict) - assert_series_equal(model1.predict(x=x), model1.y_predict) - assert_series_equal(model1.predict(beta=model1.beta), model1.y_predict) - - exog = x.copy() - exog['intercept'] = 1. - rs = Series(np.dot(exog.values, model1.beta.values), x.index) - assert_series_equal(model1.y_predict, rs) - - x2 = x.reindex(columns=x.columns[::-1]) - assert_series_equal(model1.predict(x=x2), model1.y_predict) - - x3 = x2 + 10 - pred3 = model1.predict(x=x3) - x3['intercept'] = 1. 
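# Illustrative sketch, not from the deleted pandas.stats sources (names are
# invented): the predict tests above rebuild fitted values by hand as
# X_aug @ beta, where X_aug is the regressor matrix with a trailing column of
# ones for the intercept, ordered to match model.beta.
import numpy as np

def manual_predict(X, beta):
    # beta: slope coefficients followed by the intercept, as in the test above
    X_aug = np.column_stack([X, np.ones(len(X))])
    return X_aug.dot(beta)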
- x3 = x3.reindex(columns=model1.beta.index) - expected = Series(np.dot(x3.values, model1.beta.values), x3.index) - assert_series_equal(expected, pred3) - - beta = Series(0., model1.beta.index) - pred4 = model1.predict(beta=beta) - assert_series_equal(Series(0., pred4.index), pred4) - - def test_predict_longer_exog(self): - exogenous = {"1998": "4760", "1999": "5904", "2000": "4504", - "2001": "9808", "2002": "4241", "2003": "4086", - "2004": "4687", "2005": "7686", "2006": "3740", - "2007": "3075", "2008": "3753", "2009": "4679", - "2010": "5468", "2011": "7154", "2012": "4292", - "2013": "4283", "2014": "4595", "2015": "9194", - "2016": "4221", "2017": "4520"} - endogenous = {"1998": "691", "1999": "1580", "2000": "80", - "2001": "1450", "2002": "555", "2003": "956", - "2004": "877", "2005": "614", "2006": "468", - "2007": "191"} - - endog = Series(endogenous) - exog = Series(exogenous) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=endog, x=exog) - - pred = model.y_predict - self.assert_index_equal(pred.index, exog.index) - - def test_longpanel_series_combo(self): - wp = tm.makePanel() - lp = wp.to_frame() - - y = lp.pop('ItemA') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=lp, entity_effects=True, window=20) - self.assertTrue(notnull(model.beta.values).all()) - tm.assertIsInstance(model, PanelOLS) - model.summary - - def test_series_rhs(self): - y = tm.makeTimeSeries() - x = tm.makeTimeSeries() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = ols(y=y, x={'x': x}) - assert_series_equal(model.beta, expected.beta) - - # GH 5233/5250 - assert_series_equal(model.y_predict, model.predict(x=x)) - - def test_various_attributes(self): - # just make sure everything "works". 
test correctness elsewhere - - x = DataFrame(np.random.randn(100, 5)) - y = np.random.randn(100) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x, window=20) - - series_attrs = ['rank', 'df', 'forecast_mean', 'forecast_vol'] - - for attr in series_attrs: - value = getattr(model, attr) - tm.assertIsInstance(value, Series) - - # works - model._results - - def test_catch_regressor_overlap(self): - df1 = tm.makeTimeDataFrame().loc[:, ['A', 'B']] - df2 = tm.makeTimeDataFrame().loc[:, ['B', 'C', 'D']] - y = tm.makeTimeSeries() - - data = {'foo': df1, 'bar': df2} - - def f(): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ols(y=y, x=data) - self.assertRaises(Exception, f) - - def test_plm_ctor(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x, intercept=False) - model.summary - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=Panel(x)) - model.summary - - def test_plm_attrs(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rmodel = ols(y=y, x=x, window=10) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - model.resid - rmodel.resid - - def test_plm_lagged_y_predict(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x, window=10) - result = model.lagged_y_predict(2) - - def test_plm_f_test(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=y, x=x) - - hyp = '1*a+1*b=0' - result = model.f_test(hyp) - - hyp = ['1*a=0', - '1*b=0'] - result = model.f_test(hyp) - assert_almost_equal(result['f-stat'], model.f_stat['f-stat']) - - def test_plm_exclude_dummy_corner(self): - y = tm.makeTimeDataFrame() - x = {'a': tm.makeTimeDataFrame(), - 'b': tm.makeTimeDataFrame()} - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols( - y=y, x=x, entity_effects=True, dropped_dummies={'entity': 'D'}) - model.summary - - def f(): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ols(y=y, x=x, entity_effects=True, - dropped_dummies={'entity': 'E'}) - self.assertRaises(Exception, f) - - def test_columns_tuples_summary(self): - # #1837 - X = DataFrame(np.random.randn(10, 2), columns=[('a', 'b'), ('c', 'd')]) - Y = Series(np.random.randn(10)) - - # it works! 
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - model = ols(y=Y, x=X) - model.summary - - -class TestPanelOLS(BaseTest): - - FIELDS = ['beta', 'df', 'df_model', 'df_resid', 'f_stat', - 'p_value', 'r2', 'r2_adj', 'rmse', 'std_err', - 't_stat', 'var_beta'] - - _other_fields = ['resid', 'y_fitted'] - - def testFiltering(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2) - - x = result._x - index = x.index.get_level_values(0) - index = Index(sorted(set(index))) - exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)]) - self.assert_index_equal(exp_index, index) - - index = x.index.get_level_values(1) - index = Index(sorted(set(index))) - exp_index = Index(['A', 'B']) - self.assert_index_equal(exp_index, index) - - x = result._x_filtered - index = x.index.get_level_values(0) - index = Index(sorted(set(index))) - exp_index = Index([datetime(2000, 1, 1), - datetime(2000, 1, 3), - datetime(2000, 1, 4)]) - self.assert_index_equal(exp_index, index) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - - exp_x = np.array([[6, 14, 1], [9, 17, 1], - [30, 48, 1]], dtype=np.float64) - assert_almost_equal(exp_x, result._x.values) - - exp_x_filtered = np.array([[6, 14, 1], [9, 17, 1], [30, 48, 1], - [11, 20, 1], [12, 21, 1]], dtype=np.float64) - assert_almost_equal(exp_x_filtered, result._x_filtered.values) - - self.assert_index_equal(result._x_filtered.index.levels[0], - result.y_fitted.index) - - def test_wls_panel(self): - y = tm.makeTimeDataFrame() - x = Panel({'x1': tm.makeTimeDataFrame(), - 'x2': tm.makeTimeDataFrame()}) - - y.iloc[[1, 7], y.columns.get_loc('A')] = np.nan - y.iloc[[6, 15], y.columns.get_loc('B')] = np.nan - y.iloc[[3, 20], y.columns.get_loc('C')] = np.nan - y.iloc[[5, 11], y.columns.get_loc('D')] = np.nan - - stack_y = y.stack() - stack_x = DataFrame(dict((k, v.stack()) - for k, v in x.iteritems())) - - weights = x.std('items') - stack_weights = weights.stack() - - stack_y.index = stack_y.index._tuple_index - stack_x.index = stack_x.index._tuple_index - stack_weights.index = stack_weights.index._tuple_index - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, weights=1 / weights) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights) - - assert_almost_equal(result.beta, expected.beta) - - for attr in ['resid', 'y_fitted']: - rvals = getattr(result, attr).stack().values - evals = getattr(expected, attr).values - assert_almost_equal(rvals, evals) - - def testWithTimeEffects(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, time_effects=True) - - # .flat is flatiter instance - assert_almost_equal(result._y_trans.values.flat, [0, -0.5, 0.5], - check_dtype=False) - - exp_x = np.array([[0, 0], [-10.5, -15.5], [10.5, 15.5]]) - assert_almost_equal(result._x_trans.values, exp_x) - - # _check_non_raw_results(result) - - def testWithEntityEffects(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - - exp_x = DataFrame([[0., 6., 14., 1.], [0, 9, 17, 1], [1, 30, 48, 1]], - index=result._x.index, columns=['FE_B', 
'x1', 'x2', - 'intercept'], - dtype=float) - tm.assert_frame_equal(result._x, exp_x.loc[:, result._x.columns]) - # _check_non_raw_results(result) - - def testWithEntityEffectsAndDroppedDummies(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True, - dropped_dummies={'entity': 'B'}) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - exp_x = DataFrame([[1., 6., 14., 1.], [1, 9, 17, 1], [0, 30, 48, 1]], - index=result._x.index, columns=['FE_A', 'x1', 'x2', - 'intercept'], - dtype=float) - tm.assert_frame_equal(result._x, exp_x.loc[:, result._x.columns]) - # _check_non_raw_results(result) - - def testWithXEffects(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1']) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - - res = result._x - exp_x = DataFrame([[0., 0., 14., 1.], [0, 1, 17, 1], [1, 0, 48, 1]], - columns=['x1_30', 'x1_9', 'x2', 'intercept'], - index=res.index, dtype=float) - exp_x[['x1_30', 'x1_9']] = exp_x[['x1_30', 'x1_9']].astype(np.uint8) - assert_frame_equal(res, exp_x.reindex(columns=res.columns)) - - def testWithXEffectsAndDroppedDummies(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'], - dropped_dummies={'x1': 30}) - - res = result._x - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 4, 5], - check_dtype=False) - exp_x = DataFrame([[1., 0., 14., 1.], [0, 1, 17, 1], [0, 0, 48, 1]], - columns=['x1_6', 'x1_9', 'x2', 'intercept'], - index=res.index, dtype=float) - exp_x[['x1_6', 'x1_9']] = exp_x[['x1_6', 'x1_9']].astype(np.uint8) - - assert_frame_equal(res, exp_x.reindex(columns=res.columns)) - - def testWithXEffectsAndConversion(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y3, x=self.panel_x3, - x_effects=['x1', 'x2']) - - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 2, 3, 4], - check_dtype=False) - exp_x = np.array([[0, 0, 0, 1, 1], [1, 0, 0, 0, 1], [0, 1, 1, 0, 1], - [0, 0, 0, 1, 1]], dtype=np.float64) - assert_almost_equal(result._x.values, exp_x) - - exp_index = Index(['x1_B', 'x1_C', 'x2_baz', 'x2_foo', 'intercept']) - self.assert_index_equal(exp_index, result._x.columns) - - # _check_non_raw_results(result) - - def testWithXEffectsAndConversionAndDroppedDummies(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'], - dropped_dummies={'x2': 'foo'}) - # .flat is flatiter instance - assert_almost_equal(result._y.values.flat, [1, 2, 3, 4], - check_dtype=False) - exp_x = np.array([[0, 0, 0, 0, 1], [1, 0, 1, 0, 1], [0, 1, 0, 1, 1], - [0, 0, 0, 0, 1]], dtype=np.float64) - assert_almost_equal(result._x.values, exp_x) - - exp_index = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept']) - self.assert_index_equal(exp_index, result._x.columns) - - # _check_non_raw_results(result) - - def testForSeries(self): - self.checkForSeries(self.series_panel_x, self.series_panel_y, - self.series_x, self.series_y) - - self.checkForSeries(self.series_panel_x, self.series_panel_y, - self.series_x, self.series_y, nw_lags=0) - - self.checkForSeries(self.series_panel_x, 
self.series_panel_y, - self.series_x, self.series_y, nw_lags=1, - nw_overlap=True) - - def testRolling(self): - self.checkMovingOLS(self.panel_x, self.panel_y) - - def testRollingWithFixedEffects(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - entity_effects=True) - self.checkMovingOLS(self.panel_x, self.panel_y, intercept=False, - entity_effects=True) - - def testRollingWithTimeEffects(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - time_effects=True) - - def testRollingWithNeweyWest(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - nw_lags=1) - - def testRollingWithEntityCluster(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - cluster='entity') - - def testUnknownClusterRaisesValueError(self): - assertRaisesRegexp(ValueError, "Unrecognized cluster.*ridiculous", - self.checkMovingOLS, self.panel_x, self.panel_y, - cluster='ridiculous') - - def testRollingWithTimeEffectsAndEntityCluster(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - time_effects=True, cluster='entity') - - def testRollingWithTimeCluster(self): - self.checkMovingOLS(self.panel_x, self.panel_y, - cluster='time') - - def testRollingWithNeweyWestAndEntityCluster(self): - self.assertRaises(ValueError, self.checkMovingOLS, - self.panel_x, self.panel_y, - nw_lags=1, cluster='entity') - - def testRollingWithNeweyWestAndTimeEffectsAndEntityCluster(self): - self.assertRaises(ValueError, - self.checkMovingOLS, self.panel_x, self.panel_y, - nw_lags=1, cluster='entity', - time_effects=True) - - def testExpanding(self): - self.checkMovingOLS( - self.panel_x, self.panel_y, window_type='expanding') - - def testNonPooled(self): - self.checkNonPooled(y=self.panel_y, x=self.panel_x) - self.checkNonPooled(y=self.panel_y, x=self.panel_x, - window_type='rolling', window=25, min_periods=10) - - def testUnknownWindowType(self): - assertRaisesRegexp(ValueError, "window.*ridiculous", - self.checkNonPooled, y=self.panel_y, x=self.panel_x, - window_type='ridiculous', window=25, min_periods=10) - - def checkNonPooled(self, x, y, **kwds): - # For now, just check that it doesn't crash - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, pool=False, **kwds) - - _check_repr(result) - for attr in NonPooledPanelOLS.ATTRIBUTES: - _check_repr(getattr(result, attr)) - - def checkMovingOLS(self, x, y, window_type='rolling', **kwds): - window = 25 # must be larger than rank of x - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - moving = ols(y=y, x=x, window_type=window_type, - window=window, **kwds) - - index = moving._index - - for n, i in enumerate(moving._valid_indices): - if window_type == 'rolling' and i >= window: - prior_date = index[i - window + 1] - else: - prior_date = index[0] - - date = index[i] - - x_iter = {} - for k, v in compat.iteritems(x): - x_iter[k] = v.truncate(before=prior_date, after=date) - y_iter = y.truncate(before=prior_date, after=date) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - static = ols(y=y_iter, x=x_iter, **kwds) - - self.compare(static, moving, event_index=i, - result_index=n) - - _check_non_raw_results(moving) - - def checkForSeries(self, x, y, series_x, series_y, **kwds): - # Consistency check with simple OLS. 
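# Illustrative sketch, not from the deleted pandas.stats sources (names are
# invented): checkMovingOLS above asserts that row n of a rolling fit equals a
# one-off fit on just that window; the same invariant with plain NumPy least
# squares looks like this.
import numpy as np

def rolling_betas(X, y, window):
    betas = []
    for end in range(window, len(y) + 1):
        Xw, yw = X[end - window:end], y[end - window:end]
        beta, _, _, _ = np.linalg.lstsq(Xw, yw, rcond=None)
        betas.append(beta)
    return np.array(betas)

# rolling_betas(X, y, w)[n] should match np.linalg.lstsq on that window alone.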
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ols(y=y, x=x, **kwds) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - reference = ols(y=series_y, x=series_x, **kwds) - - self.compare(reference, result) - - def compare(self, static, moving, event_index=None, - result_index=None): - - # Check resid if we have a time index specified - if event_index is not None: - staticSlice = _period_slice(static, -1) - movingSlice = _period_slice(moving, event_index) - - ref = static._resid_raw[staticSlice] - res = moving._resid_raw[movingSlice] - - assert_almost_equal(ref, res) - - ref = static._y_fitted_raw[staticSlice] - res = moving._y_fitted_raw[movingSlice] - - assert_almost_equal(ref, res) - - # Check y_fitted - - for field in self.FIELDS: - attr = '_%s_raw' % field - - ref = getattr(static, attr) - res = getattr(moving, attr) - - if result_index is not None: - res = res[result_index] - - assert_almost_equal(ref, res) - - def test_auto_rolling_window_type(self): - data = tm.makeTimeDataFrame() - y = data.pop('A') - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - window_model = ols(y=y, x=data, window=20, min_periods=10) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - rolling_model = ols(y=y, x=data, window=20, min_periods=10, - window_type='rolling') - - assert_frame_equal(window_model.beta, rolling_model.beta) - - def test_group_agg(self): - from pandas.stats.plm import _group_agg - - values = np.ones((10, 2)) * np.arange(10).reshape((10, 1)) - bounds = np.arange(5) * 2 - f = lambda x: x.mean(axis=0) - - agged = _group_agg(values, bounds, f) - - assert(agged[1][0] == 2.5) - assert(agged[2][0] == 4.5) - - # test a function that doesn't aggregate - f2 = lambda x: np.zeros((2, 2)) - self.assertRaises(Exception, _group_agg, values, bounds, f2) - - -def _check_non_raw_results(model): - _check_repr(model) - _check_repr(model.resid) - _check_repr(model.summary_as_matrix) - _check_repr(model.y_fitted) - _check_repr(model.y_predict) - - -def _period_slice(panelModel, i): - index = panelModel._x_trans.index - period = index.levels[0][i] - - L, R = index.get_major_bounds(period, period) - - return slice(L, R) - - -class TestOLSFilter(tm.TestCase): - - def setUp(self): - date_index = date_range(datetime(2009, 12, 11), periods=3, - freq=offsets.BDay()) - ts = Series([3, 1, 4], index=date_index) - self.TS1 = ts - - date_index = date_range(datetime(2009, 12, 11), periods=5, - freq=offsets.BDay()) - ts = Series([1, 5, 9, 2, 6], index=date_index) - self.TS2 = ts - - date_index = date_range(datetime(2009, 12, 11), periods=3, - freq=offsets.BDay()) - ts = Series([5, np.nan, 3], index=date_index) - self.TS3 = ts - - date_index = date_range(datetime(2009, 12, 11), periods=5, - freq=offsets.BDay()) - ts = Series([np.nan, 5, 8, 9, 7], index=date_index) - self.TS4 = ts - - data = {'x1': self.TS2, 'x2': self.TS4} - self.DF1 = DataFrame(data=data) - - data = {'x1': self.TS2, 'x2': self.TS4} - self.DICT1 = data - - def testFilterWithSeriesRHS(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, {'x1': self.TS2}, None) - self.tsAssertEqual(self.TS1.astype(np.float64), lhs, check_names=False) - self.tsAssertEqual(self.TS2[:3].astype(np.float64), rhs['x1'], - check_names=False) - self.tsAssertEqual(self.TS2.astype(np.float64), rhs_pre['x1'], - check_names=False) - - def testFilterWithSeriesRHS2(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS2, {'x1': 
self.TS1}, None) - self.tsAssertEqual(self.TS2[:3].astype(np.float64), lhs, - check_names=False) - self.tsAssertEqual(self.TS1.astype(np.float64), rhs['x1'], - check_names=False) - self.tsAssertEqual(self.TS1.astype(np.float64), rhs_pre['x1'], - check_names=False) - - def testFilterWithSeriesRHS3(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS3, {'x1': self.TS4}, None) - exp_lhs = self.TS3[2:3] - exp_rhs = self.TS4[2:3] - exp_rhs_pre = self.TS4[1:] - self.tsAssertEqual(exp_lhs, lhs, check_names=False) - self.tsAssertEqual(exp_rhs, rhs['x1'], check_names=False) - self.tsAssertEqual(exp_rhs_pre, rhs_pre['x1'], check_names=False) - - def testFilterWithDataFrameRHS(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, self.DF1, None) - exp_lhs = self.TS1[1:].astype(np.float64) - exp_rhs1 = self.TS2[1:3] - exp_rhs2 = self.TS4[1:3].astype(np.float64) - self.tsAssertEqual(exp_lhs, lhs, check_names=False) - self.tsAssertEqual(exp_rhs1, rhs['x1'], check_names=False) - self.tsAssertEqual(exp_rhs2, rhs['x2'], check_names=False) - - def testFilterWithDictRHS(self): - (lhs, rhs, weights, rhs_pre, - index, valid) = _filter_data(self.TS1, self.DICT1, None) - exp_lhs = self.TS1[1:].astype(np.float64) - exp_rhs1 = self.TS2[1:3].astype(np.float64) - exp_rhs2 = self.TS4[1:3].astype(np.float64) - self.tsAssertEqual(exp_lhs, lhs, check_names=False) - self.tsAssertEqual(exp_rhs1, rhs['x1'], check_names=False) - self.tsAssertEqual(exp_rhs2, rhs['x2'], check_names=False) - - def tsAssertEqual(self, ts1, ts2, **kwargs): - self.assert_series_equal(ts1, ts2, **kwargs) diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py deleted file mode 100644 index 04e2019f00a82..0000000000000 --- a/pandas/stats/tests/test_var.py +++ /dev/null @@ -1,94 +0,0 @@ -# flake8: noqa - -from __future__ import print_function - -import pandas.util.testing as tm - -from pandas.compat import range -import nose - -raise nose.SkipTest('skipping this for now') - -try: - import statsmodels.tsa.var as sm_var - import statsmodels as sm -except ImportError: - import scikits.statsmodels.tsa.var as sm_var - import scikits.statsmodels as sm - - -import pandas.stats.var as _pvar -reload(_pvar) -from pandas.stats.var import VAR - -DECIMAL_6 = 6 -DECIMAL_5 = 5 -DECIMAL_4 = 4 -DECIMAL_3 = 3 -DECIMAL_2 = 2 - - -class CheckVAR(object): - - def test_params(self): - tm.assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_3) - - def test_neqs(self): - tm.assert_numpy_array_equal(self.res1.neqs, self.res2.neqs) - - def test_nobs(self): - tm.assert_numpy_array_equal(self.res1.avobs, self.res2.nobs) - - def test_df_eq(self): - tm.assert_numpy_array_equal(self.res1.df_eq, self.res2.df_eq) - - def test_rmse(self): - results = self.res1.results - for i in range(len(results)): - tm.assert_almost_equal(results[i].mse_resid ** .5, - eval('self.res2.rmse_' + str(i + 1)), - DECIMAL_6) - - def test_rsquared(self): - results = self.res1.results - for i in range(len(results)): - tm.assert_almost_equal(results[i].rsquared, - eval('self.res2.rsquared_' + str(i + 1)), - DECIMAL_3) - - def test_llf(self): - results = self.res1.results - tm.assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_2) - for i in range(len(results)): - tm.assert_almost_equal(results[i].llf, - eval('self.res2.llf_' + str(i + 1)), - DECIMAL_2) - - def test_aic(self): - tm.assert_almost_equal(self.res1.aic, self.res2.aic) - - def test_bic(self): - tm.assert_almost_equal(self.res1.bic, self.res2.bic) - - def 
test_hqic(self): - tm.assert_almost_equal(self.res1.hqic, self.res2.hqic) - - def test_fpe(self): - tm.assert_almost_equal(self.res1.fpe, self.res2.fpe) - - def test_detsig(self): - tm.assert_almost_equal(self.res1.detomega, self.res2.detsig) - - def test_bse(self): - tm.assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4) - - -class Foo(object): - - def __init__(self): - data = sm.datasets.macrodata.load() - data = data.data[['realinv', 'realgdp', 'realcons']].view((float, 3)) - data = diff(log(data), axis=0) - self.res1 = VAR2(endog=data).fit(maxlag=2) - from results import results_var - self.res2 = results_var.MacrodataResults() diff --git a/pandas/stats/var.py b/pandas/stats/var.py deleted file mode 100644 index db4028d60f5c8..0000000000000 --- a/pandas/stats/var.py +++ /dev/null @@ -1,605 +0,0 @@ -# flake8: noqa - -from __future__ import division - -from pandas.compat import range, lrange, zip, reduce -from pandas import compat -import numpy as np -from pandas.core.base import StringMixin -from pandas.util.decorators import cache_readonly -from pandas.core.frame import DataFrame -from pandas.core.panel import Panel -from pandas.core.series import Series -import pandas.stats.common as common -from pandas.stats.math import inv -from pandas.stats.ols import _combine_rhs - - -class VAR(StringMixin): - """ - Estimates VAR(p) regression on multivariate time series data - presented in pandas data structures. - - Parameters - ---------- - data : DataFrame or dict of Series - p : lags to include - - """ - - def __init__(self, data, p=1, intercept=True): - import warnings - warnings.warn("The pandas.stats.var module is deprecated and will be " - "removed in a future version. We refer to external packages " - "like statsmodels, see some examples here: " - "http://www.statsmodels.org/stable/vector_ar.html#var", - FutureWarning, stacklevel=4) - - try: - import statsmodels.tsa.vector_ar.api as sm_var - except ImportError: - import scikits.statsmodels.tsa.var as sm_var - - self._data = DataFrame(_combine_rhs(data)) - self._p = p - - self._columns = self._data.columns - self._index = self._data.index - - self._intercept = intercept - - @cache_readonly - def aic(self): - """Returns the Akaike information criterion.""" - return self._ic['aic'] - - @cache_readonly - def bic(self): - """Returns the Bayesian information criterion.""" - return self._ic['bic'] - - @cache_readonly - def beta(self): - """ - Returns a DataFrame, where each column x1 contains the betas - calculated by regressing the x1 column of the VAR input with - the lagged input. - - Returns - ------- - DataFrame - """ - d = dict([(key, value.beta) - for (key, value) in compat.iteritems(self.ols_results)]) - return DataFrame(d) - - def forecast(self, h): - """ - Returns a DataFrame containing the forecasts for 1, 2, ..., n time - steps. Each column x1 contains the forecasts of the x1 column. - - Parameters - ---------- - n: int - Number of time steps ahead to forecast. - - Returns - ------- - DataFrame - """ - forecast = self._forecast_raw(h)[:, 0, :] - return DataFrame(forecast, index=lrange(1, 1 + h), - columns=self._columns) - - def forecast_cov(self, h): - """ - Returns the covariance of the forecast residuals. - - Returns - ------- - DataFrame - """ - return [DataFrame(value, index=self._columns, columns=self._columns) - for value in self._forecast_cov_raw(h)] - - def forecast_std_err(self, h): - """ - Returns the standard errors of the forecast residuals. 
- - Returns - ------- - DataFrame - """ - return DataFrame(self._forecast_std_err_raw(h), - index=lrange(1, 1 + h), columns=self._columns) - - @cache_readonly - def granger_causality(self): - """Returns the f-stats and p-values from the Granger Causality Test. - - If the data consists of columns x1, x2, x3, then we perform the - following regressions: - - x1 ~ L(x2, x3) - x1 ~ L(x1, x3) - x1 ~ L(x1, x2) - - The f-stats of these results are placed in the 'x1' column of the - returned DataFrame. We then repeat for x2, x3. - - Returns - ------- - Dict, where 'f-stat' returns the DataFrame containing the f-stats, - and 'p-value' returns the DataFrame containing the corresponding - p-values of the f-stats. - """ - from pandas.stats.api import ols - from scipy.stats import f - - d = {} - for col in self._columns: - d[col] = {} - for i in range(1, 1 + self._p): - lagged_data = self._lagged_data[i].filter( - self._columns - [col]) - - for key, value in compat.iteritems(lagged_data): - d[col][_make_param_name(i, key)] = value - - f_stat_dict = {} - p_value_dict = {} - - for col, y in compat.iteritems(self._data): - ssr_full = (self.resid[col] ** 2).sum() - - f_stats = [] - p_values = [] - - for col2 in self._columns: - result = ols(y=y, x=d[col2]) - - resid = result.resid - ssr_reduced = (resid ** 2).sum() - - M = self._p - N = self._nobs - K = self._k * self._p + 1 - f_stat = ((ssr_reduced - ssr_full) / M) / (ssr_full / (N - K)) - f_stats.append(f_stat) - - p_value = f.sf(f_stat, M, N - K) - p_values.append(p_value) - - f_stat_dict[col] = Series(f_stats, self._columns) - p_value_dict[col] = Series(p_values, self._columns) - - f_stat_mat = DataFrame(f_stat_dict) - p_value_mat = DataFrame(p_value_dict) - - return { - 'f-stat': f_stat_mat, - 'p-value': p_value_mat, - } - - @cache_readonly - def ols_results(self): - """ - Returns the results of the regressions: - x_1 ~ L(X) - x_2 ~ L(X) - ... - x_k ~ L(X) - - where X = [x_1, x_2, ..., x_k] - and L(X) represents the columns of X lagged 1, 2, ..., n lags - (n is the user-provided number of lags). - - Returns - ------- - dict - """ - from pandas.stats.api import ols - - d = {} - for i in range(1, 1 + self._p): - for col, series in compat.iteritems(self._lagged_data[i]): - d[_make_param_name(i, col)] = series - - result = dict([(col, ols(y=y, x=d, intercept=self._intercept)) - for col, y in compat.iteritems(self._data)]) - - return result - - @cache_readonly - def resid(self): - """ - Returns the DataFrame containing the residuals of the VAR regressions. - Each column x1 contains the residuals generated by regressing the x1 - column of the input against the lagged input. - - Returns - ------- - DataFrame - """ - d = dict([(col, series.resid) - for (col, series) in compat.iteritems(self.ols_results)]) - return DataFrame(d, index=self._index) - - @cache_readonly - def summary(self): - template = """ -%(banner_top)s - -Number of Observations: %(nobs)d -AIC: %(aic).3f -BIC: %(bic).3f - -%(banner_coef)s -%(coef_table)s -%(banner_end)s -""" - params = { - 'banner_top': common.banner('Summary of VAR'), - 'banner_coef': common.banner('Summary of Estimated Coefficients'), - 'banner_end': common.banner('End of Summary'), - 'coef_table': self.beta, - 'aic': self.aic, - 'bic': self.bic, - 'nobs': self._nobs, - } - - return template % params - - @cache_readonly - def _alpha(self): - """ - Returns array where the i-th element contains the intercept - when regressing the i-th column of self._data with the lagged data. 
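# Illustrative sketch, not from the deleted pandas.stats sources (names are
# invented): granger_causality above is a restricted-vs-unrestricted F test.
# With ssr_full from the regression on all lags and ssr_reduced from the
# regression omitting the lags of one candidate variable, the statistic and
# p-value used in the code reduce to:
from scipy.stats import f

def granger_f(ssr_reduced, ssr_full, n_restrictions, nobs, n_params):
    # n_restrictions = p (excluded lag coefficients); n_params = k * p + 1
    f_stat = ((ssr_reduced - ssr_full) / n_restrictions) / (
        ssr_full / (nobs - n_params))
    p_value = f.sf(f_stat, n_restrictions, nobs - n_params)
    return f_stat, p_value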
- """ - if self._intercept: - return self._beta_raw[-1] - else: - return np.zeros(self._k) - - @cache_readonly - def _beta_raw(self): - return np.array([list(self.beta[col].values()) for col in self._columns]).T - - def _trans_B(self, h): - """ - Returns 0, 1, ..., (h-1)-th power of transpose of B as defined in - equation (4) on p. 142 of the Stata 11 Time Series reference book. - """ - result = [np.eye(1 + self._k * self._p)] - - row1 = np.zeros((1, 1 + self._k * self._p)) - row1[0, 0] = 1 - - v = self._alpha.reshape((self._k, 1)) - row2 = np.hstack(tuple([v] + self._lag_betas)) - - m = self._k * (self._p - 1) - row3 = np.hstack(( - np.zeros((m, 1)), - np.eye(m), - np.zeros((m, self._k)) - )) - - trans_B = np.vstack((row1, row2, row3)).T - - result.append(trans_B) - - for i in range(2, h): - result.append(np.dot(trans_B, result[i - 1])) - - return result - - @cache_readonly - def _x(self): - values = np.array([ - list(self._lagged_data[i][col].values()) - for i in range(1, 1 + self._p) - for col in self._columns - ]).T - - x = np.hstack((np.ones((len(values), 1)), values))[self._p:] - - return x - - @cache_readonly - def _cov_beta(self): - cov_resid = self._sigma - - x = self._x - - inv_cov_x = inv(np.dot(x.T, x)) - - return np.kron(inv_cov_x, cov_resid) - - def _data_xs(self, i): - """ - Returns the cross-section of the data at the given timestep. - """ - return self._data.values[i] - - def _forecast_cov_raw(self, n): - resid = self._forecast_cov_resid_raw(n) - # beta = self._forecast_cov_beta_raw(n) - - # return [a + b for a, b in zip(resid, beta)] - # TODO: ignore the beta forecast std err until it's verified - - return resid - - def _forecast_cov_beta_raw(self, n): - """ - Returns the covariance of the beta errors for the forecast at - 1, 2, ..., n timesteps. - """ - p = self._p - - values = self._data.values - T = len(values) - self._p - 1 - - results = [] - - for h in range(1, n + 1): - psi = self._psi(h) - trans_B = self._trans_B(h) - - sum = 0 - - cov_beta = self._cov_beta - - for t in range(T + 1): - index = t + p - y = values.take(lrange(index, index - p, -1), axis=0).ravel() - trans_Z = np.hstack(([1], y)) - trans_Z = trans_Z.reshape(1, len(trans_Z)) - - sum2 = 0 - for i in range(h): - ZB = np.dot(trans_Z, trans_B[h - 1 - i]) - - prod = np.kron(ZB, psi[i]) - sum2 = sum2 + prod - - sum = sum + chain_dot(sum2, cov_beta, sum2.T) - - results.append(sum / (T + 1)) - - return results - - def _forecast_cov_resid_raw(self, h): - """ - Returns the covariance of the residual errors for the forecast at - 1, 2, ..., h timesteps. - """ - psi_values = self._psi(h) - sum = 0 - result = [] - for i in range(h): - psi = psi_values[i] - sum = sum + chain_dot(psi, self._sigma, psi.T) - result.append(sum) - - return result - - def _forecast_raw(self, h): - """ - Returns the forecast at 1, 2, ..., h timesteps in the future. - """ - k = self._k - result = [] - for i in range(h): - sum = self._alpha.reshape(1, k) - for j in range(self._p): - beta = self._lag_betas[j] - idx = i - j - if idx > 0: - y = result[idx - 1] - else: - y = self._data_xs(idx - 1) - - sum = sum + np.dot(beta, y.T).T - result.append(sum) - - return np.array(result) - - def _forecast_std_err_raw(self, h): - """ - Returns the standard error of the forecasts - at 1, 2, ..., n timesteps. - """ - return np.array([np.sqrt(np.diag(value)) - for value in self._forecast_cov_raw(h)]) - - @cache_readonly - def _ic(self): - """ - Returns the Akaike/Bayesian information criteria. 
- """ - RSS = self._rss - k = self._p * (self._k * self._p + 1) - n = self._nobs * self._k - - return {'aic': 2 * k + n * np.log(RSS / n), - 'bic': n * np.log(RSS / n) + k * np.log(n)} - - @cache_readonly - def _k(self): - return len(self._columns) - - @cache_readonly - def _lag_betas(self): - """ - Returns list of B_i, where B_i represents the (k, k) matrix - with the j-th row containing the betas of regressing the j-th - column of self._data with self._data lagged i time steps. - First element is B_1, second element is B_2, etc. - """ - k = self._k - b = self._beta_raw - return [b[k * i: k * (i + 1)].T for i in range(self._p)] - - @cache_readonly - def _lagged_data(self): - return dict([(i, self._data.shift(i)) - for i in range(1, 1 + self._p)]) - - @cache_readonly - def _nobs(self): - return len(self._data) - self._p - - def _psi(self, h): - """ - psi value used for calculating standard error. - - Returns [psi_0, psi_1, ..., psi_(h - 1)] - """ - k = self._k - result = [np.eye(k)] - for i in range(1, h): - result.append(sum( - [np.dot(result[i - j], self._lag_betas[j - 1]) - for j in range(1, 1 + i) - if j <= self._p])) - - return result - - @cache_readonly - def _resid_raw(self): - resid = np.array([self.ols_results[col]._resid_raw - for col in self._columns]) - return resid - - @cache_readonly - def _rss(self): - """Returns the sum of the squares of the residuals.""" - return (self._resid_raw ** 2).sum() - - @cache_readonly - def _sigma(self): - """Returns covariance of resids.""" - k = self._k - n = self._nobs - - resid = self._resid_raw - - return np.dot(resid, resid.T) / (n - k) - - def __unicode__(self): - return self.summary - - -def lag_select(data, max_lags=5, ic=None): - """ - Select number of lags based on a variety of information criteria - - Parameters - ---------- - data : DataFrame-like - max_lags : int - Maximum number of lags to evaluate - ic : {None, 'aic', 'bic', ...} - Choosing None will just display the results - - Returns - ------- - None - """ - pass - - -class PanelVAR(VAR): - """ - Performs Vector Autoregression on panel data. - - Parameters - ---------- - data: Panel or dict of DataFrame - lags: int - """ - - def __init__(self, data, lags, intercept=True): - self._data = _prep_panel_data(data) - self._p = lags - self._intercept = intercept - - self._columns = self._data.items - - @cache_readonly - def _nobs(self): - """Returns the number of observations.""" - _, timesteps, entities = self._data.values.shape - return (timesteps - self._p) * entities - - @cache_readonly - def _rss(self): - """Returns the sum of the squares of the residuals.""" - return (self.resid.values ** 2).sum() - - def forecast(self, h): - """ - Returns the forecasts at 1, 2, ..., n timesteps in the future. - """ - forecast = self._forecast_raw(h).T.swapaxes(1, 2) - index = lrange(1, 1 + h) - w = Panel(forecast, items=self._data.items, major_axis=index, - minor_axis=self._data.minor_axis) - return w - - @cache_readonly - def resid(self): - """ - Returns the DataFrame containing the residuals of the VAR regressions. - Each column x1 contains the residuals generated by regressing the x1 - column of the input against the lagged input. 
- - Returns - ------- - DataFrame - """ - d = dict([(key, value.resid) - for (key, value) in compat.iteritems(self.ols_results)]) - return Panel.fromDict(d) - - def _data_xs(self, i): - return self._data.values[:, i, :].T - - @cache_readonly - def _sigma(self): - """Returns covariance of resids.""" - k = self._k - resid = _drop_incomplete_rows(self.resid.toLong().values) - n = len(resid) - return np.dot(resid.T, resid) / (n - k) - - -def _prep_panel_data(data): - """Converts the given data into a Panel.""" - if isinstance(data, Panel): - return data - - return Panel.fromDict(data) - - -def _drop_incomplete_rows(array): - mask = np.isfinite(array).all(1) - indices = np.arange(len(array))[mask] - return array.take(indices, 0) - - -def _make_param_name(lag, name): - return 'L%d.%s' % (lag, name) - - -def chain_dot(*matrices): - """ - Returns the dot product of the given matrices. - - Parameters - ---------- - matrices: argument list of ndarray - """ - return reduce(lambda x, y: np.dot(y, x), matrices[::-1]) diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index c7b9f4bdea6b2..c3962ad9c823c 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -69,7 +69,6 @@ def show_versions(as_json=False): ("Cython", lambda mod: mod.__version__), ("numpy", lambda mod: mod.version.version), ("scipy", lambda mod: mod.version.version), - ("statsmodels", lambda mod: mod.__version__), ("xarray", lambda mod: mod.__version__), ("IPython", lambda mod: mod.__version__), ("sphinx", lambda mod: mod.__version__), diff --git a/setup.py b/setup.py index 3c2617da18eae..c3cb56f2d6d1b 100755 --- a/setup.py +++ b/setup.py @@ -660,7 +660,6 @@ def pxd(name): 'pandas.io.tests.json', 'pandas.io.tests.parser', 'pandas.io.tests.sas', - 'pandas.stats.tests', 'pandas.msgpack', 'pandas.util.clipboard' ], From 3c9fec39d502cf7a24d4a9e16e3c5733560dc05c Mon Sep 17 00:00:00 2001 From: Stephen Rauch Date: Thu, 9 Feb 2017 12:04:19 -0500 Subject: [PATCH 031/353] BUG: Multiline Eval broken for local variables after first line Also fixes the code which attempted to ignore any blank lines in the multiline expression. 
closes #15342 Author: Stephen Rauch Closes #15343 from stephenrauch/multi-line-eval-with-local and squashes the following commits: fe67ede [Stephen Rauch] BUG: GH15342 - Multiline Eval broken for local variables after first line --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/computation/eval.py | 5 ++--- pandas/computation/tests/test_eval.py | 19 +++++++++++++++---- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3fb6f7b0b9a91..e765cdef4d219 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -517,3 +517,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) +- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index a0a08e4a968cc..5b21c753a71da 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -236,7 +236,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, first_expr = True if isinstance(expr, string_types): _check_expression(expr) - exprs = [e for e in expr.splitlines() if e != ''] + exprs = [e.strip() for e in expr.splitlines() if e.strip() != ''] else: exprs = [expr] multi_line = len(exprs) > 1 @@ -254,8 +254,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, _check_for_locals(expr, level, parser) # get our (possibly passed-in) scope - level += 1 - env = _ensure_scope(level, global_dict=global_dict, + env = _ensure_scope(level + 1, global_dict=global_dict, local_dict=local_dict, resolvers=resolvers, target=target) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index aa05626af9175..a4bb81ce7263c 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1274,7 +1274,6 @@ def test_assignment_fails(self): local_dict={'df': df, 'df2': df2}) def test_assignment_column(self): - tm.skip_if_no_ne('numexpr') df = DataFrame(np.random.randn(5, 2), columns=list('ab')) orig_df = df.copy() @@ -1346,7 +1345,6 @@ def test_column_in(self): def assignment_not_inplace(self): # GH 9297 - tm.skip_if_no_ne('numexpr') df = DataFrame(np.random.randn(5, 2), columns=list('ab')) actual = df.eval('c = a + b', inplace=False) @@ -1365,7 +1363,6 @@ def assignment_not_inplace(self): def test_multi_line_expression(self): # GH 11149 - tm.skip_if_no_ne('numexpr') df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) expected = df.copy() @@ -1393,7 +1390,6 @@ def test_multi_line_expression(self): def test_multi_line_expression_not_inplace(self): # GH 11149 - tm.skip_if_no_ne('numexpr') df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) expected = df.copy() @@ -1411,6 +1407,21 @@ def test_multi_line_expression_not_inplace(self): e = a + 2""", inplace=False) assert_frame_equal(expected, df) + def test_multi_line_expression_local_variable(self): + # GH 15342 + df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) + expected = df.copy() + + local_var = 7 + expected['c'] = expected['a'] * local_var + expected['d'] = expected['c'] + local_var + ans = df.eval(""" + c = a * @local_var + d = c + @local_var + """, inplace=True) + assert_frame_equal(expected, df) + self.assertIsNone(ans) + def test_assignment_in_query(self): # GH 8664 df = pd.DataFrame({'a': [1, 2, 
3], 'b': [4, 5, 6]}) From c23b1a4c8cb4ac87c9e71703285393e5904e2a8a Mon Sep 17 00:00:00 2001 From: Piotr Chromiec Date: Thu, 9 Feb 2017 12:08:02 -0500 Subject: [PATCH 032/353] BUG: fix read_gbq lost precision for longs above 2^53 and floats above 10k closes #14020 closes #14305 Author: Piotr Chromiec Closes #14064 from tworec/read_gbq_full_long_support and squashes the following commits: 788ccee [Piotr Chromiec] BUG: fix read_gbq lost numeric precision --- doc/source/install.rst | 13 +- doc/source/io.rst | 61 +++++-- doc/source/whatsnew/v0.20.0.txt | 5 +- pandas/io/gbq.py | 24 +-- pandas/io/tests/test_gbq.py | 288 +++++++++++++++++++++----------- 5 files changed, 263 insertions(+), 128 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 158a6e5562b7a..4b3ea19624a0e 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -250,9 +250,9 @@ Optional Dependencies * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: - - `psycopg2 `__: for PostgreSQL - - `pymysql `__: for MySQL. - - `SQLite `__: for SQLite, this is included in Python's standard library by default. + * `psycopg2 `__: for PostgreSQL + * `pymysql `__: for MySQL. + * `SQLite `__: for SQLite, this is included in Python's standard library by default. * `matplotlib `__: for plotting * For Excel I/O: @@ -272,11 +272,8 @@ Optional Dependencies `__, or `xclip `__: necessary to use :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. -* Google's `python-gflags <`__ , - `oauth2client `__ , - `httplib2 `__ - and `google-api-python-client `__ - : Needed for :mod:`~pandas.io.gbq` +* For Google BigQuery I/O - see :ref:`here `. + * `Backports.lzma `__: Only for Python 2, for writing to and/or reading from an xz compressed DataFrame in CSV; Python 3 support is built into the standard library. * One of the following combinations of libraries is needed to use the top-level :func:`~pandas.read_html` function: diff --git a/doc/source/io.rst b/doc/source/io.rst index 4c78758a0e2d2..22eac33a715ba 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -39,7 +39,7 @@ object. * :ref:`read_json` * :ref:`read_msgpack` * :ref:`read_html` - * :ref:`read_gbq` + * :ref:`read_gbq` * :ref:`read_stata` * :ref:`read_sas` * :ref:`read_clipboard` @@ -55,7 +55,7 @@ The corresponding ``writer`` functions are object methods that are accessed like * :ref:`to_json` * :ref:`to_msgpack` * :ref:`to_html` - * :ref:`to_gbq` + * :ref:`to_gbq` * :ref:`to_stata` * :ref:`to_clipboard` * :ref:`to_pickle` @@ -4648,16 +4648,11 @@ DataFrame with a shape and data types derived from the source table. Additionally, DataFrames can be inserted into new BigQuery tables or appended to existing tables. -You will need to install some additional dependencies: - -- Google's `python-gflags `__ -- `httplib2 `__ -- `google-api-python-client `__ - .. warning:: To use this module, you will need a valid BigQuery account. Refer to the - `BigQuery Documentation `__ for details on the service itself. + `BigQuery Documentation `__ + for details on the service itself. The key functions are: @@ -4671,7 +4666,44 @@ The key functions are: .. currentmodule:: pandas -.. 
_io.bigquery_reader:
+
+Supported Data Types
+++++++++++++++++++++
+
+Pandas supports all these `BigQuery data types `__:
+``STRING``, ``INTEGER`` (64 bit), ``FLOAT`` (64 bit), ``BOOLEAN`` and
+``TIMESTAMP`` (microsecond precision). Data types ``BYTES`` and ``RECORD``
+are not supported.
+
+Integer and boolean ``NA`` handling
++++++++++++++++++++++++++++++++++++
+
+.. versionadded:: 0.20
+
+Since all columns in BigQuery queries are nullable, and NumPy lacks ``NA``
+support for integer and boolean types, this module will store ``INTEGER`` or
+``BOOLEAN`` columns with at least one ``NULL`` value as ``dtype=object``.
+Otherwise those columns will be stored as ``dtype=int64`` or ``dtype=bool``
+respectively.
+
+This is the opposite of the default pandas behaviour, which promotes integer
+types to float in order to store NAs. See the :ref:`gotchas`
+for a detailed explanation.
+
+While this trade-off works well for most cases, it breaks down for storing
+values greater than 2**53. Such values in BigQuery can represent identifiers,
+and an unnoticed loss of precision for an identifier is what we want to avoid.
+
+.. _io.bigquery_deps:
+
+Dependencies
+++++++++++++
+
+This module requires the following additional dependencies:
+
+- `httplib2 `__: HTTP client
+- `google-api-python-client `__: Google's API client
+- `oauth2client `__: authentication and authorization for Google's API

 .. _io.bigquery_authentication:

@@ -4686,7 +4718,7 @@ Is possible to authenticate with either user account credentials or service acco
 Authenticating with user account credentials is as simple as following the prompts in a browser window which will be automatically opened for you. You will be authenticated to the specified ``BigQuery`` account using the product name ``pandas GBQ``. It is only possible on local host.
-The remote authentication using user account credentials is not currently supported in Pandas.
+The remote authentication using user account credentials is not currently supported in pandas.
 Additional information on the authentication mechanism can be found `here `__.
@@ -4695,8 +4727,6 @@ is particularly useful when working on remote servers (eg. jupyter iPython noteb
 Additional information on service accounts can be found `here `__.
-You will need to install an additional dependency: `oauth2client `__.
-
 Authentication via ``application default credentials`` is also possible. This is only valid if the parameter ``private_key`` is not provided. This method also requires that the credentials can be fetched from the environment the code is running in.
@@ -4716,6 +4746,7 @@ Additional information on
 A private key can be obtained from the Google developers console by clicking `here `__. Use JSON key type.
+.. _io.bigquery_reader:

 Querying
 ''''''''

@@ -4775,7 +4806,6 @@ For more information about query configuration parameters see
 .. _io.bigquery_writer:
-
 Writing DataFrames
 ''''''''''''''''''
@@ -4865,6 +4895,8 @@ For example:
 often as the service seems to be changing and evolving. BiqQuery is best for analyzing large sets of data quickly, but it is not a direct replacement for a transactional database.
+.. _io.bigquery_create_tables:
+
 Creating BigQuery Tables
 ''''''''''''''''''''''''
@@ -4894,6 +4926,7 @@ produce the dictionary representation schema of the specified pandas DataFrame.
 the new table with a different name. Refer to
 `Google BigQuery issue 191 `__.
+
 .. _io.stata:

 Stata Format
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index e765cdef4d219..9eae2b7a33923 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -369,7 +369,9 @@ Other API Changes
 - ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than than one byte (:issue:`11592`)
 - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
 - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype``
- - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`)
+- ``DataFrame.asof()`` will return a null filled ``Series`` instead of the scalar ``NaN`` if a match is not found (:issue:`15118`)
+- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision loss for integers greater than 2**53. Furthermore, ``FLOAT`` columns with values above 10**4 are no longer cast to ``int64``, which also caused precision loss (:issue:`14064`, :issue:`14305`).
+
 .. _whatsnew_0200.deprecations:

 Deprecations
@@ -439,6 +441,7 @@ Bug Fixes
 - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`)
+
 - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)
 - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`)
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 966f53e9d75ef..76c228418a616 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -603,18 +603,14 @@ def _parse_data(schema, rows):
     # see:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
-    dtype_map = {'INTEGER': np.dtype(float),
-                 'FLOAT': np.dtype(float),
-                 # This seems to be buggy without nanosecond indicator
+    dtype_map = {'FLOAT': np.dtype(float),
                  'TIMESTAMP': 'M8[ns]'}

     fields = schema['fields']
     col_types = [field['type'] for field in fields]
     col_names = [str(field['name']) for field in fields]
     col_dtypes = [dtype_map.get(field['type'], object) for field in fields]
-    page_array = np.zeros((len(rows),),
-                          dtype=lzip(col_names, col_dtypes))
-
+    page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes))
     for row_num, raw_row in enumerate(rows):
         entries = raw_row.get('f', [])
         for col_num, field_type in enumerate(col_types):
@@ -628,7 +624,9 @@ def _parse_data(schema, rows):
 def _parse_entry(field_value, field_type):
     if field_value is None or field_value == 'null':
         return None
-    if field_type == 'INTEGER' or field_type == 'FLOAT':
+    if field_type == 'INTEGER':
+        return int(field_value)
+    elif field_type == 'FLOAT':
         return float(field_value)
     elif field_type == 'TIMESTAMP':
         timestamp = datetime.utcfromtimestamp(float(field_value))
@@ -757,10 +755,14 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
                 'Column order does not match this DataFrame.'
             )

-    # Downcast floats to integers and objects to booleans
-    # if there are no NaN's. This is presently due to a
-    # limitation of numpy in handling missing data.
- final_df._data = final_df._data.downcast(dtypes='infer') + # cast BOOLEAN and INTEGER columns from object to bool/int + # if they dont have any nulls + type_map = {'BOOLEAN': bool, 'INTEGER': int} + for field in schema['fields']: + if field['type'] in type_map and \ + final_df[field['name']].notnull().all(): + final_df[field['name']] = \ + final_df[field['name']].astype(type_map[field['type']]) connector.print_elapsed_seconds( 'Total time taken', diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 457e2d218cb33..1157482d7ae67 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -46,6 +46,11 @@ def _skip_if_no_project_id(): "Cannot run integration tests without a project id") +def _skip_local_auth_if_in_travis_env(): + if _in_travis_environment(): + raise nose.SkipTest("Cannot run local auth in travis environment") + + def _skip_if_no_private_key_path(): if not _get_private_key_path(): raise nose.SkipTest("Cannot run integration tests without a " @@ -248,14 +253,14 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) -class TestGBQConnectorIntegration(tm.TestCase): +class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): _setup_common() _skip_if_no_project_id() + _skip_local_auth_if_in_travis_env() - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) + self.sut = gbq.GbqConnector(_get_project_id()) def test_should_be_able_to_make_a_connector(self): self.assertTrue(self.sut is not None, @@ -293,8 +298,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -class TestGBQConnectorServiceAccountKeyPathIntegration(tm.TestCase): - +class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -325,16 +329,15 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -class TestGBQConnectorServiceAccountKeyContentsIntegration(tm.TestCase): - +class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() _skip_if_no_project_id() - _skip_if_no_private_key_path() + _skip_if_no_private_key_contents() self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) + private_key=_get_private_key_contents()) def test_should_be_able_to_make_a_connector(self): self.assertTrue(self.sut is not None, @@ -373,9 +376,9 @@ def test_import_google_api_python_client(self): from googleapiclient.discovery import build # noqa from googleapiclient.errors import HttpError # noqa - def test_should_return_bigquery_integers_as_python_floats(self): + def test_should_return_bigquery_integers_as_python_ints(self): result = gbq._parse_entry(1, 'INTEGER') - tm.assert_equal(result, float(1)) + tm.assert_equal(result, int(1)) def test_should_return_bigquery_floats_as_python_floats(self): result = gbq._parse_entry(1, 'FLOAT') @@ -403,15 +406,15 @@ def test_to_gbq_with_no_project_id_given_should_fail(self): def test_read_gbq_with_no_project_id_given_should_fail(self): with tm.assertRaises(TypeError): - gbq.read_gbq('SELECT "1" as NUMBER_1') + gbq.read_gbq('SELECT 1') def test_that_parse_data_works_properly(self): test_schema = {'fields': [ - {'mode': 'NULLABLE', 'name': 'VALID_STRING', 'type': 'STRING'}]} + {'mode': 'NULLABLE', 'name': 'valid_string', 'type': 'STRING'}]} test_page = [{'f': [{'v': 'PI'}]}] test_output = gbq._parse_data(test_schema, test_page) - 
correct_output = DataFrame({'VALID_STRING': ['PI']}) + correct_output = DataFrame({'valid_string': ['PI']}) tm.assert_frame_equal(test_output, correct_output) def test_read_gbq_with_invalid_private_key_json_should_fail(self): @@ -435,12 +438,12 @@ def test_read_gbq_with_empty_private_key_file_should_fail(self): private_key=empty_file_path) def test_read_gbq_with_corrupted_private_key_json_should_fail(self): - _skip_if_no_private_key_path() + _skip_if_no_private_key_contents() with tm.assertRaises(gbq.InvalidPrivateKeyFormat): gbq.read_gbq( 'SELECT 1', project_id='x', - private_key=re.sub('[a-z]', '9', _get_private_key_path())) + private_key=re.sub('[a-z]', '9', _get_private_key_contents())) class TestReadGBQIntegration(tm.TestCase): @@ -475,112 +478,207 @@ def tearDown(self): pass def test_should_read_as_user_account(self): - if _in_travis_environment(): - raise nose.SkipTest("Cannot run local auth in travis environment") + _skip_local_auth_if_in_travis_env() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_read_as_service_account_with_key_path(self): _skip_if_no_private_key_path() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_read_as_service_account_with_key_contents(self): _skip_if_no_private_key_contents() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_contents()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) + + +class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): + + @classmethod + def setUpClass(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *BEFORE* + # executing *ALL* tests described below. + + _skip_if_no_project_id() + _skip_if_no_private_key_path() + + _setup_common() + + def setUp(self): + # - PER-TEST FIXTURES - + # put here any instruction you want to be run *BEFORE* *EVERY* test is + # executed. + pass + + @classmethod + def tearDownClass(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *AFTER* + # executing all tests. + pass + + def tearDown(self): + # - PER-TEST FIXTURES - + # put here any instructions you want to be run *AFTER* *EVERY* test is + # executed. 
+ pass def test_should_properly_handle_valid_strings(self): - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_properly_handle_empty_strings(self): - query = 'SELECT "" as EMPTY_STRING' + query = 'SELECT "" AS empty_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'EMPTY_STRING': [""]})) + tm.assert_frame_equal(df, DataFrame({'empty_string': [""]})) def test_should_properly_handle_null_strings(self): - query = 'SELECT STRING(NULL) as NULL_STRING' + query = 'SELECT STRING(NULL) AS null_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_STRING': [None]})) + tm.assert_frame_equal(df, DataFrame({'null_string': [None]})) def test_should_properly_handle_valid_integers(self): - query = 'SELECT INTEGER(3) as VALID_INTEGER' + query = 'SELECT INTEGER(3) AS valid_integer' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame({'valid_integer': [3]})) + + def test_should_properly_handle_nullable_integers(self): + query = '''SELECT * FROM + (SELECT 1 AS nullable_integer), + (SELECT NULL AS nullable_integer)''' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_INTEGER': [3]})) + tm.assert_frame_equal( + df, DataFrame({'nullable_integer': [1, None]}).astype(object)) + + def test_should_properly_handle_valid_longs(self): + query = 'SELECT 1 << 62 AS valid_long' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'valid_long': [1 << 62]})) + + def test_should_properly_handle_nullable_longs(self): + query = '''SELECT * FROM + (SELECT 1 << 62 AS nullable_long), + (SELECT NULL AS nullable_long)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_long': [1 << 62, None]}).astype(object)) def test_should_properly_handle_null_integers(self): - query = 'SELECT INTEGER(NULL) as NULL_INTEGER' + query = 'SELECT INTEGER(NULL) AS null_integer' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_INTEGER': [np.nan]})) + tm.assert_frame_equal(df, DataFrame({'null_integer': [None]})) def test_should_properly_handle_valid_floats(self): - query = 'SELECT PI() as VALID_FLOAT' + from math import pi + query = 'SELECT PI() AS valid_float' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame( + {'valid_float': [pi]})) + + def test_should_properly_handle_nullable_floats(self): + from math import pi + query = '''SELECT * FROM + (SELECT PI() AS nullable_float), + (SELECT NULL AS nullable_float)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_float': [pi, None]})) + + def test_should_properly_handle_valid_doubles(self): + from math import pi + query = 'SELECT PI() * POW(10, 307) AS 
valid_double' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame( - {'VALID_FLOAT': [3.141592653589793]})) + {'valid_double': [pi * 10 ** 307]})) + + def test_should_properly_handle_nullable_doubles(self): + from math import pi + query = '''SELECT * FROM + (SELECT PI() * POW(10, 307) AS nullable_double), + (SELECT NULL AS nullable_double)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_double': [pi * 10 ** 307, None]})) def test_should_properly_handle_null_floats(self): - query = 'SELECT FLOAT(NULL) as NULL_FLOAT' + query = 'SELECT FLOAT(NULL) AS null_float' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_FLOAT': [np.nan]})) + tm.assert_frame_equal(df, DataFrame({'null_float': [np.nan]})) def test_should_properly_handle_timestamp_unix_epoch(self): - query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") as UNIX_EPOCH' + query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame( - {'UNIX_EPOCH': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) + {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) def test_should_properly_handle_arbitrary_timestamp(self): - query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") as VALID_TIMESTAMP' + query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame({ - 'VALID_TIMESTAMP': [np.datetime64('2004-09-15T05:00:00.000000Z')] + 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] })) def test_should_properly_handle_null_timestamp(self): - query = 'SELECT TIMESTAMP(NULL) as NULL_TIMESTAMP' + query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_TIMESTAMP': [NaT]})) + tm.assert_frame_equal(df, DataFrame({'null_timestamp': [NaT]})) def test_should_properly_handle_true_boolean(self): - query = 'SELECT BOOLEAN(TRUE) as TRUE_BOOLEAN' + query = 'SELECT BOOLEAN(TRUE) AS true_boolean' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'TRUE_BOOLEAN': [True]})) + tm.assert_frame_equal(df, DataFrame({'true_boolean': [True]})) def test_should_properly_handle_false_boolean(self): - query = 'SELECT BOOLEAN(FALSE) as FALSE_BOOLEAN' + query = 'SELECT BOOLEAN(FALSE) AS false_boolean' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'FALSE_BOOLEAN': [False]})) + tm.assert_frame_equal(df, DataFrame({'false_boolean': [False]})) def test_should_properly_handle_null_boolean(self): - query = 'SELECT BOOLEAN(NULL) as NULL_BOOLEAN' + query = 'SELECT BOOLEAN(NULL) AS null_boolean' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame({'null_boolean': [None]})) + + def test_should_properly_handle_nullable_booleans(self): + query = '''SELECT * FROM + (SELECT BOOLEAN(TRUE) AS nullable_boolean), + (SELECT NULL AS nullable_boolean)''' df = gbq.read_gbq(query, 
project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_BOOLEAN': [None]})) + tm.assert_frame_equal( + df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) def test_unicode_string_conversion_and_normalization(self): correct_test_datatype = DataFrame( - {'UNICODE_STRING': [u("\xe9\xfc")]} + {'unicode_string': [u("\xe9\xfc")]} ) unicode_string = "\xc3\xa9\xc3\xbc" @@ -588,40 +686,40 @@ def test_unicode_string_conversion_and_normalization(self): if compat.PY3: unicode_string = unicode_string.encode('latin-1').decode('utf8') - query = 'SELECT "{0}" as UNICODE_STRING'.format(unicode_string) + query = 'SELECT "{0}" AS unicode_string'.format(unicode_string) df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, correct_test_datatype) def test_index_column(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2" + query = "SELECT 'a' AS string_1, 'b' AS string_2" result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col="STRING_1", + index_col="string_1", private_key=_get_private_key_path()) correct_frame = DataFrame( - {'STRING_1': ['a'], 'STRING_2': ['b']}).set_index("STRING_1") + {'string_1': ['a'], 'string_2': ['b']}).set_index("string_1") tm.assert_equal(result_frame.index.name, correct_frame.index.name) def test_column_order(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3" - col_order = ['STRING_3', 'STRING_1', 'STRING_2'] + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + col_order = ['string_3', 'string_1', 'string_2'] result_frame = gbq.read_gbq(query, project_id=_get_project_id(), col_order=col_order, private_key=_get_private_key_path()) - correct_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': [ - 'b'], 'STRING_3': ['c']})[col_order] + correct_frame = DataFrame({'string_1': ['a'], 'string_2': [ + 'b'], 'string_3': ['c']})[col_order] tm.assert_frame_equal(result_frame, correct_frame) def test_column_order_plus_index(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3" - col_order = ['STRING_3', 'STRING_2'] + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + col_order = ['string_3', 'string_2'] result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col='STRING_1', col_order=col_order, + index_col='string_1', col_order=col_order, private_key=_get_private_key_path()) correct_frame = DataFrame( - {'STRING_1': ['a'], 'STRING_2': ['b'], 'STRING_3': ['c']}) - correct_frame.set_index('STRING_1', inplace=True) + {'string_1': ['a'], 'string_2': ['b'], 'string_3': ['c']}) + correct_frame.set_index('string_1', inplace=True) correct_frame = correct_frame[col_order] tm.assert_frame_equal(result_frame, correct_frame) @@ -655,14 +753,17 @@ def test_download_dataset_larger_than_200k_rows(self): def test_zero_rows(self): # Bug fix for https://github.com/pandas-dev/pandas/issues/10273 - df = gbq.read_gbq("SELECT title, id " + df = gbq.read_gbq("SELECT title, id, is_bot, " + "SEC_TO_TIMESTAMP(timestamp) ts " "FROM [publicdata:samples.wikipedia] " "WHERE timestamp=-9999999", project_id=_get_project_id(), private_key=_get_private_key_path()) page_array = np.zeros( - (0,), dtype=[('title', object), ('id', np.dtype(float))]) - expected_result = DataFrame(page_array, columns=['title', 'id']) + (0,), dtype=[('title', object), ('id', np.dtype(int)), + ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) + expected_result = DataFrame( + page_array, 
columns=['title', 'id', 'is_bot', 'ts']) self.assert_frame_equal(df, expected_result) def test_legacy_sql(self): @@ -715,7 +816,7 @@ def test_invalid_option_for_sql_dialect(self): dialect='standard', private_key=_get_private_key_path()) def test_query_with_parameters(self): - sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" + sql_statement = "SELECT @param1 + @param2 AS valid_result" config = { 'query': { "useLegacySql": False, @@ -753,11 +854,11 @@ def test_query_with_parameters(self): df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), configuration=config) - tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) + tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) def test_query_inside_configuration(self): - query_no_use = 'SELECT "PI_WRONG" as VALID_STRING' - query = 'SELECT "PI" as VALID_STRING' + query_no_use = 'SELECT "PI_WRONG" AS valid_string' + query = 'SELECT "PI" AS valid_string' config = { 'query': { "query": query, @@ -774,7 +875,7 @@ def test_query_inside_configuration(self): df = gbq.read_gbq(None, project_id=_get_project_id(), private_key=_get_private_key_path(), configuration=config) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_configuration_without_query(self): sql_statement = 'SELECT 1' @@ -800,7 +901,7 @@ def test_configuration_without_query(self): configuration=config) -class TestToGBQIntegration(tm.TestCase): +class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -814,6 +915,7 @@ def setUpClass(cls): # executing *ALL* tests described below. _skip_if_no_project_id() + _skip_if_no_private_key_path() _setup_common() clean_gbq_environment(_get_private_key_path()) @@ -859,11 +961,11 @@ def test_upload_data(self): sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size) def test_upload_data_if_table_exists_fail(self): destination_table = DESTINATION_TABLE + "2" @@ -899,11 +1001,11 @@ def test_upload_data_if_table_exists_append(self): sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], test_size * 2) + self.assertEqual(result['num_rows'][0], test_size * 2) # Try inserting with a different schema, confirm failure with tm.assertRaises(gbq.InvalidSchema): @@ -932,11 +1034,11 @@ def test_upload_data_if_table_exists_replace(self): sleep(30) # <- Curses Google!!! 
- result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], 5) + self.assertEqual(result['num_rows'][0], 5) @tm.slow def test_google_upload_errors_should_raise_exception(self): @@ -1113,7 +1215,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -class TestToGBQIntegrationServiceAccountKeyPath(tm.TestCase): +class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -1128,10 +1230,10 @@ def setUpClass(cls): # executing *ALL* tests described below. _skip_if_no_project_id() - _skip_if_no_private_key_path() + _skip_local_auth_if_in_travis_env() _setup_common() - clean_gbq_environment(_get_private_key_path()) + clean_gbq_environment() def setUp(self): # - PER-TEST FIXTURES - @@ -1145,7 +1247,7 @@ def tearDownClass(cls): # put here any instruction you want to execute only *ONCE* *AFTER* # executing all tests. - clean_gbq_environment(_get_private_key_path()) + clean_gbq_environment() def tearDown(self): # - PER-TEST FIXTURES - @@ -1153,26 +1255,24 @@ def tearDown(self): # is executed. pass - def test_upload_data_as_service_account_with_key_path(self): + def test_upload_data(self): destination_table = "{0}.{1}".format(DATASET_ID + "2", TABLE_ID + "1") test_size = 10 df = make_mixed_dataframe_v2(test_size) - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) + gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000) sleep(30) # <- Curses Google!!! result = gbq.read_gbq( - "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) + "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), + project_id=_get_project_id()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size) -class TestToGBQIntegrationServiceAccountKeyContents(tm.TestCase): +class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -1212,7 +1312,7 @@ def tearDown(self): # is executed. pass - def test_upload_data_as_service_account_with_key_contents(self): + def test_upload_data(self): destination_table = "{0}.{1}".format(DATASET_ID + "3", TABLE_ID + "1") test_size = 10 @@ -1224,7 +1324,7 @@ def test_upload_data_as_service_account_with_key_contents(self): sleep(30) # <- Curses Google!!! 
result = gbq.read_gbq( - "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table), + "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_contents()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size) From ec9bd44c8c93f26f7ce0c7af4a0b80039df416a0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Feb 2017 17:36:19 -0500 Subject: [PATCH 033/353] CLN: strip out and form tools/concat.py from tools/merge.py will facilitate some changes in ``tools/merge`` w.r.t. #15321, plus these are independent anyhow. Author: Jeff Reback Closes #15358 from jreback/concat and squashes the following commits: ba34c51 [Jeff Reback] CLN: strip out and form tools/concat.py from tools/merge.py --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/__init__.py | 6 +- pandas/core/base.py | 4 +- pandas/core/categorical.py | 2 +- pandas/core/frame.py | 9 +- pandas/core/groupby.py | 12 +- pandas/core/panel.py | 2 +- pandas/core/reshape.py | 2 +- pandas/core/series.py | 2 +- pandas/formats/format.py | 4 +- pandas/io/gbq.py | 4 +- pandas/io/pytables.py | 8 +- pandas/tests/groupby/test_groupby.py | 5 +- pandas/tools/concat.py | 615 ++++++++++++++++++++++ pandas/tools/merge.py | 634 +---------------------- pandas/tools/pivot.py | 4 +- pandas/tools/plotting.py | 2 +- pandas/tools/tests/test_join.py | 3 +- pandas/tools/tests/test_merge.py | 3 +- pandas/tools/tests/test_merge_ordered.py | 2 - pandas/tools/tests/test_pivot.py | 4 +- 21 files changed, 672 insertions(+), 657 deletions(-) create mode 100644 pandas/tools/concat.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9eae2b7a33923..2279d0464a5c7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -385,7 +385,7 @@ Deprecations - ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`) - ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - +- importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) .. 
_whatsnew_0200.prior_deprecations: diff --git a/pandas/__init__.py b/pandas/__init__.py index 9133e11beaa2b..76542db22a757 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -42,10 +42,10 @@ from pandas.sparse.api import * from pandas.stats.api import * from pandas.tseries.api import * -from pandas.io.api import * from pandas.computation.api import * -from pandas.tools.merge import (merge, concat, ordered_merge, +from pandas.tools.concat import concat +from pandas.tools.merge import (merge, ordered_merge, merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab from pandas.tools.plotting import scatter_matrix, plot_params @@ -54,6 +54,8 @@ from pandas.core.reshape import melt from pandas.util.print_versions import show_versions +from pandas.io.api import * + # define the testing framework import pandas.util.testing from pandas.util.nosetester import NoseTester diff --git a/pandas/core/base.py b/pandas/core/base.py index 657da859ddde2..92ec6bb3d73e6 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -472,7 +472,7 @@ def _aggregate(self, arg, *args, **kwargs): arg = new_arg - from pandas.tools.merge import concat + from pandas.tools.concat import concat def _agg_1dim(name, how, subset=None): """ @@ -579,7 +579,7 @@ def _agg(arg, func): return result, True def _aggregate_multiple_funcs(self, arg, _level): - from pandas.tools.merge import concat + from pandas.tools.concat import concat if self.axis != 0: raise NotImplementedError("axis other than 0 is not supported") diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 5980f872f951f..491db2e080953 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1907,7 +1907,7 @@ def describe(self): counts = self.value_counts(dropna=False) freqs = counts / float(counts.sum()) - from pandas.tools.merge import concat + from pandas.tools.concat import concat result = concat([counts, freqs], axis=1) result.columns = ['counts', 'freqs'] result.index.name = 'categories' diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79bdad82af5a3..aa03bfb9a54b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4402,7 +4402,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): if (self.columns.get_indexer(other.columns) >= 0).all(): other = other.loc[:, self.columns] - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(other, (list, tuple)): to_concat = [self] + other else: @@ -4532,7 +4532,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): - from pandas.tools.merge import merge, concat + from pandas.tools.merge import merge + from pandas.tools.concat import concat if isinstance(other, Series): if other.name is None: @@ -4636,7 +4637,7 @@ def round(self, decimals=0, *args, **kwargs): Series.round """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat def _dict_round(df, decimals): for col, vals in df.iteritems(): @@ -5306,7 +5307,7 @@ def isin(self, values): """ if isinstance(values, dict): from collections import defaultdict - from pandas.tools.merge import concat + from pandas.tools.concat import concat values = defaultdict(list, values) return concat((self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns)), axis=1) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 99220232114ce..53b6dbe6075cf 100644 --- 
a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -854,7 +854,7 @@ def _wrap_applied_output(self, *args, **kwargs): raise AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): - from pandas.tools.merge import concat + from pandas.tools.concat import concat def reset_identity(values): # reset the identities of the components @@ -3507,7 +3507,7 @@ def first_non_None_value(values): # still a series # path added as of GH 5545 elif all_indexed_same: - from pandas.tools.merge import concat + from pandas.tools.concat import concat return concat(values) if not all_indexed_same: @@ -3540,7 +3540,7 @@ def first_non_None_value(values): else: # GH5788 instead of stacking; concat gets the # dtypes correct - from pandas.tools.merge import concat + from pandas.tools.concat import concat result = concat(values, keys=key_index, names=key_index.names, axis=self.axis).unstack() @@ -3588,7 +3588,7 @@ def first_non_None_value(values): not_indexed_same=not_indexed_same) def _transform_general(self, func, *args, **kwargs): - from pandas.tools.merge import concat + from pandas.tools.concat import concat applied = [] obj = self._obj_with_exclusions @@ -3980,7 +3980,7 @@ def _iterate_column_groupbys(self): exclusions=self.exclusions) def _apply_to_column_groupbys(self, func): - from pandas.tools.merge import concat + from pandas.tools.concat import concat return concat( (func(col_groupby) for _, col_groupby in self._iterate_column_groupbys()), @@ -4061,7 +4061,7 @@ def groupby_series(obj, col=None): if isinstance(obj, Series): results = groupby_series(obj) else: - from pandas.tools.merge import concat + from pandas.tools.concat import concat results = [groupby_series(obj[col], col) for col in obj.columns] results = concat(results, axis=1) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 6da10305eb4fc..4a6c6cf291316 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1282,7 +1282,7 @@ def join(self, other, how='left', lsuffix='', rsuffix=''): ------- joined : Panel """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(other, Panel): join_major, join_minor = self._get_join_index(other, how) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index d6287f17c8387..bd0358abf67d5 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1194,7 +1194,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, -------- Series.str.get_dummies """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat from itertools import cycle if isinstance(data, DataFrame): diff --git a/pandas/core/series.py b/pandas/core/series.py index 43f16f690692a..e1eac8f66017e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1588,7 +1588,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat if isinstance(to_append, (list, tuple)): to_concat = [self] + to_append diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 439b96d650204..1a7a06199ad8a 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -165,7 +165,7 @@ def __init__(self, series, buf=None, length=True, header=True, index=True, self._chk_truncate() def _chk_truncate(self): - from pandas.tools.merge import concat + from pandas.tools.concat import concat max_rows = self.max_rows truncate_v = max_rows and (len(self.series) > max_rows) series = self.series @@ -406,7 
+406,7 @@ def _chk_truncate(self): Checks whether the frame should be truncated. If so, slices the frame up. """ - from pandas.tools.merge import concat + from pandas.tools.concat import concat # Column of which first element is used to determine width of a dot col self.tr_size_col = -1 diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 76c228418a616..169a2b1df9b4c 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -10,9 +10,7 @@ import numpy as np from distutils.version import StrictVersion -from pandas import compat -from pandas.core.api import DataFrame -from pandas.tools.merge import concat +from pandas import compat, DataFrame, concat from pandas.core.common import PandasError from pandas.compat import lzip, bytes_to_str diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9f161dc5ec50e..9224f7d3d9a94 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -26,13 +26,12 @@ import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, isnull) + MultiIndex, Int64Index, isnull, concat, + SparseSeries, SparseDataFrame, PeriodIndex, + DatetimeIndex, TimedeltaIndex) from pandas.core import config from pandas.io.common import _stringify_path -from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex -from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex from pandas.core.base import StringMixin from pandas.formats.printing import adjoin, pprint_thing from pandas.core.common import _asarray_tuplesafe, PerformanceWarning @@ -42,7 +41,6 @@ _block2d_to_blocknd, _factor_indexer, _block_shape) from pandas.core.index import _ensure_index -from pandas.tools.merge import concat from pandas import compat from pandas.compat import u_safe as u, PY3, range, lrange, string_types, filter from pandas.core.config import get_option diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 458e869130190..53f85349834ac 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -6,7 +6,8 @@ from numpy import nan from pandas import (date_range, bdate_range, Timestamp, - isnull, Index, MultiIndex, DataFrame, Series) + isnull, Index, MultiIndex, DataFrame, Series, + concat, Panel) from pandas.core.common import UnsupportedFunctionCall from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, @@ -14,8 +15,6 @@ from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict, product as cart_product) from pandas import compat -from pandas.core.panel import Panel -from pandas.tools.merge import concat from collections import defaultdict import pandas.core.common as com import numpy as np diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py new file mode 100644 index 0000000000000..dbbc831b19d1d --- /dev/null +++ b/pandas/tools/concat.py @@ -0,0 +1,615 @@ +""" +concat routines +""" + +import numpy as np +from pandas import compat, DataFrame, Series, Index, MultiIndex +from pandas.core.index import (_get_combined_index, + _ensure_index, _get_consensus_names, + _all_indexes_same) +from pandas.core.categorical import (_factorize_from_iterable, + _factorize_from_iterables) +from pandas.core.internals import concatenate_block_managers +from pandas.core import common as com +from pandas.core.generic import NDFrame +import pandas.types.concat as _concat + +# 
--------------------------------------------------------------------- +# Concatenate DataFrame objects + + +def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, + keys=None, levels=None, names=None, verify_integrity=False, + copy=True): + """ + Concatenate pandas objects along a particular axis with optional set logic + along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series, DataFrame, or Panel objects + If a dict is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along + join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis(es) + join_axes : list of Index objects + Specific indexes to use for the other n - 1 axes instead of performing + inner/outer set logic + ignore_index : boolean, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys + names : list, default None + Names for the levels in the resulting hierarchical index + verify_integrity : boolean, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation + copy : boolean, default True + If False, do not copy data unnecessarily + + Returns + ------- + concatenated : type of objects + + Notes + ----- + The keys, levels, and names arguments are all optional. + + A walkthrough of how this method fits in with other tools for combining + panda objects can be found `here + `__. + + See Also + -------- + Series.append + DataFrame.append + DataFrame.join + DataFrame.merge + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2',]) + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Label the index keys you create with the ``names`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) + Series name Row ID + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... 
columns=['letter', 'number']) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], + ... columns=['letter', 'number']) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return everything. Columns outside the intersection will + be filled with ``NaN`` values. + + >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal']) + >>> df3 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df3]) + animal letter number + 0 NaN a 1 + 1 NaN b 2 + 0 cat c 3 + 1 dog d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return only those that are shared by passing ``inner`` to + the ``join`` keyword argument. + + >>> pd.concat([df1, df3], join="inner") + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects horizontally along the x axis by + passing in ``axis=1``. + + >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], + ... columns=['animal', 'name']) + >>> pd.concat([df1, df4], axis=1) + letter number animal name + 0 a 1 bird polly + 1 b 2 monkey george + + Prevent the result from including duplicate index values with the + ``verify_integrity`` option. + + >>> df5 = pd.DataFrame([1], index=['a']) + >>> df5 + 0 + a 1 + >>> df6 = pd.DataFrame([2], index=['a']) + >>> df6 + 0 + a 2 + >>> pd.concat([df5, df6], verify_integrity=True) + ValueError: Indexes have overlapping values: ['a'] + """ + op = _Concatenator(objs, axis=axis, join_axes=join_axes, + ignore_index=ignore_index, join=join, + keys=keys, levels=levels, names=names, + verify_integrity=verify_integrity, + copy=copy) + return op.get_result() + + +class _Concatenator(object): + """ + Orchestrates a concatenation operation for BlockManagers + """ + + def __init__(self, objs, axis=0, join='outer', join_axes=None, + keys=None, levels=None, names=None, + ignore_index=False, verify_integrity=False, copy=True): + if isinstance(objs, (NDFrame, compat.string_types)): + raise TypeError('first argument must be an iterable of pandas ' + 'objects, you passed an object of type ' + '"{0}"'.format(type(objs).__name__)) + + if join == 'outer': + self.intersect = False + elif join == 'inner': + self.intersect = True + else: # pragma: no cover + raise ValueError('Only can inner (intersect) or outer (union) ' + 'join the other axis') + + if isinstance(objs, dict): + if keys is None: + keys = sorted(objs) + objs = [objs[k] for k in keys] + else: + objs = list(objs) + + if len(objs) == 0: + raise ValueError('No objects to concatenate') + + if keys is None: + objs = [obj for obj in objs if obj is not None] + else: + # #1649 + clean_keys = [] + clean_objs = [] + for k, v in zip(keys, objs): + if v is None: + continue + clean_keys.append(k) + clean_objs.append(v) + objs = clean_objs + name = getattr(keys, 'name', None) + keys = Index(clean_keys, name=name) + + if len(objs) == 0: + raise ValueError('All objects passed were None') + + # consolidate data & figure out what our result ndim is going to be + ndims = set() + for obj in objs: + if not isinstance(obj, NDFrame): + raise TypeError("cannot concatenate a non-NDFrame object") + + # consolidate + obj.consolidate(inplace=True) + ndims.add(obj.ndim) + + # get the sample + # want the higest ndim that we have, and must be non-empty + # unless all objs are empty + sample = None + if len(ndims) > 1: + max_ndim = max(ndims) + for obj in objs: + 
if obj.ndim == max_ndim and np.sum(obj.shape): + sample = obj + break + + else: + # filter out the empties if we have not multi-index possibiltes + # note to keep empty Series as it affect to result columns / name + non_empties = [obj for obj in objs + if sum(obj.shape) > 0 or isinstance(obj, Series)] + + if (len(non_empties) and (keys is None and names is None and + levels is None and join_axes is None)): + objs = non_empties + sample = objs[0] + + if sample is None: + sample = objs[0] + self.objs = objs + + # Standardize axis parameter to int + if isinstance(sample, Series): + axis = DataFrame()._get_axis_number(axis) + else: + axis = sample._get_axis_number(axis) + + # Need to flip BlockManager axis in the DataFrame special case + self._is_frame = isinstance(sample, DataFrame) + if self._is_frame: + axis = 1 if axis == 0 else 0 + + self._is_series = isinstance(sample, Series) + if not 0 <= axis <= sample.ndim: + raise AssertionError("axis must be between 0 and {0}, " + "input was {1}".format(sample.ndim, axis)) + + # if we have mixed ndims, then convert to highest ndim + # creating column numbers as needed + if len(ndims) > 1: + current_column = 0 + max_ndim = sample.ndim + self.objs, objs = [], self.objs + for obj in objs: + + ndim = obj.ndim + if ndim == max_ndim: + pass + + elif ndim != max_ndim - 1: + raise ValueError("cannot concatenate unaligned mixed " + "dimensional NDFrame objects") + + else: + name = getattr(obj, 'name', None) + if ignore_index or name is None: + name = current_column + current_column += 1 + + # doing a row-wise concatenation so need everything + # to line up + if self._is_frame and axis == 1: + name = 0 + obj = sample._constructor({name: obj}) + + self.objs.append(obj) + + # note: this is the BlockManager axis (since DataFrame is transposed) + self.axis = axis + self.join_axes = join_axes + self.keys = keys + self.names = names or getattr(keys, 'names', None) + self.levels = levels + + self.ignore_index = ignore_index + self.verify_integrity = verify_integrity + self.copy = copy + + self.new_axes = self._get_new_axes() + + def get_result(self): + + # series only + if self._is_series: + + # stack blocks + if self.axis == 0: + # concat Series with length to keep dtype as much + non_empties = [x for x in self.objs if len(x) > 0] + if len(non_empties) > 0: + values = [x._values for x in non_empties] + else: + values = [x._values for x in self.objs] + new_data = _concat._concat_compat(values) + + name = com._consensus_name_attr(self.objs) + cons = _concat._get_series_result_type(new_data) + + return (cons(new_data, index=self.new_axes[0], + name=name, dtype=new_data.dtype) + .__finalize__(self, method='concat')) + + # combine as columns in a frame + else: + data = dict(zip(range(len(self.objs)), self.objs)) + cons = _concat._get_series_result_type(data) + + index, columns = self.new_axes + df = cons(data, index=index) + df.columns = columns + return df.__finalize__(self, method='concat') + + # combine block managers + else: + mgrs_indexers = [] + for obj in self.objs: + mgr = obj._data + indexers = {} + for ax, new_labels in enumerate(self.new_axes): + if ax == self.axis: + # Suppress reindexing on concat axis + continue + + obj_labels = mgr.axes[ax] + if not new_labels.equals(obj_labels): + indexers[ax] = obj_labels.reindex(new_labels)[1] + + mgrs_indexers.append((obj._data, indexers)) + + new_data = concatenate_block_managers( + mgrs_indexers, self.new_axes, concat_axis=self.axis, + copy=self.copy) + if not self.copy: + new_data._consolidate_inplace() + + cons = 
_concat._get_frame_result_type(new_data, self.objs) + return (cons._from_axes(new_data, self.new_axes) + .__finalize__(self, method='concat')) + + def _get_result_dim(self): + if self._is_series and self.axis == 1: + return 2 + else: + return self.objs[0].ndim + + def _get_new_axes(self): + ndim = self._get_result_dim() + new_axes = [None] * ndim + + if self.join_axes is None: + for i in range(ndim): + if i == self.axis: + continue + new_axes[i] = self._get_comb_axis(i) + else: + if len(self.join_axes) != ndim - 1: + raise AssertionError("length of join_axes must not be " + "equal to {0}".format(ndim - 1)) + + # ufff... + indices = compat.lrange(ndim) + indices.remove(self.axis) + + for i, ax in zip(indices, self.join_axes): + new_axes[i] = ax + + new_axes[self.axis] = self._get_concat_axis() + return new_axes + + def _get_comb_axis(self, i): + if self._is_series: + all_indexes = [x.index for x in self.objs] + else: + try: + all_indexes = [x._data.axes[i] for x in self.objs] + except IndexError: + types = [type(x).__name__ for x in self.objs] + raise TypeError("Cannot concatenate list of %s" % types) + + return _get_combined_index(all_indexes, intersect=self.intersect) + + def _get_concat_axis(self): + """ + Return index to be used along concatenation axis. + """ + if self._is_series: + if self.axis == 0: + indexes = [x.index for x in self.objs] + elif self.ignore_index: + idx = com._default_index(len(self.objs)) + return idx + elif self.keys is None: + names = [None] * len(self.objs) + num = 0 + has_names = False + for i, x in enumerate(self.objs): + if not isinstance(x, Series): + raise TypeError("Cannot concatenate type 'Series' " + "with object of type " + "%r" % type(x).__name__) + if x.name is not None: + names[i] = x.name + has_names = True + else: + names[i] = num + num += 1 + if has_names: + return Index(names) + else: + return com._default_index(len(self.objs)) + else: + return _ensure_index(self.keys) + else: + indexes = [x._data.axes[self.axis] for x in self.objs] + + if self.ignore_index: + idx = com._default_index(sum(len(i) for i in indexes)) + return idx + + if self.keys is None: + concat_axis = _concat_indexes(indexes) + else: + concat_axis = _make_concat_multiindex(indexes, self.keys, + self.levels, self.names) + + self._maybe_check_integrity(concat_axis) + + return concat_axis + + def _maybe_check_integrity(self, concat_index): + if self.verify_integrity: + if not concat_index.is_unique: + overlap = concat_index.get_duplicates() + raise ValueError('Indexes have overlapping values: %s' + % str(overlap)) + + +def _concat_indexes(indexes): + return indexes[0].append(indexes[1:]) + + +def _make_concat_multiindex(indexes, keys, levels=None, names=None): + + if ((levels is None and isinstance(keys[0], tuple)) or + (levels is not None and len(levels) > 1)): + zipped = compat.lzip(*keys) + if names is None: + names = [None] * len(zipped) + + if levels is None: + _, levels = _factorize_from_iterables(zipped) + else: + levels = [_ensure_index(x) for x in levels] + else: + zipped = [keys] + if names is None: + names = [None] + + if levels is None: + levels = [_ensure_index(keys)] + else: + levels = [_ensure_index(x) for x in levels] + + if not _all_indexes_same(indexes): + label_list = [] + + # things are potentially different sizes, so compute the exact labels + # for each level and pass those to MultiIndex.from_arrays + + for hlevel, level in zip(zipped, levels): + to_concat = [] + for key, index in zip(hlevel, indexes): + try: + i = level.get_loc(key) + except KeyError: + raise 
ValueError('Key %s not in level %s' + % (str(key), str(level))) + + to_concat.append(np.repeat(i, len(index))) + label_list.append(np.concatenate(to_concat)) + + concat_index = _concat_indexes(indexes) + + # these go at the end + if isinstance(concat_index, MultiIndex): + levels.extend(concat_index.levels) + label_list.extend(concat_index.labels) + else: + codes, categories = _factorize_from_iterable(concat_index) + levels.append(categories) + label_list.append(codes) + + if len(names) == len(levels): + names = list(names) + else: + # make sure that all of the passed indices have the same nlevels + if not len(set([idx.nlevels for idx in indexes])) == 1: + raise AssertionError("Cannot concat indices that do" + " not have the same number of levels") + + # also copies + names = names + _get_consensus_names(indexes) + + return MultiIndex(levels=levels, labels=label_list, names=names, + verify_integrity=False) + + new_index = indexes[0] + n = len(new_index) + kpieces = len(indexes) + + # also copies + new_names = list(names) + new_levels = list(levels) + + # construct labels + new_labels = [] + + # do something a bit more speedy + + for hlevel, level in zip(zipped, levels): + hlevel = _ensure_index(hlevel) + mapped = level.get_indexer(hlevel) + + mask = mapped == -1 + if mask.any(): + raise ValueError('Values not found in passed level: %s' + % str(hlevel[mask])) + + new_labels.append(np.repeat(mapped, n)) + + if isinstance(new_index, MultiIndex): + new_levels.extend(new_index.levels) + new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) + else: + new_levels.append(new_index) + new_labels.append(np.tile(np.arange(n), kpieces)) + + if len(new_names) < len(new_levels): + new_names.extend(new_index.names) + + return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, + verify_integrity=False) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 3fbd83a6f3245..d938c2eeacbef 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -4,19 +4,16 @@ import copy import warnings - import string import numpy as np -from pandas.compat import range, lrange, lzip, zip, map, filter +from pandas.compat import range, lzip, zip, map, filter import pandas.compat as compat -from pandas import (Categorical, DataFrame, Series, +import pandas as pd +from pandas import (Categorical, Series, DataFrame, Index, MultiIndex, Timedelta) -from pandas.core.categorical import (_factorize_from_iterable, - _factorize_from_iterables) from pandas.core.frame import _merge_doc -from pandas.types.generic import ABCSeries from pandas.types.common import (is_datetime64tz_dtype, is_datetime64_dtype, needs_i8_conversion, @@ -33,23 +30,31 @@ _ensure_object, _get_dtype) from pandas.types.missing import na_value_for_dtype - -from pandas.core.generic import NDFrame -from pandas.core.index import (_get_combined_index, - _ensure_index, _get_consensus_names, - _all_indexes_same) from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution import pandas.core.algorithms as algos import pandas.core.common as com -import pandas.types.concat as _concat import pandas._join as _join import pandas.hashtable as _hash +# back-compat of pseudo-public API +def concat_wrap(): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.tools.merge.concat is deprecated. 
" + "import from the public API: " + "pandas.concat instead", + FutureWarning, stacklevel=3) + return pd.concat(*args, **kwargs) + return wrapper + +concat = concat_wrap() + + @Substitution('\nleft : DataFrame') @Appender(_merge_doc, indents=0) def merge(left, right, how='inner', on=None, left_on=None, right_on=None, @@ -139,6 +144,7 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces, # preserve the original order # if we have a missing piece this can be reset + from pandas.tools.concat import concat result = concat(pieces, ignore_index=True) result = result.reindex(columns=pieces[0].columns, copy=False) return result, lby @@ -793,9 +799,9 @@ def _get_merge_keys(self): left, right = self.left, self.right is_lkey = lambda x: isinstance( - x, (np.ndarray, ABCSeries)) and len(x) == len(left) + x, (np.ndarray, Series)) and len(x) == len(left) is_rkey = lambda x: isinstance( - x, (np.ndarray, ABCSeries)) and len(x) == len(right) + x, (np.ndarray, Series)) and len(x) == len(right) # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A # user could, for example, request 'left_index' and 'left_by'. In a @@ -1419,606 +1425,6 @@ def _get_join_keys(llab, rlab, shape, sort): return _get_join_keys(llab, rlab, shape, sort) -# --------------------------------------------------------------------- -# Concatenate DataFrame objects - - -def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, - keys=None, levels=None, names=None, verify_integrity=False, - copy=True): - """ - Concatenate pandas objects along a particular axis with optional set logic - along the other axes. - - Can also add a layer of hierarchical indexing on the concatenation axis, - which may be useful if the labels are the same (or overlapping) on - the passed axis number. - - Parameters - ---------- - objs : a sequence or mapping of Series, DataFrame, or Panel objects - If a dict is passed, the sorted keys will be used as the `keys` - argument, unless it is passed, in which case the values will be - selected (see below). Any None objects will be dropped silently unless - they are all None in which case a ValueError will be raised - axis : {0/'index', 1/'columns'}, default 0 - The axis to concatenate along - join : {'inner', 'outer'}, default 'outer' - How to handle indexes on other axis(es) - join_axes : list of Index objects - Specific indexes to use for the other n - 1 axes instead of performing - inner/outer set logic - ignore_index : boolean, default False - If True, do not use the index values along the concatenation axis. The - resulting axis will be labeled 0, ..., n - 1. This is useful if you are - concatenating objects where the concatenation axis does not have - meaningful indexing information. Note the index values on the other - axes are still respected in the join. - keys : sequence, default None - If multiple levels passed, should contain tuples. Construct - hierarchical index using the passed keys as the outermost level - levels : list of sequences, default None - Specific levels (unique values) to use for constructing a - MultiIndex. Otherwise they will be inferred from the keys - names : list, default None - Names for the levels in the resulting hierarchical index - verify_integrity : boolean, default False - Check whether the new concatenated axis contains duplicates. 
This can - be very expensive relative to the actual data concatenation - copy : boolean, default True - If False, do not copy data unnecessarily - - Returns - ------- - concatenated : type of objects - - Notes - ----- - The keys, levels, and names arguments are all optional. - - A walkthrough of how this method fits in with other tools for combining - panda objects can be found `here - `__. - - See Also - -------- - Series.append - DataFrame.append - DataFrame.join - DataFrame.merge - - Examples - -------- - Combine two ``Series``. - - >>> s1 = pd.Series(['a', 'b']) - >>> s2 = pd.Series(['c', 'd']) - >>> pd.concat([s1, s2]) - 0 a - 1 b - 0 c - 1 d - dtype: object - - Clear the existing index and reset it in the result - by setting the ``ignore_index`` option to ``True``. - - >>> pd.concat([s1, s2], ignore_index=True) - 0 a - 1 b - 2 c - 3 d - dtype: object - - Add a hierarchical index at the outermost level of - the data with the ``keys`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2',]) - s1 0 a - 1 b - s2 0 c - 1 d - dtype: object - - Label the index keys you create with the ``names`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2'], - ... names=['Series name', 'Row ID']) - Series name Row ID - s1 0 a - 1 b - s2 0 c - 1 d - dtype: object - - Combine two ``DataFrame`` objects with identical columns. - - >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], - ... columns=['letter', 'number']) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], - ... columns=['letter', 'number']) - >>> df2 - letter number - 0 c 3 - 1 d 4 - >>> pd.concat([df1, df2]) - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects with overlapping columns - and return everything. Columns outside the intersection will - be filled with ``NaN`` values. - - >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], - ... columns=['letter', 'number', 'animal']) - >>> df3 - letter number animal - 0 c 3 cat - 1 d 4 dog - >>> pd.concat([df1, df3]) - animal letter number - 0 NaN a 1 - 1 NaN b 2 - 0 cat c 3 - 1 dog d 4 - - Combine ``DataFrame`` objects with overlapping columns - and return only those that are shared by passing ``inner`` to - the ``join`` keyword argument. - - >>> pd.concat([df1, df3], join="inner") - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects horizontally along the x axis by - passing in ``axis=1``. - - >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], - ... columns=['animal', 'name']) - >>> pd.concat([df1, df4], axis=1) - letter number animal name - 0 a 1 bird polly - 1 b 2 monkey george - - Prevent the result from including duplicate index values with the - ``verify_integrity`` option. 
- - >>> df5 = pd.DataFrame([1], index=['a']) - >>> df5 - 0 - a 1 - >>> df6 = pd.DataFrame([2], index=['a']) - >>> df6 - 0 - a 2 - >>> pd.concat([df5, df6], verify_integrity=True) - ValueError: Indexes have overlapping values: ['a'] - """ - op = _Concatenator(objs, axis=axis, join_axes=join_axes, - ignore_index=ignore_index, join=join, - keys=keys, levels=levels, names=names, - verify_integrity=verify_integrity, - copy=copy) - return op.get_result() - - -class _Concatenator(object): - """ - Orchestrates a concatenation operation for BlockManagers - """ - - def __init__(self, objs, axis=0, join='outer', join_axes=None, - keys=None, levels=None, names=None, - ignore_index=False, verify_integrity=False, copy=True): - if isinstance(objs, (NDFrame, compat.string_types)): - raise TypeError('first argument must be an iterable of pandas ' - 'objects, you passed an object of type ' - '"{0}"'.format(type(objs).__name__)) - - if join == 'outer': - self.intersect = False - elif join == 'inner': - self.intersect = True - else: # pragma: no cover - raise ValueError('Only can inner (intersect) or outer (union) ' - 'join the other axis') - - if isinstance(objs, dict): - if keys is None: - keys = sorted(objs) - objs = [objs[k] for k in keys] - else: - objs = list(objs) - - if len(objs) == 0: - raise ValueError('No objects to concatenate') - - if keys is None: - objs = [obj for obj in objs if obj is not None] - else: - # #1649 - clean_keys = [] - clean_objs = [] - for k, v in zip(keys, objs): - if v is None: - continue - clean_keys.append(k) - clean_objs.append(v) - objs = clean_objs - name = getattr(keys, 'name', None) - keys = Index(clean_keys, name=name) - - if len(objs) == 0: - raise ValueError('All objects passed were None') - - # consolidate data & figure out what our result ndim is going to be - ndims = set() - for obj in objs: - if not isinstance(obj, NDFrame): - raise TypeError("cannot concatenate a non-NDFrame object") - - # consolidate - obj.consolidate(inplace=True) - ndims.add(obj.ndim) - - # get the sample - # want the higest ndim that we have, and must be non-empty - # unless all objs are empty - sample = None - if len(ndims) > 1: - max_ndim = max(ndims) - for obj in objs: - if obj.ndim == max_ndim and np.sum(obj.shape): - sample = obj - break - - else: - # filter out the empties if we have not multi-index possibiltes - # note to keep empty Series as it affect to result columns / name - non_empties = [obj for obj in objs - if sum(obj.shape) > 0 or isinstance(obj, Series)] - - if (len(non_empties) and (keys is None and names is None and - levels is None and join_axes is None)): - objs = non_empties - sample = objs[0] - - if sample is None: - sample = objs[0] - self.objs = objs - - # Standardize axis parameter to int - if isinstance(sample, Series): - axis = DataFrame()._get_axis_number(axis) - else: - axis = sample._get_axis_number(axis) - - # Need to flip BlockManager axis in the DataFrame special case - self._is_frame = isinstance(sample, DataFrame) - if self._is_frame: - axis = 1 if axis == 0 else 0 - - self._is_series = isinstance(sample, ABCSeries) - if not 0 <= axis <= sample.ndim: - raise AssertionError("axis must be between 0 and {0}, " - "input was {1}".format(sample.ndim, axis)) - - # if we have mixed ndims, then convert to highest ndim - # creating column numbers as needed - if len(ndims) > 1: - current_column = 0 - max_ndim = sample.ndim - self.objs, objs = [], self.objs - for obj in objs: - - ndim = obj.ndim - if ndim == max_ndim: - pass - - elif ndim != max_ndim - 1: - raise 
ValueError("cannot concatenate unaligned mixed " - "dimensional NDFrame objects") - - else: - name = getattr(obj, 'name', None) - if ignore_index or name is None: - name = current_column - current_column += 1 - - # doing a row-wise concatenation so need everything - # to line up - if self._is_frame and axis == 1: - name = 0 - obj = sample._constructor({name: obj}) - - self.objs.append(obj) - - # note: this is the BlockManager axis (since DataFrame is transposed) - self.axis = axis - self.join_axes = join_axes - self.keys = keys - self.names = names or getattr(keys, 'names', None) - self.levels = levels - - self.ignore_index = ignore_index - self.verify_integrity = verify_integrity - self.copy = copy - - self.new_axes = self._get_new_axes() - - def get_result(self): - - # series only - if self._is_series: - - # stack blocks - if self.axis == 0: - # concat Series with length to keep dtype as much - non_empties = [x for x in self.objs if len(x) > 0] - if len(non_empties) > 0: - values = [x._values for x in non_empties] - else: - values = [x._values for x in self.objs] - new_data = _concat._concat_compat(values) - - name = com._consensus_name_attr(self.objs) - cons = _concat._get_series_result_type(new_data) - - return (cons(new_data, index=self.new_axes[0], - name=name, dtype=new_data.dtype) - .__finalize__(self, method='concat')) - - # combine as columns in a frame - else: - data = dict(zip(range(len(self.objs)), self.objs)) - cons = _concat._get_series_result_type(data) - - index, columns = self.new_axes - df = cons(data, index=index) - df.columns = columns - return df.__finalize__(self, method='concat') - - # combine block managers - else: - mgrs_indexers = [] - for obj in self.objs: - mgr = obj._data - indexers = {} - for ax, new_labels in enumerate(self.new_axes): - if ax == self.axis: - # Suppress reindexing on concat axis - continue - - obj_labels = mgr.axes[ax] - if not new_labels.equals(obj_labels): - indexers[ax] = obj_labels.reindex(new_labels)[1] - - mgrs_indexers.append((obj._data, indexers)) - - new_data = concatenate_block_managers( - mgrs_indexers, self.new_axes, concat_axis=self.axis, - copy=self.copy) - if not self.copy: - new_data._consolidate_inplace() - - cons = _concat._get_frame_result_type(new_data, self.objs) - return (cons._from_axes(new_data, self.new_axes) - .__finalize__(self, method='concat')) - - def _get_result_dim(self): - if self._is_series and self.axis == 1: - return 2 - else: - return self.objs[0].ndim - - def _get_new_axes(self): - ndim = self._get_result_dim() - new_axes = [None] * ndim - - if self.join_axes is None: - for i in range(ndim): - if i == self.axis: - continue - new_axes[i] = self._get_comb_axis(i) - else: - if len(self.join_axes) != ndim - 1: - raise AssertionError("length of join_axes must not be " - "equal to {0}".format(ndim - 1)) - - # ufff... - indices = lrange(ndim) - indices.remove(self.axis) - - for i, ax in zip(indices, self.join_axes): - new_axes[i] = ax - - new_axes[self.axis] = self._get_concat_axis() - return new_axes - - def _get_comb_axis(self, i): - if self._is_series: - all_indexes = [x.index for x in self.objs] - else: - try: - all_indexes = [x._data.axes[i] for x in self.objs] - except IndexError: - types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of %s" % types) - - return _get_combined_index(all_indexes, intersect=self.intersect) - - def _get_concat_axis(self): - """ - Return index to be used along concatenation axis. 
- """ - if self._is_series: - if self.axis == 0: - indexes = [x.index for x in self.objs] - elif self.ignore_index: - idx = com._default_index(len(self.objs)) - return idx - elif self.keys is None: - names = [None] * len(self.objs) - num = 0 - has_names = False - for i, x in enumerate(self.objs): - if not isinstance(x, Series): - raise TypeError("Cannot concatenate type 'Series' " - "with object of type " - "%r" % type(x).__name__) - if x.name is not None: - names[i] = x.name - has_names = True - else: - names[i] = num - num += 1 - if has_names: - return Index(names) - else: - return com._default_index(len(self.objs)) - else: - return _ensure_index(self.keys) - else: - indexes = [x._data.axes[self.axis] for x in self.objs] - - if self.ignore_index: - idx = com._default_index(sum(len(i) for i in indexes)) - return idx - - if self.keys is None: - concat_axis = _concat_indexes(indexes) - else: - concat_axis = _make_concat_multiindex(indexes, self.keys, - self.levels, self.names) - - self._maybe_check_integrity(concat_axis) - - return concat_axis - - def _maybe_check_integrity(self, concat_index): - if self.verify_integrity: - if not concat_index.is_unique: - overlap = concat_index.get_duplicates() - raise ValueError('Indexes have overlapping values: %s' - % str(overlap)) - - -def _concat_indexes(indexes): - return indexes[0].append(indexes[1:]) - - -def _make_concat_multiindex(indexes, keys, levels=None, names=None): - - if ((levels is None and isinstance(keys[0], tuple)) or - (levels is not None and len(levels) > 1)): - zipped = lzip(*keys) - if names is None: - names = [None] * len(zipped) - - if levels is None: - _, levels = _factorize_from_iterables(zipped) - else: - levels = [_ensure_index(x) for x in levels] - else: - zipped = [keys] - if names is None: - names = [None] - - if levels is None: - levels = [_ensure_index(keys)] - else: - levels = [_ensure_index(x) for x in levels] - - if not _all_indexes_same(indexes): - label_list = [] - - # things are potentially different sizes, so compute the exact labels - # for each level and pass those to MultiIndex.from_arrays - - for hlevel, level in zip(zipped, levels): - to_concat = [] - for key, index in zip(hlevel, indexes): - try: - i = level.get_loc(key) - except KeyError: - raise ValueError('Key %s not in level %s' - % (str(key), str(level))) - - to_concat.append(np.repeat(i, len(index))) - label_list.append(np.concatenate(to_concat)) - - concat_index = _concat_indexes(indexes) - - # these go at the end - if isinstance(concat_index, MultiIndex): - levels.extend(concat_index.levels) - label_list.extend(concat_index.labels) - else: - codes, categories = _factorize_from_iterable(concat_index) - levels.append(categories) - label_list.append(codes) - - if len(names) == len(levels): - names = list(names) - else: - # make sure that all of the passed indices have the same nlevels - if not len(set([idx.nlevels for idx in indexes])) == 1: - raise AssertionError("Cannot concat indices that do" - " not have the same number of levels") - - # also copies - names = names + _get_consensus_names(indexes) - - return MultiIndex(levels=levels, labels=label_list, names=names, - verify_integrity=False) - - new_index = indexes[0] - n = len(new_index) - kpieces = len(indexes) - - # also copies - new_names = list(names) - new_levels = list(levels) - - # construct labels - new_labels = [] - - # do something a bit more speedy - - for hlevel, level in zip(zipped, levels): - hlevel = _ensure_index(hlevel) - mapped = level.get_indexer(hlevel) - - mask = mapped == -1 - 
if mask.any(): - raise ValueError('Values not found in passed level: %s' - % str(hlevel[mask])) - - new_labels.append(np.repeat(mapped, n)) - - if isinstance(new_index, MultiIndex): - new_levels.extend(new_index.levels) - new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels]) - else: - new_levels.append(new_index) - new_labels.append(np.tile(np.arange(n), kpieces)) - - if len(new_names) < len(new_levels): - new_names.extend(new_index.names) - - return MultiIndex(levels=new_levels, labels=new_labels, names=new_names, - verify_integrity=False) - def _should_fill(lname, rname): if (not isinstance(lname, compat.string_types) or diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 01eefe5f07173..41fc705691a96 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -2,10 +2,8 @@ from pandas.types.common import is_list_like, is_scalar -from pandas import Series, DataFrame -from pandas.core.index import MultiIndex, Index +from pandas import Series, DataFrame, MultiIndex, Index, concat from pandas.core.groupby import Grouper -from pandas.tools.merge import concat from pandas.tools.util import cartesian_product from pandas.compat import range, lrange, zip from pandas import compat diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index ee70515850b25..0b1ced97d2b81 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -3135,7 +3135,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: - from pandas.tools.merge import concat + from pandas.tools.concat import concat keys, frames = zip(*grouped) if grouped.axis == 0: df = concat(frames, keys=keys, axis=1) diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py index ff0a494bd7d02..fe5821a637205 100644 --- a/pandas/tools/tests/test_join.py +++ b/pandas/tools/tests/test_join.py @@ -6,9 +6,8 @@ import pandas as pd from pandas.compat import lrange import pandas.compat as compat -from pandas.tools.merge import merge, concat from pandas.util.testing import assert_frame_equal -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, MultiIndex, Series, merge, concat import pandas._join as _join import pandas.util.testing as tm diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index a348a901442c9..d66cd793ec0be 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -8,7 +8,8 @@ import pandas as pd from pandas.compat import lrange, lzip -from pandas.tools.merge import merge, concat, MergeError +from pandas.tools.concat import concat +from pandas.tools.merge import merge, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, slow) diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tools/tests/test_merge_ordered.py index e08cc98e50794..e4a41ea9a28eb 100644 --- a/pandas/tools/tests/test_merge_ordered.py +++ b/pandas/tools/tests/test_merge_ordered.py @@ -40,10 +40,8 @@ def test_ffill(self): def test_multigroup(self): left = pd.concat([self.left, self.left], ignore_index=True) - # right = concat([self.right, self.right], ignore_index=True) left['group'] = ['a'] * 3 + ['b'] * 3 - # right['group'] = ['a'] * 4 + ['b'] * 4 result = merge_ordered(left, self.right, on='key', left_by='group', fill_method='ffill') diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 7f2bb7e724362..f5d91d0088306 100644 --- 
a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -3,8 +3,8 @@ import numpy as np import pandas as pd -from pandas import DataFrame, Series, Index, MultiIndex, Grouper, date_range -from pandas.tools.merge import concat +from pandas import (DataFrame, Series, Index, MultiIndex, + Grouper, date_range, concat) from pandas.tools.pivot import pivot_table, crosstab from pandas.compat import range, product import pandas.util.testing as tm From f593ee824c5649d6e0e61d249f4542c58dfe66c5 Mon Sep 17 00:00:00 2001 From: TrigonaMinima Date: Fri, 10 Feb 2017 00:33:21 +0530 Subject: [PATCH 034/353] TST: Remaining tseries tests reorg closes #14854 closes #15359 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/indexes/datetimes/test_ops.py | 14 +- pandas/tests/indexes/datetimes/test_setops.py | 10 +- .../tests => tests/tseries}/__init__.py | 0 .../tseries}/data/cday-0.14.1.pickle | Bin .../tseries}/data/dateoffset_0_15_2.pickle | 0 .../tseries}/test_bin_groupby.py | 0 .../tests => tests/tseries}/test_converter.py | 0 .../tseries}/test_frequencies.py | 0 .../tests => tests/tseries}/test_holiday.py | 0 .../tests => tests/tseries}/test_offsets.py | 0 .../tests => tests/tseries}/test_resample.py | 0 .../tests => tests/tseries}/test_timezones.py | 0 .../tseries/tests/data/daterange_073.pickle | Bin 650 -> 0 bytes pandas/tseries/tests/data/frame.pickle | Bin 1182 -> 0 bytes pandas/tseries/tests/data/series.pickle | Bin 646 -> 0 bytes .../tests/data/series_daterange0.pickle | Bin 357 -> 0 bytes .../tseries/tests/test_timeseries_legacy.py | 219 ------------------ setup.py | 5 +- 19 files changed, 25 insertions(+), 224 deletions(-) rename pandas/{tseries/tests => tests/tseries}/__init__.py (100%) rename pandas/{tseries/tests => tests/tseries}/data/cday-0.14.1.pickle (100%) rename pandas/{tseries/tests => tests/tseries}/data/dateoffset_0_15_2.pickle (100%) rename pandas/{tseries/tests => tests/tseries}/test_bin_groupby.py (100%) rename pandas/{tseries/tests => tests/tseries}/test_converter.py (100%) rename pandas/{tseries/tests => tests/tseries}/test_frequencies.py (100%) rename pandas/{tseries/tests => tests/tseries}/test_holiday.py (100%) rename pandas/{tseries/tests => tests/tseries}/test_offsets.py (100%) rename pandas/{tseries/tests => tests/tseries}/test_resample.py (100%) rename pandas/{tseries/tests => tests/tseries}/test_timezones.py (100%) delete mode 100644 pandas/tseries/tests/data/daterange_073.pickle delete mode 100644 pandas/tseries/tests/data/frame.pickle delete mode 100644 pandas/tseries/tests/data/series.pickle delete mode 100644 pandas/tseries/tests/data/series_daterange0.pickle delete mode 100644 pandas/tseries/tests/test_timeseries_legacy.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2279d0464a5c7..17ce4517035a7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -371,6 +371,7 @@ Other API Changes - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) - The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. 
Furthermore ``FLOAT`` columns with values above 10**4 are no more casted to ``int64`` which also caused precision lost (:issue: `14064`, :issue:`14305`). +- Reorganization of timeseries development tests (:issue:`14854`) .. _whatsnew_0200.deprecations: diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 63bf07ec041d3..7a5ce3a44681b 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,10 +7,12 @@ import pandas.util.testing as tm from pandas.core.common import PerformanceWarning from pandas.tseries.index import cdate_range +from pandas.tseries.frequencies import get_offset, to_offset from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, datetime, Float64Index, offsets, bdate_range) -from pandas.tseries.offsets import BMonthEnd, CDay, BDay +from pandas.tseries.offsets import (BMonthEnd, CDay, BDay, Milli, MonthBegin, + Micro) from pandas.tests.test_base import Ops @@ -911,6 +913,16 @@ def test_equals(self): self.assertFalse(idx.equals(list(idx3))) self.assertFalse(idx.equals(pd.Series(idx3))) + def test_ms_vs_MS(self): + left = get_offset('ms') + right = get_offset('MS') + self.assertEqual(left, Milli()) + self.assertEqual(right, MonthBegin()) + + def test_rule_aliases(self): + rule = to_offset('10us') + self.assertEqual(rule, Micro(10)) + class TestDateTimeIndexToJulianDate(tm.TestCase): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 7da660a956e23..8d05a4016ba45 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm from pandas.tseries.index import cdate_range from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, - Int64Index, Index) + Int64Index, Index, to_datetime) from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) @@ -190,6 +190,14 @@ def test_datetimeindex_union_join_empty(self): result = dti.join(empty) tm.assertIsInstance(result, DatetimeIndex) + def test_join_nonunique(self): + idx1 = to_datetime(['2012-11-06 16:00:11.477563', + '2012-11-06 16:00:11.477563']) + idx2 = to_datetime(['2012-11-06 15:11:09.006507', + '2012-11-06 15:11:09.006507']) + rs = idx1.join(idx2, how='outer') + self.assertTrue(rs.is_monotonic) + class TestBusinessDatetimeIndex(tm.TestCase): diff --git a/pandas/tseries/tests/__init__.py b/pandas/tests/tseries/__init__.py similarity index 100% rename from pandas/tseries/tests/__init__.py rename to pandas/tests/tseries/__init__.py diff --git a/pandas/tseries/tests/data/cday-0.14.1.pickle b/pandas/tests/tseries/data/cday-0.14.1.pickle similarity index 100% rename from pandas/tseries/tests/data/cday-0.14.1.pickle rename to pandas/tests/tseries/data/cday-0.14.1.pickle diff --git a/pandas/tseries/tests/data/dateoffset_0_15_2.pickle b/pandas/tests/tseries/data/dateoffset_0_15_2.pickle similarity index 100% rename from pandas/tseries/tests/data/dateoffset_0_15_2.pickle rename to pandas/tests/tseries/data/dateoffset_0_15_2.pickle diff --git a/pandas/tseries/tests/test_bin_groupby.py b/pandas/tests/tseries/test_bin_groupby.py similarity index 100% rename from pandas/tseries/tests/test_bin_groupby.py rename to pandas/tests/tseries/test_bin_groupby.py diff --git a/pandas/tseries/tests/test_converter.py 
b/pandas/tests/tseries/test_converter.py similarity index 100% rename from pandas/tseries/tests/test_converter.py rename to pandas/tests/tseries/test_converter.py diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py similarity index 100% rename from pandas/tseries/tests/test_frequencies.py rename to pandas/tests/tseries/test_frequencies.py diff --git a/pandas/tseries/tests/test_holiday.py b/pandas/tests/tseries/test_holiday.py similarity index 100% rename from pandas/tseries/tests/test_holiday.py rename to pandas/tests/tseries/test_holiday.py diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tests/tseries/test_offsets.py similarity index 100% rename from pandas/tseries/tests/test_offsets.py rename to pandas/tests/tseries/test_offsets.py diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tests/tseries/test_resample.py similarity index 100% rename from pandas/tseries/tests/test_resample.py rename to pandas/tests/tseries/test_resample.py diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tests/tseries/test_timezones.py similarity index 100% rename from pandas/tseries/tests/test_timezones.py rename to pandas/tests/tseries/test_timezones.py diff --git a/pandas/tseries/tests/data/daterange_073.pickle b/pandas/tseries/tests/data/daterange_073.pickle deleted file mode 100644 index 0214a023e6338dce54e6daf8b3d94a7275baca66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 650 zcmZY7OH0E*5C`z4PrH3+U)q<}*CAed_9jSAE<`BoQDl>BZ848-vOy{q^blLWo!>~) zb{G%N!ZQ3A=JKESwB<$ad@;2AKn&f;Q8OL{d_f)qVfkLDg2+-tYSx^4HV=1WHdi9x z-jg7sq#JKLnWm|jY36DyGdk61Gu|yGwpz>uky)0$zosdwB?CE~W|;P77{=XCQrnN- zDD&$<=5=ecUCmrUu#p8u3g22LwXJw8_oh3^q7*@LCj=6X`nPgnkX%h7Rn(=8|4V3gVF}+qI5udC|!^~N>3;w{`{A; z@_i>Hx1;1JWdG_z9xvsI&WfHNxZIh&3OQJ_yg!+QLdny=^fnRN!cm;avn2QACCQ(& Y?DLBq%8RAEoDS9@(>$t0rm-@Izk(m6nE(I) diff --git a/pandas/tseries/tests/data/frame.pickle b/pandas/tseries/tests/data/frame.pickle deleted file mode 100644 index b3b100fb43022faf7bd0b949238988afc7de53bf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1182 zcmZo*N-jvuOGzx&OU^G!)k`Z%%uVHTNi0cp1G5SlH5&_2m1O3Xq!#5R<`i=|<>V)4 z`zGckrl%Hh6*2==vcwj$Y9!~C<`z^!%*!p!DalMMDoU*6iZ4n{&d)0@DJo4a;VNW9 zu{JX=CAEUfGq1$V#1qUcWcOxh4P{Jf4=Uu)@MiR8ZH1W1l~Ph!kjhoa8OoGt;mzR9 z2voqO;msV%XyfPS=k*^5z=StLNm6I11_Kl@LTM%_%?zbkpmd2}YgY7m%J$PITE56D?utru>CGx=D$H7fJyWf^==6j7BJDUVc$-VoqjNYN2dL zC|iD7T5)Pgp&TM4K*5ocnp2XJQig0taVTS+H)Cm% zUwcw&Y@sqRmcZ$Y3z%rZ>8el#9w(~cq~guh28xw5SgfewNFN;`6M*TW;fH?PbI(qD zpGB$Jd180$pY_~~+OhDdefBm(`J$sw?Iq%0oobwM+g|&L=xlec2ljnYDGD!n-q>g8 zUwoA0f6#t{RL7q$(nsuHeSXyJ?tR(*#=mDLA1WNNXXv=rfBD92`>Bn3vYT!_x3`GD za8a>$i~TLpIhKblp4tZ;l`Kf#cV$0_Toy_unf!A`-q_VZL0AG-eV zsr|Adk2*u+Yxb8^`t(}$F4?E}MdinQzi+=`r_oc77x(PTws(Kna^b%HBhDjt=Q_T$ z&pcMAp>pVf{gs02oSzq6vX}X#bi8QMeftj!9HuX9TW+tmz5In8^E>;!XC?%#<#=n~ z!1O`X*y@NqU;2%8|888h*FUt3f9K|R_OmDVNaSvOY(Fu@LfxqNfxVtm!`}<-H-Wh& PF}6@WgCns$DM=3iVMVf9 diff --git a/pandas/tseries/tests/data/series.pickle b/pandas/tseries/tests/data/series.pickle deleted file mode 100644 index 307a4ac26517384a8267b23a8891e4cf919a20d8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 646 zcmZo*O3o|IEvVE>&M!*U%Pq|*$xJLNO049HFG@|$&nqq|DorloDr8J9NX$z~EQTm6 zPA$qzE#?Zz%uNl3FbkQy8CpXbliGs{nKir_y}4Q;#&V^UR2HOi6|#gfrCE40cryYO zuxfZShcepu`T2SM2LdqR%}|om85;0gre(H&?L+&pe^2_|L|)soNw4(YaOa)<>%zvK zYfcaC6He?G)>?kezSjQ7#_DSi>^Cm|Q~hw#b9(`k2lLpcEV8#d5t>_a^o4!SJ<&Jf 
z{KxD|GEg0!l30>jl$e*E%H;xN1%X+GY;dQuL!6!gbge(kwH#pA)}Xr99_ZTGLQaij zkbxz@VBmr?3b{hL*sn4&Gk`&BP$72)M1%z{!UGjyg^Tb)McCjXd{7Z~xClQ~gbOYr z02SeeiwHtRc;F&JP!V3Zh%i)y4=y5-TH@E*h7!YI@8sv_6mvPb024!@sAglKSZ$%W zMkr@qeo<~>PG(hVp+rY0TYg$vacW7SBqAh0!I6@hQ!OOZ59U9nA?q0TAECTVJ&E1=s@p_O5Y99O<1%Q zB;BptG-d=J1D0mc)|$GRnB*5MVgwiVVm1}zl5>9Zji(6111*Nz)~! zRiE7aziNM|S|k1TkABe8-^TPSq(1=Pb|Z~nI#^obZBbmI= 3") - - pth, _ = os.path.split(os.path.abspath(__file__)) - filepath = os.path.join(pth, 'data', 'frame.pickle') - - with open(filepath, 'rb') as f: - cls.frame = pickle.load(f) - - filepath = os.path.join(pth, 'data', 'series.pickle') - with open(filepath, 'rb') as f: - cls.series = pickle.load(f) - - def test_pass_offset_warn(self): - buf = StringIO() - - sys.stderr = buf - DatetimeIndex(start='1/1/2000', periods=10, offset='H') - sys.stderr = sys.__stderr__ - - def test_unpickle_legacy_frame(self): - dtindex = DatetimeIndex(start='1/3/2005', end='1/14/2005', - freq=BDay(1)) - - unpickled = self.frame - - self.assertEqual(type(unpickled.index), DatetimeIndex) - self.assertEqual(len(unpickled), 10) - self.assertTrue((unpickled.columns == Int64Index(np.arange(5))).all()) - self.assertTrue((unpickled.index == dtindex).all()) - self.assertEqual(unpickled.index.offset, BDay(1, normalize=True)) - - def test_unpickle_legacy_series(self): - unpickled = self.series - - dtindex = DatetimeIndex(start='1/3/2005', end='1/14/2005', - freq=BDay(1)) - - self.assertEqual(type(unpickled.index), DatetimeIndex) - self.assertEqual(len(unpickled), 10) - self.assertTrue((unpickled.index == dtindex).all()) - self.assertEqual(unpickled.index.offset, BDay(1, normalize=True)) - - def test_unpickle_legacy_len0_daterange(self): - pth, _ = os.path.split(os.path.abspath(__file__)) - filepath = os.path.join(pth, 'data', 'series_daterange0.pickle') - - result = pd.read_pickle(filepath) - - ex_index = DatetimeIndex([], freq='B') - - self.assert_index_equal(result.index, ex_index) - tm.assertIsInstance(result.index.freq, BDay) - self.assertEqual(len(result), 0) - - def test_arithmetic_interaction(self): - index = self.frame.index - obj_index = index.asobject - - dseries = Series(rand(len(index)), index=index) - oseries = Series(dseries.values, index=obj_index) - - result = dseries + oseries - expected = dseries * 2 - tm.assertIsInstance(result.index, DatetimeIndex) - assert_series_equal(result, expected) - - result = dseries + oseries[:5] - expected = dseries + dseries[:5] - tm.assertIsInstance(result.index, DatetimeIndex) - assert_series_equal(result, expected) - - def test_join_interaction(self): - index = self.frame.index - obj_index = index.asobject - - def _check_join(left, right, how='inner'): - ra, rb, rc = left.join(right, how=how, return_indexers=True) - ea, eb, ec = left.join(DatetimeIndex(right), how=how, - return_indexers=True) - - tm.assertIsInstance(ra, DatetimeIndex) - self.assert_index_equal(ra, ea) - - assert_almost_equal(rb, eb) - assert_almost_equal(rc, ec) - - _check_join(index[:15], obj_index[5:], how='inner') - _check_join(index[:15], obj_index[5:], how='outer') - _check_join(index[:15], obj_index[5:], how='right') - _check_join(index[:15], obj_index[5:], how='left') - - def test_join_nonunique(self): - idx1 = to_datetime(['2012-11-06 16:00:11.477563', - '2012-11-06 16:00:11.477563']) - idx2 = to_datetime(['2012-11-06 15:11:09.006507', - '2012-11-06 15:11:09.006507']) - rs = idx1.join(idx2, how='outer') - self.assertTrue(rs.is_monotonic) - - def test_unpickle_daterange(self): - pth, _ = 
os.path.split(os.path.abspath(__file__)) - filepath = os.path.join(pth, 'data', 'daterange_073.pickle') - - rng = read_pickle(filepath) - tm.assertIsInstance(rng[0], datetime) - tm.assertIsInstance(rng.offset, BDay) - self.assertEqual(rng.values.dtype, object) - - def test_setops(self): - index = self.frame.index - obj_index = index.asobject - - result = index[:5].union(obj_index[5:]) - expected = index - tm.assertIsInstance(result, DatetimeIndex) - self.assert_index_equal(result, expected) - - result = index[:10].intersection(obj_index[5:]) - expected = index[5:10] - tm.assertIsInstance(result, DatetimeIndex) - self.assert_index_equal(result, expected) - - result = index[:10] - obj_index[5:] - expected = index[:5] - tm.assertIsInstance(result, DatetimeIndex) - self.assert_index_equal(result, expected) - - def test_index_conversion(self): - index = self.frame.index - obj_index = index.asobject - - conv = DatetimeIndex(obj_index) - self.assert_index_equal(conv, index) - - self.assertRaises(ValueError, DatetimeIndex, ['a', 'b', 'c', 'd']) - - def test_tolist(self): - rng = date_range('1/1/2000', periods=10) - - result = rng.tolist() - tm.assertIsInstance(result[0], Timestamp) - - def test_object_convert_fail(self): - idx = DatetimeIndex([np.NaT]) - self.assertRaises(ValueError, idx.astype, 'O') - - def test_setops_conversion_fail(self): - index = self.frame.index - - right = Index(['a', 'b', 'c', 'd']) - - result = index.union(right) - expected = Index(np.concatenate([index.asobject, right])) - self.assert_index_equal(result, expected) - - result = index.intersection(right) - expected = Index([]) - self.assert_index_equal(result, expected) - - def test_legacy_time_rules(self): - rules = [('WEEKDAY', 'B'), ('EOM', 'BM'), ('W@MON', 'W-MON'), - ('W@TUE', 'W-TUE'), ('W@WED', 'W-WED'), ('W@THU', 'W-THU'), - ('W@FRI', 'W-FRI'), ('Q@JAN', 'BQ-JAN'), ('Q@FEB', 'BQ-FEB'), - ('Q@MAR', 'BQ-MAR'), ('A@JAN', 'BA-JAN'), ('A@FEB', 'BA-FEB'), - ('A@MAR', 'BA-MAR'), ('A@APR', 'BA-APR'), ('A@MAY', 'BA-MAY'), - ('A@JUN', 'BA-JUN'), ('A@JUL', 'BA-JUL'), ('A@AUG', 'BA-AUG'), - ('A@SEP', 'BA-SEP'), ('A@OCT', 'BA-OCT'), ('A@NOV', 'BA-NOV'), - ('A@DEC', 'BA-DEC'), ('WOM@1FRI', 'WOM-1FRI'), - ('WOM@2FRI', 'WOM-2FRI'), ('WOM@3FRI', 'WOM-3FRI'), - ('WOM@4FRI', 'WOM-4FRI')] - - start, end = '1/1/2000', '1/1/2010' - - for old_freq, new_freq in rules: - old_rng = date_range(start, end, freq=old_freq) - new_rng = date_range(start, end, freq=new_freq) - self.assert_index_equal(old_rng, new_rng) - - def test_ms_vs_MS(self): - left = get_offset('ms') - right = get_offset('MS') - self.assertEqual(left, Milli()) - self.assertEqual(right, MonthBegin()) - - def test_rule_aliases(self): - rule = to_offset('10us') - self.assertEqual(rule, Micro(10)) diff --git a/setup.py b/setup.py index c3cb56f2d6d1b..edec53e9cefb0 100755 --- a/setup.py +++ b/setup.py @@ -648,13 +648,13 @@ def pxd(name): 'pandas.tests.series', 'pandas.tests.formats', 'pandas.tests.scalar', + 'pandas.tests.tseries', 'pandas.tests.types', 'pandas.tests.test_msgpack', 'pandas.tests.plotting', 'pandas.tools', 'pandas.tools.tests', 'pandas.tseries', - 'pandas.tseries.tests', 'pandas.types', 'pandas.io.tests', 'pandas.io.tests.json', @@ -688,8 +688,7 @@ def pxd(name): 'pandas.tests': ['data/*.csv'], 'pandas.tests.formats': ['data/*.csv'], 'pandas.tests.indexes': ['data/*.pickle'], - 'pandas.tseries.tests': ['data/*.pickle', - 'data/*.csv'] + 'pandas.tests.tseries': ['data/*.pickle'] }, ext_modules=extensions, maintainer_email=EMAIL, From 
e303e268770824f3259f456263aaa1b1783a7aab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Feb 2017 18:34:18 -0500 Subject: [PATCH 035/353] TST: more tseries reorg --- .../tests/{tseries => groupby}/test_bin_groupby.py | 0 pandas/tests/indexes/datetimes/test_ops.py | 14 +------------- pandas/tests/tseries/test_frequencies.py | 12 ++++++++++++ 3 files changed, 13 insertions(+), 13 deletions(-) rename pandas/tests/{tseries => groupby}/test_bin_groupby.py (100%) diff --git a/pandas/tests/tseries/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py similarity index 100% rename from pandas/tests/tseries/test_bin_groupby.py rename to pandas/tests/groupby/test_bin_groupby.py diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 7a5ce3a44681b..63bf07ec041d3 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,12 +7,10 @@ import pandas.util.testing as tm from pandas.core.common import PerformanceWarning from pandas.tseries.index import cdate_range -from pandas.tseries.frequencies import get_offset, to_offset from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, datetime, Float64Index, offsets, bdate_range) -from pandas.tseries.offsets import (BMonthEnd, CDay, BDay, Milli, MonthBegin, - Micro) +from pandas.tseries.offsets import BMonthEnd, CDay, BDay from pandas.tests.test_base import Ops @@ -913,16 +911,6 @@ def test_equals(self): self.assertFalse(idx.equals(list(idx3))) self.assertFalse(idx.equals(pd.Series(idx3))) - def test_ms_vs_MS(self): - left = get_offset('ms') - right = get_offset('MS') - self.assertEqual(left, Milli()) - self.assertEqual(right, MonthBegin()) - - def test_rule_aliases(self): - rule = to_offset('10us') - self.assertEqual(rule, Micro(10)) - class TestDateTimeIndexToJulianDate(tm.TestCase): diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 9983bf5270b29..5fbef465ca8fc 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -247,6 +247,18 @@ def test_anchored_shortcuts(self): frequencies.to_offset(invalid_anchor) +def test_ms_vs_MS(): + left = frequencies.get_offset('ms') + right = frequencies.get_offset('MS') + assert left == offsets.Milli() + assert right == offsets.MonthBegin() + + +def test_rule_aliases(): + rule = frequencies.to_offset('10us') + assert rule == offsets.Micro(10) + + def test_get_rule_month(): result = frequencies._get_rule_month('W') assert (result == 'DEC') From 3d6fcdcd356b2b1853346bc4e709baa3bf16ddad Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 9 Feb 2017 19:16:38 -0500 Subject: [PATCH 036/353] API: Reformat output of groupby.describe (#4792) closes #4792 Author: Matt Roeschke Author: Matthew Roeschke Closes #15260 from mroeschke/fix_4792 and squashes the following commits: 618bc46 [Matthew Roeschke] Merge branch 'master' into fix_4792 184378d [Matt Roeschke] TST: groupby.describe levels don't appear as column (#4792) --- doc/source/whatsnew/v0.20.0.txt | 53 +++++++++++++++++ pandas/core/groupby.py | 19 +++++- pandas/tests/formats/test_format.py | 33 +++-------- pandas/tests/groupby/test_categorical.py | 22 ++++--- pandas/tests/groupby/test_groupby.py | 74 +++++++++++++++++------- pandas/tests/test_generic.py | 8 +-- 6 files changed, 150 insertions(+), 59 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt 
b/doc/source/whatsnew/v0.20.0.txt index 17ce4517035a7..6fe066b08e255 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -356,6 +356,59 @@ New Behavior: In [11]: index.memory_usage(deep=True) Out[11]: 260 +.. _whatsnew_0200.api_breaking.groupby_describe: + +Groupby Describe Formatting +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The output formatting of ``groupby.describe()`` now labels the ``describe()`` metrics in the columns instead of the index. +This format is consistent with ``groupby.agg()`` when applying multiple functions at once. (:issue:`4792`) + +Previous Behavior: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + In [2]: df.groupby('A').describe() + Out[2]: + B + A + 1 count 2.000000 + mean 1.500000 + std 0.707107 + min 1.000000 + 25% 1.250000 + 50% 1.500000 + 75% 1.750000 + max 2.000000 + 2 count 2.000000 + mean 3.500000 + std 0.707107 + min 3.000000 + 25% 3.250000 + 50% 3.500000 + 75% 3.750000 + max 4.000000 + + In [3]: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + Out[3]: + B + mean std amin amax + A + 1 1.5 0.707107 1 2 + 2 3.5 0.707107 3 4 + +New Behavior: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + df.groupby('A').describe() + + df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + .. _whatsnew_0200.api: Other API Changes diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 53b6dbe6075cf..a228861270aea 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -80,7 +80,6 @@ 'mean', 'sum', 'min', 'max', 'cumcount', 'resample', - 'describe', 'rank', 'quantile', 'fillna', 'mad', @@ -1138,6 +1137,16 @@ def ohlc(self): return self._apply_to_column_groupbys( lambda x: x._cython_agg_general('ohlc')) + @Appender(DataFrame.describe.__doc__) + @Substitution(name='groupby') + @Appender(_doc_template) + def describe(self, **kwargs): + self._set_group_selection() + result = self.apply(lambda x: x.describe(**kwargs)) + if self.axis == 1: + return result.T + return result.unstack() + @Substitution(name='groupby') @Appender(_doc_template) def resample(self, rule, *args, **kwargs): @@ -3039,6 +3048,14 @@ def nlargest(self, n=5, keep='first'): def nsmallest(self, n=5, keep='first'): return self.apply(lambda x: x.nsmallest(n=n, keep=keep)) + @Appender(Series.describe.__doc__) + def describe(self, **kwargs): + self._set_group_selection() + result = self.apply(lambda x: x.describe(**kwargs)) + if self.axis == 1: + return result.T + return result.unstack() + def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index a9553d9ea10cb..99cc70ae36f6b 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -3545,30 +3545,15 @@ def test_to_latex_multiindex(self): self.assertEqual(result, expected) result = df.groupby('a').describe().to_latex() - expected = r"""\begin{tabular}{llr} -\toprule - & & c \\ -a & {} & \\ -\midrule -0 & count & 2.000000 \\ - & mean & 1.500000 \\ - & std & 0.707107 \\ - & min & 1.000000 \\ - & 25\% & 1.250000 \\ - & 50\% & 1.500000 \\ - & 75\% & 1.750000 \\ - & max & 2.000000 \\ -1 & count & 2.000000 \\ - & mean & 3.500000 \\ - & std & 0.707107 \\ - & min & 3.000000 \\ - & 25\% & 3.250000 \\ - & 50\% & 3.500000 \\ - & 75\% & 3.750000 \\ - & max & 4.000000 \\ -\bottomrule -\end{tabular} -""" + expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' + ' & 
& & & & & ' + '\\\\\n{} & count & mean & std & min & 25\\% & ' + '50\\% & 75\\% & max \\\\\na & & & ' + ' & & & & & \\\\\n\\midrule\n0 ' + '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' + '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' + '& 3.5 & 3.75 & 4.0 ' + '\\\\\n\\bottomrule\n\\end{tabular}\n') self.assertEqual(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 8952b520f4f78..eebd0e0f490c1 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -107,17 +107,20 @@ def test_groupby_categorical(self): exp_cats = Categorical(ord_labels, ordered=True, categories=['foo', 'bar', 'baz', 'qux']) expected = ord_data.groupby(exp_cats, sort=False).describe() - expected.index.names = [None, None] assert_frame_equal(desc_result, expected) # GH 10460 expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) - self.assert_index_equal(desc_result.index.get_level_values(0), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(0)), exp) exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) - self.assert_index_equal(desc_result.index.get_level_values(1), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(1)), exp) def test_groupby_datetime_categorical(self): # GH9049: ensure backward compatibility @@ -144,7 +147,6 @@ def test_groupby_datetime_categorical(self): ord_labels = cats.take_nd(idx) ord_data = data.take(idx) expected = ord_data.groupby(ord_labels).describe() - expected.index.names = [None, None] assert_frame_equal(desc_result, expected) tm.assert_index_equal(desc_result.index, expected.index) tm.assert_index_equal( @@ -155,10 +157,14 @@ def test_groupby_datetime_categorical(self): expc = Categorical.from_codes( np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) - self.assert_index_equal(desc_result.index.get_level_values(0), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(0)), exp) exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) - self.assert_index_equal(desc_result.index.get_level_values(1), exp) + self.assert_index_equal((desc_result.stack() + .index + .get_level_values(1)), exp) def test_groupby_categorical_index(self): @@ -195,8 +201,8 @@ def test_groupby_describe_categorical_columns(self): df = DataFrame(np.random.randn(20, 4), columns=cats) result = df.groupby([1, 2, 3, 4] * 5).describe() - tm.assert_index_equal(result.columns, cats) - tm.assert_categorical_equal(result.columns.values, cats.values) + tm.assert_index_equal(result.stack().columns, cats) + tm.assert_categorical_equal(result.stack().columns.values, cats.values) def test_groupby_unstack_categorical(self): # GH11558 (example is taken from the original issue) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 53f85349834ac..d625fa07d932c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1085,7 +1085,7 @@ def test_attr_wrapper(self): for name, gp in grouped: expected[name] = gp.describe() expected = DataFrame(expected).T - assert_frame_equal(result.unstack(), expected) + assert_frame_equal(result, expected) # get attribute result = grouped.dtype @@ -1097,7 +1097,7 @@ def test_attr_wrapper(self): def test_series_describe_multikey(self): ts = tm.makeTimeSeries() grouped = ts.groupby([lambda x: x.year, 
lambda x: x.month]) - result = grouped.describe().unstack() + result = grouped.describe() assert_series_equal(result['mean'], grouped.mean(), check_names=False) assert_series_equal(result['std'], grouped.std(), check_names=False) assert_series_equal(result['min'], grouped.min(), check_names=False) @@ -1106,7 +1106,7 @@ def test_series_describe_single(self): ts = tm.makeTimeSeries() grouped = ts.groupby(lambda x: x.month) result = grouped.apply(lambda x: x.describe()) - expected = grouped.describe() + expected = grouped.describe().stack() assert_series_equal(result, expected) def test_series_index_name(self): @@ -1117,17 +1117,27 @@ def test_series_index_name(self): def test_frame_describe_multikey(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() - + desc_groups = [] for col in self.tsframe: - expected = grouped[col].describe() - assert_series_equal(result[col], expected, check_names=False) + group = grouped[col].describe() + group_col = pd.MultiIndex([[col] * len(group.columns), + group.columns], + [[0] * len(group.columns), + range(len(group.columns))]) + group = pd.DataFrame(group.values, + columns=group_col, + index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) groupedT = self.tsframe.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() - - for name, group in groupedT: - assert_frame_equal(result[name], group.describe()) + expected = self.tsframe.describe().T + expected.index = pd.MultiIndex([[0, 0, 1, 1], expected.index], + [range(4), range(len(expected.index))]) + tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): @@ -1137,10 +1147,27 @@ def test_frame_describe_tupleindex(self): 'z': [100, 200, 300, 400, 500] * 3}) df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 df2 = df1.rename(columns={'k': 'key'}) - result = df1.groupby('k').describe() - expected = df2.groupby('key').describe() - expected.index.set_names(result.index.names, inplace=True) - assert_frame_equal(result, expected) + tm.assertRaises(ValueError, lambda: df1.groupby('k').describe()) + tm.assertRaises(ValueError, lambda: df2.groupby('key').describe()) + + def test_frame_describe_unstacked_format(self): + # GH 4792 + prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} + volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} + df = pd.DataFrame({'PRICE': prices, + 'VOLUME': volumes}) + result = df.groupby('PRICE').VOLUME.describe() + data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist()] + expected = pd.DataFrame(data, + index=pd.Index([24990, 25499], name='PRICE'), + columns=['count', 'mean', 'std', 'min', + '25%', '50%', '75%', 'max']) + tm.assert_frame_equal(result, expected) def test_frame_groupby(self): grouped = self.tsframe.groupby(lambda x: x.weekday()) @@ -2545,16 +2572,21 @@ def test_non_cython_api(self): assert_frame_equal(result, expected) # describe - expected = DataFrame(dict(B=concat( - [df.loc[[0, 1], 'B'].describe(), df.loc[[2], 'B'].describe()], - keys=[1, 3]))) - expected.index.names = ['A', None] + expected_index = pd.Index([1, 3], name='A') + expected_col = pd.MultiIndex(levels=[['B'], + ['count', 
'mean', 'std', 'min', + '25%', '50%', '75%', 'max']], + labels=[[0] * 8, list(range(8))]) + expected = pd.DataFrame([[1.0, 2.0, nan, 2.0, 2.0, 2.0, 2.0, 2.0], + [0.0, nan, nan, nan, nan, nan, nan, nan]], + index=expected_index, + columns=expected_col) result = g.describe() assert_frame_equal(result, expected) - expected = concat( - [df.loc[[0, 1], ['A', 'B']].describe(), - df.loc[[2], ['A', 'B']].describe()], keys=[0, 1]) + expected = pd.concat([df[df.A == 1].describe().unstack().to_frame().T, + df[df.A == 3].describe().unstack().to_frame().T]) + expected.index = pd.Index([0, 1]) result = gni.describe() assert_frame_equal(result, expected) @@ -3872,7 +3904,6 @@ def test_groupby_whitelist(self): 'tail', 'cumcount', 'resample', - 'describe', 'rank', 'quantile', 'fillna', @@ -3909,7 +3940,6 @@ def test_groupby_whitelist(self): 'tail', 'cumcount', 'resample', - 'describe', 'rank', 'quantile', 'fillna', diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index bb341c26d454e..e84e2d6809e7b 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1267,10 +1267,10 @@ def test_describe_typefiltering_groupby(self): 'numD': np.arange(24.) + .5, 'ts': tm.makeTimeSeries()[:24].index}) G = df.groupby('catA') - self.assertTrue(G.describe(include=['number']).shape == (16, 2)) - self.assertTrue(G.describe(include=['number', 'object']).shape == (22, - 3)) - self.assertTrue(G.describe(include='all').shape == (26, 4)) + self.assertTrue(G.describe(include=['number']).shape == (2, 16)) + self.assertTrue(G.describe(include=['number', 'object']).shape == (2, + 33)) + self.assertTrue(G.describe(include='all').shape == (2, 52)) def test_describe_multi_index_df_column_names(self): """ Test that column names persist after the describe operation.""" From e8840725447859531ddcc4b878266f2043fb6465 Mon Sep 17 00:00:00 2001 From: Tobias Gustafsson Date: Fri, 10 Feb 2017 09:09:31 -0500 Subject: [PATCH 037/353] BUG: Fix #15344 by backporting ujson usage of PEP 393 API Make use of the PEP 393 API to avoid expanding single byte ascii characters into four byte unicode characters when encoding objects to json. 
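For illustration only (not part of the commit): the symptom can be reproduced by measuring string memory before and after an encode, which is what the regression test added below checks. A minimal sketch:

    import pandas as pd

    df = pd.DataFrame({'a': ['1']})
    before = df.memory_usage(index=True, deep=True).sum()
    df.to_json()
    after = df.memory_usage(index=True, deep=True).sum()
    # before this patch, `after` could exceed `before`: encoding through the
    # legacy Py_UNICODE API left a wide (four bytes per character) buffer
    # cached on each ascii string object
    assert before == after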
closes #15344 Author: Tobias Gustafsson Closes #15360 from tobgu/backport-ujson-compact-ascii-encoding and squashes the following commits: 44de133 [Tobias Gustafsson] Fix C-code formatting to pass linting of GH15344 b7e404f [Tobias Gustafsson] Merge branch 'master' into backport-ujson-compact-ascii-encoding 4e8e2ff [Tobias Gustafsson] BUG: Fix #15344 by backporting ujson usage of PEP 393 APIs for compact ascii --- doc/source/whatsnew/v0.20.0.txt | 5 ++++- pandas/io/tests/json/test_pandas.py | 10 ++++++++++ pandas/src/ujson/python/objToJSON.c | 10 ++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6fe066b08e255..5fbce3d2594a9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -538,6 +538,8 @@ Bug Fixes - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) +- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) +- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) - Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) @@ -561,7 +563,6 @@ Bug Fixes - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) -- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) @@ -574,4 +575,6 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) + + - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index ee5039c38b182..440f5c13d5121 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -1044,3 +1044,13 @@ def roundtrip(s, encoding='latin-1'): for s in examples: roundtrip(s) + + def test_data_frame_size_after_to_json(self): + # GH15344 + df = DataFrame({'a': [str(1)]}) + + size_before = df.memory_usage(index=True, deep=True).sum() + df.to_json() + size_after = df.memory_usage(index=True, deep=True).sum() + + self.assertEqual(size_before, size_after) diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 42c0b62a57511..e3c75d3b6e081 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -402,6 +402,16 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { PyObject *obj = (PyObject *)_obj; + +#if (PY_VERSION_HEX >= 0x03030000) + if (PyUnicode_IS_COMPACT_ASCII(obj)) { + Py_ssize_t len; + char *data = PyUnicode_AsUTF8AndSize(obj, &len); + *_outLen = len; + 
return data; + } +#endif + PyObject *newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), NULL); From ab8822ae85ab469efd338d34c10aef6ff89cc8d0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 10 Feb 2017 10:33:36 -0500 Subject: [PATCH 038/353] TST: Use pytest closes https://github.com/pydata/pandas/issues/13097 Author: Tom Augspurger Closes #13856 from TomAugspurger/pytest and squashes the following commits: 59e2be9 [Tom Augspurger] NOSE_ARGS -> TEST_ARGS 03695aa [Tom Augspurger] TST: Remove disabled marks 42790ae [Tom Augspurger] TST: Remove test_multi.sh 40d7336 [Tom Augspurger] PKG: redo pd.test import 9ba1f12 [Tom Augspurger] TST: Skip if getlocale is None 14c447c [Tom Augspurger] TST: pd.test uses pytest c4f6008 [Tom Augspurger] TST/CI: Use pytest b268d89 [Tom Augspurger] TST: Change method to make reporting more standard a638390 [Tom Augspurger] TST: Test consistency change c8dc927 [Tom Augspurger] TST: Refactor to use setup_class 9b5f2b2 [Tom Augspurger] TST: Refactor sql test inheritance --- .gitignore | 2 + .travis.yml | 38 ++-- appveyor.yml | 8 +- ci/install_test.sh | 3 +- ci/install_travis.sh | 11 +- ci/requirements_all.txt | 2 + ci/requirements_dev.txt | 2 + ci/script.sh | 8 +- doc/source/contributing.rst | 24 ++- doc/source/install.rst | 4 +- doc/source/whatsnew/v0.20.0.txt | 3 + pandas/__init__.py | 6 +- pandas/api/tests/test_api.py | 2 +- pandas/conftest.py | 21 ++ pandas/io/tests/parser/test_network.py | 2 +- pandas/io/tests/test_packers.py | 21 +- pandas/io/tests/test_pickle.py | 7 +- pandas/io/tests/test_sql.py | 27 +-- pandas/tests/formats/test_format.py | 22 +-- pandas/tests/frame/common.py | 4 +- pandas/tools/tests/test_util.py | 5 + pandas/util/_tester.py | 23 +++ pandas/util/nosetester.py | 261 ------------------------- pandas/util/testing.py | 7 +- setup.cfg | 6 + test.bat | 3 +- test.sh | 9 +- test_fast.sh | 3 +- test_multi.sh | 1 - test_rebuild.sh | 8 +- tox.ini | 9 +- 31 files changed, 179 insertions(+), 373 deletions(-) create mode 100644 pandas/conftest.py create mode 100644 pandas/util/_tester.py delete mode 100644 pandas/util/nosetester.py delete mode 100755 test_multi.sh diff --git a/.gitignore b/.gitignore index a77e780f3332d..808d9fb73a631 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,8 @@ dist **/wheelhouse/* # coverage .coverage +coverage.xml +coverage_html_report # OS generated files # ###################### diff --git a/.travis.yml b/.travis.yml index be2058950d8ec..b38c99e3a5be9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,7 +32,7 @@ matrix: env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - BUILD_TYPE=conda - JOB_TAG=_OSX - TRAVIS_PYTHON_VERSION=3.5 @@ -42,7 +42,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow_nnet_LOCALE" - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - LOCALE_OVERRIDE="zh_CN.UTF-8" - FULL_DEPS=true - JOB_TAG=_LOCALE @@ -56,7 +56,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow" - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - CLIPBOARD_GUI=gtk2 - LINT=true @@ -70,7 +70,7 @@ matrix: env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_nslow" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - FULL_DEPS=true - CLIPBOARD=xsel - COVERAGE=true @@ -84,7 +84,7 @@ matrix: env: - PYTHON_VERSION=3.6 - JOB_NAME: "36" - - NOSE_ARGS="not slow and 
not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" addons: apt: @@ -96,7 +96,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow_nnet_COMPAT" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - LOCALE_OVERRIDE="it_IT.UTF-8" - INSTALL_TEST=true - JOB_TAG=_COMPAT @@ -112,7 +112,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CACHE_NAME="27_slow" - USE_CACHE=true @@ -122,7 +122,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_build_test_conda" - JOB_TAG=_BUILD_TEST - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" @@ -133,7 +133,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_nslow" - LOCALE_OVERRIDE="zh_CN.UTF-8" - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_nslow" @@ -149,7 +149,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_slow" @@ -164,7 +164,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_numpy_dev" - JOB_TAG=_NUMPY_DEV - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true @@ -179,7 +179,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_ascii" - JOB_TAG=_ASCII - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - LOCALE_OVERRIDE="C" - CACHE_NAME="35_ascii" - USE_CACHE=true @@ -199,7 +199,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CACHE_NAME="27_slow" - USE_CACHE=true @@ -208,7 +208,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_slow" - JOB_TAG=_SLOW - - NOSE_ARGS="slow and not network and not disabled" + - TEST_ARGS="--only-slow --skip-network" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_slow" @@ -222,7 +222,7 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_build_test_conda" - JOB_TAG=_BUILD_TEST - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" @@ -232,7 +232,7 @@ matrix: - PYTHON_VERSION=3.4 - JOB_NAME: "34_nslow" - LOCALE_OVERRIDE="zh_CN.UTF-8" - - NOSE_ARGS="not slow and not disabled" + - TEST_ARGS="--skip-slow" - FULL_DEPS=true - CLIPBOARD=xsel - CACHE_NAME="34_nslow" @@ -247,7 +247,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_numpy_dev" - JOB_TAG=_NUMPY_DEV - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true @@ -260,7 +260,7 @@ matrix: env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow_nnet_COMPAT" - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - LOCALE_OVERRIDE="it_IT.UTF-8" - INSTALL_TEST=true - JOB_TAG=_COMPAT @@ -275,7 +275,7 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_ascii" - JOB_TAG=_ASCII - - NOSE_ARGS="not slow and not network and not disabled" + - TEST_ARGS="--skip-slow --skip-network" - 
LOCALE_OVERRIDE="C" - CACHE_NAME="35_ascii" - USE_CACHE=true diff --git a/appveyor.yml b/appveyor.yml index 2499e7069843d..42c3be13af809 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -14,6 +14,7 @@ environment: # /E:ON and /V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd" + clone_folder: C:\projects\pandas matrix: @@ -82,7 +83,7 @@ install: - cmd: '%CMD_IN_ENV% conda build ci\appveyor.recipe -q' # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% nose + - cmd: conda create -q -n pandas python=%PYTHON_VERSION% nose pytest - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run - cmd: echo "installing requirements from %REQ%" @@ -93,7 +94,8 @@ install: test_script: # tests - - cd \ - cmd: activate pandas - cmd: conda list - - cmd: nosetests --exe -A "not slow and not network and not disabled" pandas + - cmd: cd \ + - cmd: python -c "import pandas; pandas.test(['--skip-slow', '--skip-network'])" + diff --git a/ci/install_test.sh b/ci/install_test.sh index e01ad7b94a349..cbb84d8fa4b65 100755 --- a/ci/install_test.sh +++ b/ci/install_test.sh @@ -8,7 +8,8 @@ if [ "$INSTALL_TEST" ]; then conda uninstall cython || exit 1 python "$TRAVIS_BUILD_DIR"/setup.py sdist --formats=zip,gztar || exit 1 pip install "$TRAVIS_BUILD_DIR"/dist/*tar.gz || exit 1 - nosetests --exe -A "$NOSE_ARGS" pandas/tests/test_series.py --with-xunit --xunit-file=/tmp/nosetests_install.xml + # nosetests --exe -A "$TEST_ARGS" pandas/tests/test_series.py --with-xunit --xunit-file=/tmp/nosetests_install.xml + pytest pandas/tests/test_series.py --junitxml=/tmp/pytest_install.xml else echo "Skipping installation test." fi diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 52b52d787aade..f65176fb1147c 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -83,6 +83,7 @@ else # Useful for debugging any issues with conda conda info -a || exit 1 + fi # may have installation instructions for this build @@ -90,13 +91,9 @@ INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh" if [ -e ${INSTALL} ]; then time bash $INSTALL || exit 1 else - # create new env - time conda create -n pandas python=$PYTHON_VERSION nose || exit 1 + time conda create -n pandas python=$PYTHON_VERSION nose pytest || exit 1 - if [ "$COVERAGE" ]; then - pip install coverage - fi if [ "$LINT" ]; then conda install flake8 pip install cpplint @@ -119,6 +116,10 @@ fi source activate pandas +if [ "$COVERAGE" ]; then + pip install coverage pytest-cov +fi + if [ "$BUILD_TEST" ]; then # build testing diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt index bc97957bff2b7..b64143fcd4ecd 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements_all.txt @@ -1,4 +1,6 @@ nose +pytest +pytest-cov flake8 sphinx ipython diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index 7396fba6548d9..b8af9d035de98 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -3,4 +3,6 @@ pytz numpy cython nose +pytest +pytest-cov flake8 diff --git a/ci/script.sh b/ci/script.sh index e2ba883b81883..3eac3002d6805 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running nosetests as this is simply a build test." 
elif [ "$COVERAGE" ]; then - echo nosetests --exe -A "$NOSE_ARGS" pandas --with-coverage --with-xunit --xunit-file=/tmp/nosetests.xml - nosetests --exe -A "$NOSE_ARGS" pandas --with-coverage --cover-package=pandas --cover-tests --with-xunit --xunit-file=/tmp/nosetests.xml + echo pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas + pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas else - echo nosetests --exe -A "$NOSE_ARGS" pandas --doctest-tests --with-xunit --xunit-file=/tmp/nosetests.xml - nosetests --exe -A "$NOSE_ARGS" pandas --doctest-tests --with-xunit --xunit-file=/tmp/nosetests.xml + echo pytest $TEST_ARGS pandas + pytest $TEST_ARGS pandas # TODO: doctest fi RET="$?" diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index ecc2a5e723c45..dbe329b589c75 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -552,8 +552,8 @@ use cases and writing corresponding tests. Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue. -Like many packages, *pandas* uses the `Nose testing system -`_ and the convenient +Like many packages, *pandas* uses `pytest +`_ and the convenient extensions in `numpy.testing `_. @@ -595,17 +595,25 @@ Running the test suite The tests can then be run directly inside your Git clone (without having to install *pandas*) by typing:: - nosetests pandas + pytest pandas The tests suite is exhaustive and takes around 20 minutes to run. Often it is worth running only a subset of tests first around your changes before running the -entire suite. This is done using one of the following constructs:: +entire suite. - nosetests pandas/tests/[test-module].py - nosetests pandas/tests/[test-module].py:[TestClass] - nosetests pandas/tests/[test-module].py:[TestClass].[test_method] +The easiest way to do this is with:: - .. versionadded:: 0.18.0 + pytest pandas/path/to/test.py -k regex_matching_test_name + +Or with one of the following constructs:: + + pytest pandas/tests/[test-module].py + pytest pandas/tests/[test-module].py::[TestClass] + pytest pandas/tests/[test-module].py::[TestClass]::[test_method] + +For more, see the `pytest`_ documentation. + + .. versionadded:: 0.18.0 Furthermore one can run diff --git a/doc/source/install.rst b/doc/source/install.rst index 4b3ea19624a0e..1c7cbc9326614 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -188,8 +188,8 @@ Running the test suite pandas is equipped with an exhaustive set of unit tests covering about 97% of the codebase as of this writing. To run it on your machine to verify that everything is working (and you have all of the dependencies, soft and hard, -installed), make sure you have `nose -`__ and run: +installed), make sure you have `pytest +`__ and run: :: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 5fbce3d2594a9..d0ffa786aaa8e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -11,6 +11,9 @@ Highlights include: - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - The ``.ix`` indexer has been deprecated, see :ref:`here ` +- Switched the test framework to `pytest`_ (:issue:`13097`) + +.. _pytest: http://doc.pytest.org/en/latest/ Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. 
diff --git a/pandas/__init__.py b/pandas/__init__.py index 76542db22a757..70c547010f623 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -56,11 +56,7 @@ from pandas.io.api import * -# define the testing framework -import pandas.util.testing -from pandas.util.nosetester import NoseTester -test = NoseTester().test -del NoseTester +from pandas.util._tester import test # use the closest tagged version if possible from ._version import get_versions diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index a53f6103b408b..05cf5dc4b7e7b 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -28,7 +28,7 @@ class TestPDApi(Base, tm.TestCase): # these are optionally imported based on testing # & need to be ignored - ignored = ['tests', 'locale'] + ignored = ['tests', 'locale', 'conftest'] # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', diff --git a/pandas/conftest.py b/pandas/conftest.py new file mode 100644 index 0000000000000..b3683de3a173b --- /dev/null +++ b/pandas/conftest.py @@ -0,0 +1,21 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption("--skip-slow", action="store_true", + help="skip slow tests") + parser.addoption("--skip-network", action="store_true", + help="run network tests") + parser.addoption("--only-slow", action="store_true", + help="run only slow tests") + + +def pytest_runtest_setup(item): + if 'slow' in item.keywords and item.config.getoption("--skip-slow"): + pytest.skip("skipping due to --skip-slow") + + if 'slow' not in item.keywords and item.config.getoption("--only-slow"): + pytest.skip("skipping due to --only-slow") + + if 'skip' in item.keywords and item.config.getoption("--skip-network"): + pytest.skip("skipping due to --skip-network") diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py index e06f94c780c8b..533b7733bde28 100644 --- a/pandas/io/tests/parser/test_network.py +++ b/pandas/io/tests/parser/test_network.py @@ -24,7 +24,7 @@ class TestCompressedUrl(object): 'xz': '.xz', } - def __init__(self): + def setup(self): path = os.path.join(tm.get_data_path(), 'salaries.csv') self.local_table = read_table(path) self.base_url = ('https://github.com/pandas-dev/pandas/raw/master/' diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 8a0cfb92bd3c0..2ee36d85f674c 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -793,18 +793,19 @@ class TestMsgpack(): http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class """ - def setUp(self): + @classmethod + def setup_class(cls): from pandas.io.tests.generate_legacy_storage_files import ( create_msgpack_data, create_data) - self.data = create_msgpack_data() - self.all_data = create_data() - self.path = u('__%s__.msgpack' % tm.rands(10)) - self.minimum_structure = {'series': ['float', 'int', 'mixed', - 'ts', 'mi', 'dup'], - 'frame': ['float', 'int', 'mixed', 'mi'], - 'panel': ['float'], - 'index': ['int', 'date', 'period'], - 'mi': ['reg2']} + cls.data = create_msgpack_data() + cls.all_data = create_data() + cls.path = u('__%s__.msgpack' % tm.rands(10)) + cls.minimum_structure = {'series': ['float', 'int', 'mixed', + 'ts', 'mi', 'dup'], + 'frame': ['float', 'int', 'mixed', 'mi'], + 'panel': ['float'], + 'index': ['int', 'date', 'period'], + 'mi': ['reg2']} def check_min_structure(self, data): for typ, v in self.minimum_structure.items(): diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py 
index 73a9173e85906..89827817a85fb 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -31,11 +31,12 @@ class TestPickle(): nose-test-generators-inside-class """ - def setUp(self): + @classmethod + def setup_class(cls): from pandas.io.tests.generate_legacy_storage_files import ( create_pickle_data) - self.data = create_pickle_data() - self.path = u('__%s__.pickle' % tm.rands(10)) + cls.data = create_pickle_data() + cls.path = u('__%s__.pickle' % tm.rands(10)) def compare_element(self, result, expected, typ, version=None): if isinstance(expected, Index): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 4bcde764001c1..ddda65c5bafc8 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -236,7 +236,7 @@ def _close_conn(self): pass -class PandasSQLTest(unittest.TestCase): +class PandasSQLTest(object): """ Base class with common private methods for SQLAlchemy and fallback cases. @@ -839,7 +839,7 @@ def test_unicode_column_name(self): df.to_sql('test_unicode', self.conn, index=False) -class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): +class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, unittest.TestCase): """ Test the public API as it would be used directly @@ -1024,11 +1024,11 @@ def tearDown(self): super(_EngineToConnMixin, self).tearDown() -class TestSQLApiConn(_EngineToConnMixin, TestSQLApi): +class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, unittest.TestCase): pass -class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi): +class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, unittest.TestCase): """ Test the public sqlite connection fallback API @@ -1875,34 +1875,39 @@ def test_schema_support(self): tm.assert_frame_equal(res1, res2) -class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy): +class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, unittest.TestCase): pass -class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn): +class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn, + unittest.TestCase): pass -class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy): +class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy, + unittest.TestCase): pass -class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn): +class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn, + unittest.TestCase): pass -class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy): +class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy, + unittest.TestCase): pass -class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn): +class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, + unittest.TestCase): pass # ----------------------------------------------------------------------------- # -- Test Sqlite / MySQL fallback -class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): +class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): """ Test the fallback mode against an in-memory sqlite database. 
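The recurring edit in the SQL tests just above follows a single pattern: the shared base and mixin classes stop deriving from unittest.TestCase and the concrete leaf classes mix it back in, so only the fully assembled combinations are collected and run as test cases. A schematic sketch with hypothetical names:

    import unittest

    class _SQLSuiteBase(object):
        # shared test methods; the base itself is no longer a TestCase,
        # so it is not run standalone without a concrete setup
        def test_flavor_is_known(self):
            assert self.flavor in ('sqlite', 'mysql', 'postgresql')

    class _SQLiteSetup(object):
        flavor = 'sqlite'

    class TestSQLiteSuite(_SQLiteSetup, _SQLSuiteBase, unittest.TestCase):
        pass    # the concrete combination the runner actually collects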
diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 99cc70ae36f6b..9a24ae332f7c5 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -3923,6 +3923,15 @@ def test_period(self): self.assertEqual(str(df), exp) +def gen_series_formatting(): + s1 = pd.Series(['a'] * 100) + s2 = pd.Series(['ab'] * 100) + s3 = pd.Series(['a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef']) + s4 = s3[::-1] + test_sers = {'onel': s1, 'twol': s2, 'asc': s3, 'desc': s4} + return test_sers + + class TestSeriesFormatting(tm.TestCase): def setUp(self): @@ -4320,15 +4329,6 @@ def test_consistent_format(self): '1.0000\n129 1.0000\ndtype: float64') self.assertEqual(res, exp) - @staticmethod - def gen_test_series(): - s1 = pd.Series(['a'] * 100) - s2 = pd.Series(['ab'] * 100) - s3 = pd.Series(['a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef']) - s4 = s3[::-1] - test_sers = {'onel': s1, 'twol': s2, 'asc': s3, 'desc': s4} - return test_sers - def chck_ncols(self, s): with option_context("display.max_rows", 10): res = repr(s) @@ -4339,7 +4339,7 @@ def chck_ncols(self, s): self.assertEqual(ncolsizes, 1) def test_format_explicit(self): - test_sers = self.gen_test_series() + test_sers = gen_series_formatting() with option_context("display.max_rows", 4): res = repr(test_sers['onel']) exp = '0 a\n1 a\n ..\n98 a\n99 a\ndtype: object' @@ -4358,7 +4358,7 @@ def test_format_explicit(self): self.assertEqual(exp, res) def test_ncols(self): - test_sers = self.gen_test_series() + test_sers = gen_series_formatting() for s in test_sers.values(): self.chck_ncols(s) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 37f67712e1b58..b9cd764c8704c 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -89,11 +89,11 @@ def empty(self): @cache_readonly def ts1(self): - return tm.makeTimeSeries() + return tm.makeTimeSeries(nper=30) @cache_readonly def ts2(self): - return tm.makeTimeSeries()[5:] + return tm.makeTimeSeries(nper=30)[5:] @cache_readonly def simple(self): diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index e1d057eb3c3c0..0716a13fac3fe 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -93,6 +93,11 @@ def test_set_locale(self): raise nose.SkipTest("Only a single locale found, no point in " "trying to test setting another locale") + if all(x is None for x in CURRENT_LOCALE): + # Not sure why, but on some travis runs with pytest, + # getlocale() returned (None, None). + raise nose.SkipTest("CURRENT_LOCALE is not set.") + if LOCALE_OVERRIDE is None: lang, enc = 'it_CH', 'UTF-8' elif LOCALE_OVERRIDE == 'C': diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py new file mode 100644 index 0000000000000..b0e402939caae --- /dev/null +++ b/pandas/util/_tester.py @@ -0,0 +1,23 @@ +""" +Entrypoint for testing from the top-level namespace +""" +import os + +PKG = os.path.dirname(os.path.dirname(__file__)) + + +try: + import pytest +except ImportError: + def test(): + raise ImportError("Need pytest>=3.0 to run tests") +else: + def test(extra_args=None): + if extra_args: + cmd = ['-q'] + extra_args + [PKG] + else: + cmd = ['-q', PKG] + pytest.main(cmd) + + +__all__ = ['test'] diff --git a/pandas/util/nosetester.py b/pandas/util/nosetester.py deleted file mode 100644 index 1bdaaff99fd50..0000000000000 --- a/pandas/util/nosetester.py +++ /dev/null @@ -1,261 +0,0 @@ -""" -Nose test running. - -This module implements ``test()`` function for pandas modules. 
- -""" -from __future__ import division, absolute_import, print_function - -import os -import sys -import warnings -from pandas.compat import string_types -from numpy.testing import nosetester - - -def get_package_name(filepath): - """ - Given a path where a package is installed, determine its name. - - Parameters - ---------- - filepath : str - Path to a file. If the determination fails, "pandas" is returned. - - Examples - -------- - >>> pandas.util.nosetester.get_package_name('nonsense') - 'pandas' - - """ - - pkg_name = [] - while 'site-packages' in filepath or 'dist-packages' in filepath: - filepath, p2 = os.path.split(filepath) - if p2 in ('site-packages', 'dist-packages'): - break - pkg_name.append(p2) - - # if package name determination failed, just default to pandas - if not pkg_name: - return "pandas" - - # otherwise, reverse to get correct order and return - pkg_name.reverse() - - # don't include the outer egg directory - if pkg_name[0].endswith('.egg'): - pkg_name.pop(0) - - return '.'.join(pkg_name) - -import_nose = nosetester.import_nose -run_module_suite = nosetester.run_module_suite - - -class NoseTester(nosetester.NoseTester): - """ - Nose test runner. - - This class is made available as pandas.util.nosetester.NoseTester, and - a test function is typically added to a package's __init__.py like so:: - - from numpy.testing import Tester - test = Tester().test - - Calling this test function finds and runs all tests associated with the - package and all its sub-packages. - - Attributes - ---------- - package_path : str - Full path to the package to test. - package_name : str - Name of the package to test. - - Parameters - ---------- - package : module, str or None, optional - The package to test. If a string, this should be the full path to - the package. If None (default), `package` is set to the module from - which `NoseTester` is initialized. - raise_warnings : None, str or sequence of warnings, optional - This specifies which warnings to configure as 'raise' instead - of 'warn' during the test execution. Valid strings are: - - - "develop" : equals ``(DeprecationWarning, RuntimeWarning)`` - - "release" : equals ``()``, don't raise on any warnings. - - See Notes for more details. - - Notes - ----- - The default for `raise_warnings` is - ``(DeprecationWarning, RuntimeWarning)`` for development versions of - pandas, and ``()`` for released versions. The purpose of this switching - behavior is to catch as many warnings as possible during development, but - not give problems for packaging of released versions. - - """ - excludes = [] - - def _show_system_info(self): - nose = import_nose() - - import pandas - print("pandas version %s" % pandas.__version__) - import numpy - print("numpy version %s" % numpy.__version__) - pddir = os.path.dirname(pandas.__file__) - print("pandas is installed in %s" % pddir) - - pyversion = sys.version.replace('\n', '') - print("Python version %s" % pyversion) - print("nose version %d.%d.%d" % nose.__versioninfo__) - - def _get_custom_doctester(self): - """ Return instantiated plugin for doctests - - Allows subclassing of this class to override doctester - - A return value of None means use the nose builtin doctest plugin - """ - return None - - def _test_argv(self, label, verbose, extra_argv): - """ - Generate argv for nosetest command - - Parameters - ---------- - label : {'fast', 'full', '', attribute identifier}, optional - see ``test`` docstring - verbose : int, optional - Verbosity value for test outputs, in the range 1-10. Default is 1. 
- extra_argv : list, optional - List with any extra arguments to pass to nosetests. - - Returns - ------- - argv : list - command line arguments that will be passed to nose - """ - - argv = [__file__, self.package_path] - if label and label != 'full': - if not isinstance(label, string_types): - raise TypeError('Selection label should be a string') - if label == 'fast': - label = 'not slow and not network and not disabled' - argv += ['-A', label] - argv += ['--verbosity', str(verbose)] - - # When installing with setuptools, and also in some other cases, the - # test_*.py files end up marked +x executable. Nose, by default, does - # not run files marked with +x as they might be scripts. However, in - # our case nose only looks for test_*.py files under the package - # directory, which should be safe. - argv += ['--exe'] - - if extra_argv: - argv += extra_argv - return argv - - def test(self, label='fast', verbose=1, extra_argv=None, - doctests=False, coverage=False, raise_warnings=None): - """ - Run tests for module using nose. - - Parameters - ---------- - label : {'fast', 'full', '', attribute identifier}, optional - Identifies the tests to run. This can be a string to pass to - the nosetests executable with the '-A' option, or one of several - special values. Special values are: - - * 'fast' - the default - which corresponds to the ``nosetests -A`` - option of 'not slow'. - * 'full' - fast (as above) and slow tests as in the - 'no -A' option to nosetests - this is the same as ''. - * None or '' - run all tests. - * attribute_identifier - string passed directly to nosetests - as '-A'. - - verbose : int, optional - Verbosity value for test outputs, in the range 1-10. Default is 1. - extra_argv : list, optional - List with any extra arguments to pass to nosetests. - doctests : bool, optional - If True, run doctests in module. Default is False. - coverage : bool, optional - If True, report coverage of NumPy code. Default is False. - (This requires the `coverage module - `_). - raise_warnings : str or sequence of warnings, optional - This specifies which warnings to configure as 'raise' instead - of 'warn' during the test execution. Valid strings are: - - - 'develop' : equals ``(DeprecationWarning, RuntimeWarning)`` - - 'release' : equals ``()``, don't raise on any warnings. - - Returns - ------- - result : object - Returns the result of running the tests as a - ``nose.result.TextTestResult`` object. - - """ - - # cap verbosity at 3 because nose becomes *very* verbose beyond that - verbose = min(verbose, 3) - - if doctests: - print("Running unit tests and doctests for %s" % self.package_name) - else: - print("Running unit tests for %s" % self.package_name) - - self._show_system_info() - - # reset doctest state on every run - import doctest - doctest.master = None - - if raise_warnings is None: - - # default based on if we are released - from pandas import __version__ - from distutils.version import StrictVersion - try: - StrictVersion(__version__) - raise_warnings = 'release' - except ValueError: - raise_warnings = 'develop' - - _warn_opts = dict(develop=(DeprecationWarning, RuntimeWarning), - release=()) - if isinstance(raise_warnings, string_types): - raise_warnings = _warn_opts[raise_warnings] - - with warnings.catch_warnings(): - - if len(raise_warnings): - - # Reset the warning filters to the default state, - # so that running the tests is more repeatable. 
- warnings.resetwarnings() - # Set all warnings to 'warn', this is because the default - # 'once' has the bad property of possibly shadowing later - # warnings. - warnings.filterwarnings('always') - # Force the requested warnings to raise - for warningtype in raise_warnings: - warnings.filterwarnings('error', category=warningtype) - # Filter out annoying import messages. - warnings.filterwarnings("ignore", category=FutureWarning) - - from numpy.testing.noseclasses import NumpyTestProgram - argv, plugins = self.prepare_test_args( - label, verbose, extra_argv, doctests, coverage) - t = NumpyTestProgram(argv=argv, exit=False, plugins=plugins) - - return t.result diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 6b2e920a24063..336a766fd5830 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,7 +19,8 @@ from distutils.version import LooseVersion from numpy.random import randn, rand -from numpy.testing.decorators import slow # noqa +# from numpy.testing.decorators import slow # noqa +import pytest import numpy as np import pandas as pd @@ -2549,9 +2550,7 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", % extra_warnings) -def disabled(t): - t.disabled = True - return t +slow = pytest.mark.slow class RNGContext(object): diff --git a/setup.cfg b/setup.cfg index f69e256b80869..143470f7ee350 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,3 +19,9 @@ based_on_style = pep8 split_before_named_assigns = false split_penalty_after_opening_bracket = 1000000 split_penalty_logical_operator = 30 + +[tool:pytest] +# TODO: Change all yield-based (nose-style) fixutures to pytest fixtures +# Silencing the warning until then +addopts = --disable-pytest-warnings +testpaths = pandas diff --git a/test.bat b/test.bat index 16aa6c9105ec3..7f9244abb2bc8 100644 --- a/test.bat +++ b/test.bat @@ -1,3 +1,4 @@ :: test on windows -nosetests --exe -A "not slow and not network and not disabled" pandas %* +:: nosetests --exe -A "not slow and not network and not disabled" pandas %* +pytest pandas diff --git a/test.sh b/test.sh index 4a9ffd7be98b1..23c7ff52d2ce9 100755 --- a/test.sh +++ b/test.sh @@ -1,11 +1,4 @@ #!/bin/sh command -v coverage >/dev/null && coverage erase command -v python-coverage >/dev/null && python-coverage erase -# nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb -#nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive -#nosetests -A "not slow" -w pandas/tseries --with-coverage --cover-package=pandas.tseries $* #--cover-inclusive -nosetests -w pandas --with-coverage --cover-package=pandas $* -# nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb -# nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb -# nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats -# coverage run runtests.py +pytest pandas --cov=pandas diff --git a/test_fast.sh b/test_fast.sh index b390705f901ad..0b394cffa3d74 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -1 +1,2 @@ -nosetests -A "not slow and not network" pandas --with-id $* +# nosetests -A "not slow and not network" pandas --with-id $* +pytest pandas --skip-slow diff --git a/test_multi.sh b/test_multi.sh deleted file mode 100755 index 5d77945c66a26..0000000000000 --- a/test_multi.sh +++ /dev/null @@ -1 +0,0 @@ -nosetests -A "not slow and not network" pandas --processes=4 $* diff --git a/test_rebuild.sh b/test_rebuild.sh index 
d3710c5ff67d3..65aa1098811a1 100755 --- a/test_rebuild.sh +++ b/test_rebuild.sh @@ -3,10 +3,4 @@ python setup.py clean python setup.py build_ext --inplace coverage erase -# nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb -#nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive -nosetests -w pandas --with-coverage --cover-package=pandas $* #--cover-inclusive -# nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb -# nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb -# nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats -# coverage run runtests.py +pytest pandas --cov=pandas diff --git a/tox.ini b/tox.ini index 5d6c8975307b6..85c5d90fde7fb 100644 --- a/tox.ini +++ b/tox.ini @@ -10,6 +10,7 @@ envlist = py27, py34, py35 deps = cython nose + pytest pytz>=2011k python-dateutil beautifulsoup4 @@ -26,7 +27,7 @@ changedir = {envdir} commands = # TODO: --exe because of GH #761 - {envbindir}/nosetests --exe pandas {posargs:-A "not network and not disabled"} + {envbindir}/pytest pandas {posargs:-A "not network and not disabled"} # cleanup the temp. build dir created by the tox build # /bin/rm -rf {toxinidir}/build @@ -63,18 +64,18 @@ usedevelop = True deps = {[testenv]deps} openpyxl<2.0.0 -commands = {envbindir}/nosetests {toxinidir}/pandas/io/tests/test_excel.py +commands = {envbindir}/pytest {toxinidir}/pandas/io/tests/test_excel.py [testenv:openpyxl20] usedevelop = True deps = {[testenv]deps} openpyxl<2.2.0 -commands = {envbindir}/nosetests {posargs} {toxinidir}/pandas/io/tests/test_excel.py +commands = {envbindir}/pytest {posargs} {toxinidir}/pandas/io/tests/test_excel.py [testenv:openpyxl22] usedevelop = True deps = {[testenv]deps} openpyxl>=2.2.0 -commands = {envbindir}/nosetests {posargs} {toxinidir}/pandas/io/tests/test_excel.py +commands = {envbindir}/pytest {posargs} {toxinidir}/pandas/io/tests/test_excel.py From 7713f2940b74fff8254087d9bdde1d2d3c3927e6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Feb 2017 10:42:18 -0500 Subject: [PATCH 039/353] TST: small adjustments for pytest --- doc/source/contributing.rst | 2 +- pandas/util/_tester.py | 9 ++++++--- pandas/util/testing.py | 6 ++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index dbe329b589c75..3ef9ed8962a23 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -613,7 +613,7 @@ Or with one of the following constructs:: For more, see the `pytest`_ documentation. - .. versionadded:: 0.18.0 + .. 
versionadded:: 0.20.0 Furthermore one can run diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index b0e402939caae..8d9701e0b4672 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -13,10 +13,13 @@ def test(): raise ImportError("Need pytest>=3.0 to run tests") else: def test(extra_args=None): + cmd = ['--skip-slow', '--skip-network'] if extra_args: - cmd = ['-q'] + extra_args + [PKG] - else: - cmd = ['-q', PKG] + if not isinstance(extra_args, list): + extra_args = [extra_args] + cmd = extra_args + cmd += [PKG] + print("running: pytest {}".format(' '.join(cmd))) pytest.main(cmd) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 336a766fd5830..c3633c945f60a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,7 +19,6 @@ from distutils.version import LooseVersion from numpy.random import randn, rand -# from numpy.testing.decorators import slow # noqa import pytest import numpy as np @@ -50,6 +49,8 @@ from pandas.util.decorators import deprecate from pandas import _testing from pandas.io.common import urlopen +slow = pytest.mark.slow + N = 30 K = 4 @@ -2550,9 +2551,6 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", % extra_warnings) -slow = pytest.mark.slow - - class RNGContext(object): """ Context manager to set the numpy random number generator speed. Returns From dcb4e47a0b6620f1efbe5e02ed493e6513fc8763 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Feb 2017 13:19:22 -0500 Subject: [PATCH 040/353] COMPAT: skip tests for numpy >= 1.12 with pow and integer inputs closes #15363 CI: fix 3.5 build to numpy 1.11.3 --- .gitignore | 1 + ci/requirements-3.5.build | 2 +- ci/requirements-3.5.run | 2 +- pandas/tests/test_expressions.py | 17 ++++++++++++++++- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 808d9fb73a631..a509fcf736ea8 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ .noseids .ipynb_checkpoints .tags +.cache/ # Compiled source # ################### diff --git a/ci/requirements-3.5.build b/ci/requirements-3.5.build index 9558cf00ddf5c..2fc2053e64fe9 100644 --- a/ci/requirements-3.5.build +++ b/ci/requirements-3.5.build @@ -1,4 +1,4 @@ python-dateutil pytz -numpy +numpy=1.11.3 cython diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index ef354195c8f23..b07ce611c79a2 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -1,6 +1,6 @@ python-dateutil pytz -numpy +numpy=1.11.3 openpyxl xlsxwriter xlrd diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index eca4a8f3c9e66..136786ecff0a0 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -12,7 +12,7 @@ from pandas.core.api import DataFrame, Panel from pandas.computation import expressions as expr -from pandas import compat +from pandas import compat, _np_version_under1p12 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal, slow) @@ -78,6 +78,13 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, if not compat.PY3: operations.append('div') for arith in operations: + + # numpy >= 1.12 doesn't handle integers + # raised to integer powers + # https://github.com/pandas-dev/pandas/issues/15363 + if arith == 'pow' and not _np_version_under1p12: + continue + operator_name = arith if arith == 'div': operator_name = 'truediv' @@ -90,6 +97,7 @@ def run_arithmetic_test(self, df, other, 
assert_func, check_dtype=False, expr.set_use_numexpr(False) expected = op(df, other) expr.set_use_numexpr(True) + result = op(df, other) try: if check_dtype: @@ -273,6 +281,13 @@ def testit(): for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'), ('pow', '**')]: + + # numpy >= 1.12 doesn't handle integers + # raised to integer powers + # https://github.com/pandas-dev/pandas/issues/15363 + if op == 'pow' and not _np_version_under1p12: + continue + if op == 'div': op = getattr(operator, 'truediv', None) else: From 61deba5cfc43425e35c8fc61bcad1123c83a6a5a Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Fri, 10 Feb 2017 14:48:11 -0500 Subject: [PATCH 041/353] BUG: Fixed handling of non-list value_vars in melt The value_vars argument of melt is now cast to list like the id_vars argument. closes #15348 Author: Joshua Bradt Author: Joshua Bradt Closes #15351 from jbradt/fix-melt and squashes the following commits: a2f2510 [Joshua Bradt] Changed to tm.assertRaisesRegexp for Python 2 compat. 3038f64 [Joshua Bradt] Merge remote-tracking branch 'upstream/master' into fix-melt e907135 [Joshua Bradt] Split test into two parts 20159c1 [Joshua Bradt] Changed exception classes to ValueError. 129d531 [Joshua Bradt] Moved binary operators to satisfy flake8 70d7256 [Joshua Bradt] Merge branch 'master' into fix-melt 455a310 [Joshua Bradt] Tested types when using MultiIndex to ensure they are lists. 7406222 [Joshua Bradt] Fixed formatting. Added comment with issue number to test. d4c5da3 [Joshua Bradt] Improved type checking and tests. Added whatsnew note. 33728de [Joshua Bradt] BUG: Fixed handling of non-list value_vars in melt --- doc/source/whatsnew/v0.20.0.txt | 3 +-- pandas/core/reshape.py | 14 ++++++++++-- pandas/tests/test_reshape.py | 39 +++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d0ffa786aaa8e..9f86c777c665d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -578,6 +578,5 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - - +- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index bd0358abf67d5..cebaf4e3fd89b 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -761,16 +761,26 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, """ # TODO: what about the existing index? 
if id_vars is not None: - if not isinstance(id_vars, (tuple, list, np.ndarray)): + if not is_list_like(id_vars): id_vars = [id_vars] + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(id_vars, list)): + raise ValueError('id_vars must be a list of tuples when columns' + ' are a MultiIndex') else: id_vars = list(id_vars) else: id_vars = [] if value_vars is not None: - if not isinstance(value_vars, (tuple, list, np.ndarray)): + if not is_list_like(value_vars): value_vars = [value_vars] + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(value_vars, list)): + raise ValueError('value_vars must be a list of tuples when' + ' columns are a MultiIndex') + else: + value_vars = list(value_vars) frame = frame.loc[:, id_vars + value_vars] else: frame = frame.copy() diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index ed5ec970ba33c..d587e4ea6a1fa 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -56,6 +56,45 @@ def test_value_vars(self): columns=['id1', 'id2', 'variable', 'value']) tm.assert_frame_equal(result4, expected4) + def test_value_vars_types(self): + # GH 15348 + expected = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', 'variable', 'value']) + + for type_ in (tuple, list, np.array): + result = melt(self.df, id_vars=['id1', 'id2'], + value_vars=type_(('A', 'B'))) + tm.assert_frame_equal(result, expected) + + def test_vars_work_with_multiindex(self): + expected = DataFrame({ + ('A', 'a'): self.df1[('A', 'a')], + 'CAP': ['B'] * len(self.df1), + 'low': ['b'] * len(self.df1), + 'value': self.df1[('B', 'b')], + }, columns=[('A', 'a'), 'CAP', 'low', 'value']) + + result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) + tm.assert_frame_equal(result, expected) + + def test_tuple_vars_fail_with_multiindex(self): + # melt should fail with an informative error message if + # the columns have a MultiIndex and a tuple is passed + # for id_vars or value_vars. 
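A minimal usage sketch of the behaviour these tests pin down (hypothetical frames, assuming the patched reshape.py above, not code from this patch): a tuple passed for id_vars/value_vars on flat columns is treated like any other list-like, while a bare tuple against MultiIndex columns now raises ValueError and must be wrapped in a list.

    import pandas as pd

    # Flat columns: tuples, lists and arrays are all accepted for value_vars.
    df = pd.DataFrame({'id1': [1, 2], 'A': [3, 4], 'B': [5, 6]})
    pd.melt(df, id_vars=['id1'], value_vars=('A', 'B'))

    # MultiIndex columns: a bare tuple is ambiguous, so it raises; use a list of tuples.
    mi = pd.DataFrame([[1, 2]],
                      columns=pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')]))
    try:
        pd.melt(mi, id_vars=('A', 'a'), value_vars=('B', 'b'))
    except ValueError:
        pass  # "id_vars must be a list of tuples when columns are a MultiIndex"
    pd.melt(mi, id_vars=[('A', 'a')], value_vars=[('B', 'b')])  # OK
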
+ tuple_a = ('A', 'a') + list_a = [tuple_a] + tuple_b = ('B', 'b') + list_b = [tuple_b] + + for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), + (tuple_a, tuple_b)): + with tm.assertRaisesRegexp(ValueError, r'MultiIndex'): + melt(self.df1, id_vars=id_vars, value_vars=value_vars) + def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) From 3f7d2db773fbc3c9bdbfba59b6866be0f2d7b711 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 11 Feb 2017 16:05:54 -0500 Subject: [PATCH 042/353] TST: split up tests/indexing/test_indexing a bit Author: Jeff Reback Closes #15367 from jreback/indexing and squashes the following commits: 15e6010 [Jeff Reback] pep 3a12fdd [Jeff Reback] add panel 5605b2b [Jeff Reback] add chaining and caching 05f6f40 [Jeff Reback] split out datetime d6be34f [Jeff Reback] TST: split up tests/indexing/test_indexing a bit --- pandas/tests/indexing/common.py | 5 + .../indexing/test_chaining_and_caching.py | 358 +++ pandas/tests/indexing/test_datetime.py | 192 ++ pandas/tests/indexing/test_floats.py | 157 ++ pandas/tests/indexing/test_indexing.py | 2242 +---------------- pandas/tests/indexing/test_multiindex.py | 1206 +++++++++ pandas/tests/indexing/test_panel.py | 209 ++ pandas/tests/indexing/test_timedelta.py | 21 + 8 files changed, 2215 insertions(+), 2175 deletions(-) create mode 100644 pandas/tests/indexing/common.py create mode 100644 pandas/tests/indexing/test_chaining_and_caching.py create mode 100644 pandas/tests/indexing/test_datetime.py create mode 100644 pandas/tests/indexing/test_multiindex.py create mode 100644 pandas/tests/indexing/test_panel.py create mode 100644 pandas/tests/indexing/test_timedelta.py diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py new file mode 100644 index 0000000000000..73167393cf35d --- /dev/null +++ b/pandas/tests/indexing/common.py @@ -0,0 +1,5 @@ +""" common utilities """ + + +def _mklbl(prefix, n): + return ["%s%s" % (prefix, i) for i in range(n)] diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py new file mode 100644 index 0000000000000..0e921aaf826f9 --- /dev/null +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -0,0 +1,358 @@ +import numpy as np +import pandas as pd +from pandas.core import common as com +from pandas import (compat, DataFrame, option_context, + Series, MultiIndex, date_range, Timestamp) +from pandas.util import testing as tm + + +class TestCaching(tm.TestCase): + + def test_slice_consolidate_invalidate_item_cache(self): + + # this is chained assignment, but will 'work' + with option_context('chained_assignment', None): + + # #3970 + df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5}) + + # Creates a second float block + df["cc"] = 0.0 + + # caches a reference to the 'bb' series + df["bb"] + + # repr machinery triggers consolidation + repr(df) + + # Assignment to wrong series + df['bb'].iloc[0] = 0.17 + df._clear_item_cache() + self.assertAlmostEqual(df['bb'][0], 0.17) + + def test_setitem_cache_updating(self): + # GH 5424 + cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven'] + + for do_ref in [False, False]: + df = DataFrame({'a': cont, + "b": cont[3:] + cont[:3], + 'c': np.arange(7)}) + + # ref the cache + if do_ref: + df.ix[0, "c"] + + # set it + df.ix[7, 'c'] = 1 + + self.assertEqual(df.ix[0, 'c'], 0.0) + self.assertEqual(df.ix[7, 'c'], 1.0) + + # GH 7084 + # not updating cache on series 
setting with slices + expected = DataFrame({'A': [600, 600, 600]}, + index=date_range('5/7/2014', '5/9/2014')) + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) + df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]}) + + # loop through df to update out + six = Timestamp('5/7/2014') + eix = Timestamp('5/9/2014') + for ix, row in df.iterrows(): + out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D'] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out['A'], expected['A']) + + # try via a chain indexing + # this actually works + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) + for ix, row in df.iterrows(): + v = out[row['C']][six:eix] + row['D'] + out[row['C']][six:eix] = v + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out['A'], expected['A']) + + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) + for ix, row in df.iterrows(): + out.loc[six:eix, row['C']] += row['D'] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out['A'], expected['A']) + + +class TestChaining(tm.TestCase): + + def test_setitem_chained_setfault(self): + + # GH6026 + # setfaults under numpy 1.7.1 (ok on 1.8) + data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] + mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] + + df = DataFrame({'response': np.array(data)}) + mask = df.response == 'timeout' + df.response[mask] = 'none' + tm.assert_frame_equal(df, DataFrame({'response': mdata})) + + recarray = np.rec.fromarrays([data], names=['response']) + df = DataFrame(recarray) + mask = df.response == 'timeout' + df.response[mask] = 'none' + tm.assert_frame_equal(df, DataFrame({'response': mdata})) + + df = DataFrame({'response': data, 'response1': data}) + mask = df.response == 'timeout' + df.response[mask] = 'none' + tm.assert_frame_equal(df, DataFrame({'response': mdata, + 'response1': data})) + + # GH 6056 + expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar'])) + df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) + df['A'].iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) + df.A.iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + def test_detect_chained_assignment(self): + + pd.set_option('chained_assignment', 'raise') + + # work with the chain + expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB')) + df = DataFrame(np.arange(4).reshape(2, 2), + columns=list('AB'), dtype='int64') + self.assertIsNone(df.is_copy) + df['A'][0] = -5 + df['A'][1] = -6 + tm.assert_frame_equal(df, expected) + + # test with the chaining + df = DataFrame({'A': Series(range(2), dtype='int64'), + 'B': np.array(np.arange(2, 4), dtype=np.float64)}) + self.assertIsNone(df.is_copy) + + def f(): + df['A'][0] = -5 + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df['A'][1] = np.nan + + self.assertRaises(com.SettingWithCopyError, f) + self.assertIsNone(df['A'].is_copy) + + # using a copy (the chain), fails + df = DataFrame({'A': Series(range(2), dtype='int64'), + 'B': np.array(np.arange(2, 4), dtype=np.float64)}) + + def f(): + df.loc[0]['A'] = -5 + + self.assertRaises(com.SettingWithCopyError, f) + + # doc example + df = DataFrame({'a': ['one', 'one', 'two', 'three', + 'two', 'one', 'six'], + 'c': Series(range(7), dtype='int64')}) + self.assertIsNone(df.is_copy) + expected = 
DataFrame({'a': ['one', 'one', 'two', 'three', + 'two', 'one', 'six'], + 'c': [42, 42, 2, 3, 4, 42, 6]}) + + def f(): + indexer = df.a.str.startswith('o') + df[indexer]['c'] = 42 + + self.assertRaises(com.SettingWithCopyError, f) + + expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) + df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + + def f(): + df['A'][0] = 111 + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df.loc[0]['A'] = 111 + + self.assertRaises(com.SettingWithCopyError, f) + + df.loc[0, 'A'] = 111 + tm.assert_frame_equal(df, expected) + + # make sure that is_copy is picked up reconstruction + # GH5475 + df = DataFrame({"A": [1, 2]}) + self.assertIsNone(df.is_copy) + with tm.ensure_clean('__tmp__pickle') as path: + df.to_pickle(path) + df2 = pd.read_pickle(path) + df2["B"] = df2["A"] + df2["B"] = df2["A"] + + # a suprious raise as we are setting the entire column here + # GH5597 + from string import ascii_letters as letters + + def random_text(nobs=100): + df = [] + for i in range(nobs): + idx = np.random.randint(len(letters), size=2) + idx.sort() + df.append([letters[idx[0]:idx[1]]]) + + return DataFrame(df, columns=['letters']) + + df = random_text(100000) + + # always a copy + x = df.iloc[[0, 1, 2]] + self.assertIsNotNone(x.is_copy) + x = df.iloc[[0, 1, 2, 4]] + self.assertIsNotNone(x.is_copy) + + # explicity copy + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.ix[indexer].copy() + self.assertIsNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + + # implicity take + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.ix[indexer] + self.assertIsNotNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + + # implicity take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.ix[indexer] + self.assertIsNotNone(df.is_copy) + df.loc[:, 'letters'] = df['letters'].apply(str.lower) + + # should be ok even though it's a copy! 
+ self.assertIsNone(df.is_copy) + df['letters'] = df['letters'].apply(str.lower) + self.assertIsNone(df.is_copy) + + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) + + # an identical take, so no copy + df = DataFrame({'a': [1]}).dropna() + self.assertIsNone(df.is_copy) + df['a'] += 1 + + # inplace ops + # original from: + # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), + ('ears', 'right')] + events = {('eyes', 'left'): a, + ('eyes', 'right'): b, + ('ears', 'left'): c, + ('ears', 'right'): d} + multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) + zed = DataFrame(events, index=['a', 'b'], columns=multiind) + + def f(): + zed['eyes']['right'].fillna(value=555, inplace=True) + + self.assertRaises(com.SettingWithCopyError, f) + + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0].sort_values() + tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) + tm.assert_series_equal(s, df[0].sort_values()) + + # false positives GH6025 + df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]}) + str(df) + df['column1'] = df['column1'] + 'b' + str(df) + df = df[df['column2'] != 8] + str(df) + df['column1'] = df['column1'] + 'c' + str(df) + + # from SO: + # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc + df = DataFrame(np.arange(0, 9), columns=['count']) + df['group'] = 'b' + + def f(): + df.iloc[0:5]['group'] = 'a' + + self.assertRaises(com.SettingWithCopyError, f) + + # mixed type setting + # same dtype & changing dtype + df = DataFrame(dict(A=date_range('20130101', periods=5), + B=np.random.randn(5), + C=np.arange(5, dtype='int64'), + D=list('abcde'))) + + def f(): + df.ix[2]['D'] = 'foo' + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df.ix[2]['C'] = 'foo' + + self.assertRaises(com.SettingWithCopyError, f) + + def f(): + df['C'][2] = 'foo' + + self.assertRaises(com.SettingWithCopyError, f) + + def test_setting_with_copy_bug(self): + + # operating on a copy + df = pd.DataFrame({'a': list(range(4)), + 'b': list('ab..'), + 'c': ['a', 'b', np.nan, 'd']}) + mask = pd.isnull(df.c) + + def f(): + df[['c']][mask] = df[['b']][mask] + + self.assertRaises(com.SettingWithCopyError, f) + + # invalid warning as we are returning a new object + # GH 8730 + df1 = DataFrame({'x': Series(['a', 'b', 'c']), + 'y': Series(['d', 'e', 'f'])}) + df2 = df1[['x']] + + # this should not raise + df2['y'] = ['g', 'h', 'i'] + + def test_detect_chained_assignment_warnings(self): + + # warnings + with option_context('chained_assignment', 'warn'): + df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + with tm.assert_produces_warning( + expected_warning=com.SettingWithCopyWarning): + df.loc[0]['A'] = 111 diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py new file mode 100644 index 0000000000000..1c4e5772d316f --- /dev/null +++ b/pandas/tests/indexing/test_datetime.py @@ -0,0 +1,192 @@ +import numpy as np +import pandas as pd +from pandas import date_range, Index, DataFrame, Series, Timestamp +from pandas.util import testing as tm + + +class TestDatetimeIndex(tm.TestCase): + + def test_indexing_with_datetime_tz(self): + + # 8260 + # support datetime64 with tz + + idx = 
Index(date_range('20130101', periods=3, tz='US/Eastern'), + name='foo') + dr = date_range('20130110', periods=3) + df = DataFrame({'A': idx, 'B': dr}) + df['C'] = idx + df.iloc[1, 1] = pd.NaT + df.iloc[1, 2] = pd.NaT + + # indexing + result = df.iloc[1] + expected = Series([Timestamp('2013-01-02 00:00:00-0500', + tz='US/Eastern'), np.nan, np.nan], + index=list('ABC'), dtype='object', name=1) + tm.assert_series_equal(result, expected) + result = df.loc[1] + expected = Series([Timestamp('2013-01-02 00:00:00-0500', + tz='US/Eastern'), np.nan, np.nan], + index=list('ABC'), dtype='object', name=1) + tm.assert_series_equal(result, expected) + + # indexing - fast_xs + df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')}) + result = df.iloc[5] + expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D') + self.assertEqual(result, expected) + + result = df.loc[5] + self.assertEqual(result, expected) + + # indexing - boolean + result = df[df.a > df.a[3]] + expected = df.iloc[4:] + tm.assert_frame_equal(result, expected) + + # indexing - setting an element + df = DataFrame(data=pd.to_datetime( + ['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time']) + df['new_col'] = ['new', 'old'] + df.time = df.set_index('time').index.tz_localize('UTC') + v = df[df.new_col == 'new'].set_index('time').index.tz_convert( + 'US/Pacific') + + # trying to set a single element on a part of a different timezone + def f(): + df.loc[df.new_col == 'new', 'time'] = v + + self.assertRaises(ValueError, f) + + v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') + df.loc[df.new_col == 'new', 'time'] = v + tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) + + def test_indexing_with_datetimeindex_tz(self): + + # GH 12050 + # indexing on a series with a datetimeindex with tz + index = pd.date_range('2015-01-01', periods=2, tz='utc') + + ser = pd.Series(range(2), index=index, + dtype='int64') + + # list-like indexing + + for sel in (index, list(index)): + # getitem + tm.assert_series_equal(ser[sel], ser) + + # setitem + result = ser.copy() + result[sel] = 1 + expected = pd.Series(1, index=index) + tm.assert_series_equal(result, expected) + + # .loc getitem + tm.assert_series_equal(ser.loc[sel], ser) + + # .loc setitem + result = ser.copy() + result.loc[sel] = 1 + expected = pd.Series(1, index=index) + tm.assert_series_equal(result, expected) + + # single element indexing + + # getitem + self.assertEqual(ser[index[1]], 1) + + # setitem + result = ser.copy() + result[index[1]] = 5 + expected = pd.Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + # .loc getitem + self.assertEqual(ser.loc[index[1]], 1) + + # .loc setitem + result = ser.copy() + result.loc[index[1]] = 5 + expected = pd.Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + def test_partial_setting_with_datetimelike_dtype(self): + + # GH9478 + # a datetimeindex alignment issue with partial setting + df = pd.DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), + index=pd.date_range('1/1/2000', periods=3, + freq='1H')) + expected = df.copy() + expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT] + + mask = df.A < 1 + df.loc[mask, 'C'] = df.loc[mask].index + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_datetime(self): + + # GH 9516 + dt1 = Timestamp('20130101 09:00:00') + dt2 = Timestamp('20130101 10:00:00') + + for conv in [lambda x: x, lambda x: x.to_datetime64(), + lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]: + + df = pd.DataFrame() + 
df.loc[conv(dt1), 'one'] = 100 + df.loc[conv(dt2), 'one'] = 200 + + expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2]) + tm.assert_frame_equal(df, expected) + + def test_series_partial_set_datetime(self): + # GH 11497 + + idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx') + ser = Series([0.1, 0.2], index=idx, name='s') + + result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]] + exp = Series([0.1, 0.2], index=idx, name='s') + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'), + Timestamp('2011-01-01')] + exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'), + name='s') + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'), + Timestamp('2011-01-03')] + exp = Series([np.nan, 0.2, np.nan], + index=pd.DatetimeIndex(keys, name='idx'), name='s') + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + def test_series_partial_set_period(self): + # GH 11497 + + idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx') + ser = Series([0.1, 0.2], index=idx, name='s') + + result = ser.loc[[pd.Period('2011-01-01', freq='D'), + pd.Period('2011-01-02', freq='D')]] + exp = Series([0.1, 0.2], index=idx, name='s') + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [pd.Period('2011-01-02', freq='D'), + pd.Period('2011-01-02', freq='D'), + pd.Period('2011-01-01', freq='D')] + exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'), + name='s') + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + keys = [pd.Period('2011-01-03', freq='D'), + pd.Period('2011-01-02', freq='D'), + pd.Period('2011-01-03', freq='D')] + exp = Series([np.nan, 0.2, np.nan], + index=pd.PeriodIndex(keys, name='idx'), name='s') + result = ser.loc[keys] + tm.assert_series_equal(result, exp) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 8f0fa2d56113b..99e7460b2a3de 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -709,3 +709,160 @@ def test_floating_tuples(self): result = s[0.0] expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name='foo') assert_series_equal(result, expected) + + def test_float64index_slicing_bug(self): + # GH 5557, related to slicing a float index + ser = {256: 2321.0, + 1: 78.0, + 2: 2716.0, + 3: 0.0, + 4: 369.0, + 5: 0.0, + 6: 269.0, + 7: 0.0, + 8: 0.0, + 9: 0.0, + 10: 3536.0, + 11: 0.0, + 12: 24.0, + 13: 0.0, + 14: 931.0, + 15: 0.0, + 16: 101.0, + 17: 78.0, + 18: 9643.0, + 19: 0.0, + 20: 0.0, + 21: 0.0, + 22: 63761.0, + 23: 0.0, + 24: 446.0, + 25: 0.0, + 26: 34773.0, + 27: 0.0, + 28: 729.0, + 29: 78.0, + 30: 0.0, + 31: 0.0, + 32: 3374.0, + 33: 0.0, + 34: 1391.0, + 35: 0.0, + 36: 361.0, + 37: 0.0, + 38: 61808.0, + 39: 0.0, + 40: 0.0, + 41: 0.0, + 42: 6677.0, + 43: 0.0, + 44: 802.0, + 45: 0.0, + 46: 2691.0, + 47: 0.0, + 48: 3582.0, + 49: 0.0, + 50: 734.0, + 51: 0.0, + 52: 627.0, + 53: 70.0, + 54: 2584.0, + 55: 0.0, + 56: 324.0, + 57: 0.0, + 58: 605.0, + 59: 0.0, + 60: 0.0, + 61: 0.0, + 62: 3989.0, + 63: 10.0, + 64: 42.0, + 65: 0.0, + 66: 904.0, + 67: 0.0, + 68: 88.0, + 69: 70.0, + 70: 8172.0, + 71: 0.0, + 72: 0.0, + 73: 0.0, + 74: 64902.0, + 75: 0.0, + 76: 347.0, + 77: 0.0, + 78: 36605.0, + 79: 0.0, + 80: 379.0, + 81: 70.0, + 82: 0.0, + 83: 0.0, + 84: 3001.0, + 85: 0.0, + 86: 1630.0, + 87: 7.0, + 88: 364.0, + 89: 0.0, + 90: 67404.0, + 91: 
9.0, + 92: 0.0, + 93: 0.0, + 94: 7685.0, + 95: 0.0, + 96: 1017.0, + 97: 0.0, + 98: 2831.0, + 99: 0.0, + 100: 2963.0, + 101: 0.0, + 102: 854.0, + 103: 0.0, + 104: 0.0, + 105: 0.0, + 106: 0.0, + 107: 0.0, + 108: 0.0, + 109: 0.0, + 110: 0.0, + 111: 0.0, + 112: 0.0, + 113: 0.0, + 114: 0.0, + 115: 0.0, + 116: 0.0, + 117: 0.0, + 118: 0.0, + 119: 0.0, + 120: 0.0, + 121: 0.0, + 122: 0.0, + 123: 0.0, + 124: 0.0, + 125: 0.0, + 126: 67744.0, + 127: 22.0, + 128: 264.0, + 129: 0.0, + 260: 197.0, + 268: 0.0, + 265: 0.0, + 269: 0.0, + 261: 0.0, + 266: 1198.0, + 267: 0.0, + 262: 2629.0, + 258: 775.0, + 257: 0.0, + 263: 0.0, + 259: 0.0, + 264: 163.0, + 250: 10326.0, + 251: 0.0, + 252: 1228.0, + 253: 0.0, + 254: 2769.0, + 255: 0.0} + + # smoke test for the repr + s = Series(ser) + result = s.value_counts() + str(result) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b06b1067b7c6b..f7a4af711bbb8 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import sys -import nose import itertools import warnings from warnings import catch_warnings @@ -10,22 +8,21 @@ from pandas.types.common import (is_integer_dtype, is_float_dtype, is_scalar) -from pandas.compat import range, lrange, lzip, StringIO, lmap, map +from pandas.compat import range, lrange, lzip, StringIO, lmap from pandas.tslib import NaT from numpy import nan from numpy.random import randn import numpy as np import pandas as pd -import pandas.core.common as com from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, MultiIndex, Timestamp, Timedelta, UInt64Index) from pandas.formats.printing import pprint_thing from pandas import concat -from pandas.core.common import PerformanceWarning, UnsortedIndexError - +from pandas.core.common import PerformanceWarning +from pandas.tests.indexing.common import _mklbl import pandas.util.testing as tm from pandas import date_range @@ -93,10 +90,6 @@ def _axify(obj, key, axis): return tuple(axes) -def _mklbl(prefix, n): - return ["%s%s" % (prefix, i) for i in range(n)] - - class TestIndexing(tm.TestCase): _objs = set(['series', 'frame', 'panel']) @@ -665,40 +658,6 @@ def test_iloc_getitem_slice_dups(self): tm.assert_frame_equal(df.iloc[10:, :2], df2) tm.assert_frame_equal(df.iloc[10:, 2:], df1) - def test_iloc_getitem_multiindex2(self): - # TODO(wesm): fix this - raise nose.SkipTest('this test was being suppressed, ' - 'needs to be fixed') - - arr = np.random.randn(3, 3) - df = DataFrame(arr, columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]]) - - rs = df.iloc[2] - xp = Series(arr[2], index=df.columns) - tm.assert_series_equal(rs, xp) - - rs = df.iloc[:, 2] - xp = Series(arr[:, 2], index=df.index) - tm.assert_series_equal(rs, xp) - - rs = df.iloc[2, 2] - xp = df.values[2, 2] - self.assertEqual(rs, xp) - - # for multiple items - # GH 5528 - rs = df.iloc[[0, 1]] - xp = df.xs(4, drop_level=False) - tm.assert_frame_equal(rs, xp) - - tup = zip(*[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - index = MultiIndex.from_tuples(tup) - df = DataFrame(np.random.randn(4, 4), index=index) - rs = df.iloc[[2, 3]] - xp = df.xs('b', drop_level=False) - tm.assert_frame_equal(rs, xp) - def test_iloc_setitem(self): df = self.frame_ints @@ -872,210 +831,6 @@ def compare(result, expected): result2 = s.loc[0:3] 
tm.assert_series_equal(result1, result2) - def test_setitem_multiindex(self): - for index_fn in ('ix', 'loc'): - - def check(target, indexers, value, compare_fn, expected=None): - fn = getattr(target, index_fn) - fn.__setitem__(indexers, value) - result = fn.__getitem__(indexers) - if expected is None: - expected = value - compare_fn(result, expected) - # GH7190 - index = pd.MultiIndex.from_product([np.arange(0, 100), - np.arange(0, 80)], - names=['time', 'firm']) - t, n = 0, 2 - df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=0, - compare_fn=self.assertEqual) - - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=1, - compare_fn=self.assertEqual) - - df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=2, - compare_fn=self.assertEqual) - - # GH 7218, assinging with 0-dim arrays - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, - indexers=((t, n), 'X'), - value=np.array(3), - compare_fn=self.assertEqual, - expected=3, ) - - # GH5206 - df = pd.DataFrame(np.arange(25).reshape(5, 5), - columns='A,B,C,D,E'.split(','), dtype=float) - df['F'] = 99 - row_selection = df['A'] % 2 == 0 - col_selection = ['B', 'C'] - with catch_warnings(record=True): - df.ix[row_selection, col_selection] = df['F'] - output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) - with catch_warnings(record=True): - tm.assert_frame_equal(df.ix[row_selection, col_selection], - output) - check(target=df, - indexers=(row_selection, col_selection), - value=df['F'], - compare_fn=tm.assert_frame_equal, - expected=output, ) - - # GH11372 - idx = pd.MultiIndex.from_product([ - ['A', 'B', 'C'], - pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) - cols = pd.MultiIndex.from_product([ - ['foo', 'bar'], - pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) - - df = pd.DataFrame(np.random.random((12, 4)), - index=idx, columns=cols) - - subidx = pd.MultiIndex.from_tuples( - [('A', pd.Timestamp('2015-01-01')), - ('A', pd.Timestamp('2015-02-01'))]) - subcols = pd.MultiIndex.from_tuples( - [('foo', pd.Timestamp('2016-01-01')), - ('foo', pd.Timestamp('2016-02-01'))]) - - vals = pd.DataFrame(np.random.random((2, 2)), - index=subidx, columns=subcols) - check(target=df, - indexers=(subidx, subcols), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # set all columns - vals = pd.DataFrame( - np.random.random((2, 4)), index=subidx, columns=cols) - check(target=df, - indexers=(subidx, slice(None, None, None)), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # identity - copy = df.copy() - check(target=df, indexers=(df.index, df.columns), value=df, - compare_fn=tm.assert_frame_equal, expected=copy) - - def test_indexing_with_datetime_tz(self): - - # 8260 - # support datetime64 with tz - - idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), - name='foo') - dr = date_range('20130110', periods=3) - df = DataFrame({'A': idx, 'B': dr}) - df['C'] = idx - df.iloc[1, 1] = pd.NaT - df.iloc[1, 2] = pd.NaT - - # indexing - result = df.iloc[1] - expected = Series([Timestamp('2013-01-02 00:00:00-0500', - tz='US/Eastern'), np.nan, np.nan], - index=list('ABC'), dtype='object', name=1) - tm.assert_series_equal(result, expected) - result = df.loc[1] - expected = Series([Timestamp('2013-01-02 
00:00:00-0500', - tz='US/Eastern'), np.nan, np.nan], - index=list('ABC'), dtype='object', name=1) - tm.assert_series_equal(result, expected) - - # indexing - fast_xs - df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')}) - result = df.iloc[5] - expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D') - self.assertEqual(result, expected) - - result = df.loc[5] - self.assertEqual(result, expected) - - # indexing - boolean - result = df[df.a > df.a[3]] - expected = df.iloc[4:] - tm.assert_frame_equal(result, expected) - - # indexing - setting an element - df = DataFrame(data=pd.to_datetime( - ['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time']) - df['new_col'] = ['new', 'old'] - df.time = df.set_index('time').index.tz_localize('UTC') - v = df[df.new_col == 'new'].set_index('time').index.tz_convert( - 'US/Pacific') - - # trying to set a single element on a part of a different timezone - def f(): - df.loc[df.new_col == 'new', 'time'] = v - - self.assertRaises(ValueError, f) - - v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') - df.loc[df.new_col == 'new', 'time'] = v - tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) - - def test_indexing_with_datetimeindex_tz(self): - - # GH 12050 - # indexing on a series with a datetimeindex with tz - index = pd.date_range('2015-01-01', periods=2, tz='utc') - - ser = pd.Series(range(2), index=index, - dtype='int64') - - # list-like indexing - - for sel in (index, list(index)): - # getitem - tm.assert_series_equal(ser[sel], ser) - - # setitem - result = ser.copy() - result[sel] = 1 - expected = pd.Series(1, index=index) - tm.assert_series_equal(result, expected) - - # .loc getitem - tm.assert_series_equal(ser.loc[sel], ser) - - # .loc setitem - result = ser.copy() - result.loc[sel] = 1 - expected = pd.Series(1, index=index) - tm.assert_series_equal(result, expected) - - # single element indexing - - # getitem - self.assertEqual(ser[index[1]], 1) - - # setitem - result = ser.copy() - result[index[1]] = 5 - expected = pd.Series([0, 5], index=index) - tm.assert_series_equal(result, expected) - - # .loc getitem - self.assertEqual(ser.loc[index[1]], 1) - - # .loc setitem - result = ser.copy() - result.loc[index[1]] = 5 - expected = pd.Series([0, 5], index=index) - tm.assert_series_equal(result, expected) - def test_loc_setitem_dups(self): # GH 6541 @@ -1241,28 +996,6 @@ def test_loc_getitem_label_array_like(self): self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, 'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2) - def test_loc_getitem_series(self): - # GH14730 - # passing a series as a key with a MultiIndex - index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']]) - x = Series(index=index, data=range(9), dtype=np.float64) - y = Series([1, 3]) - expected = Series( - data=[0, 1, 2, 6, 7, 8], - index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]), - dtype=np.float64) - result = x.loc[y] - tm.assert_series_equal(result, expected) - - result = x.loc[[1, 3]] - tm.assert_series_equal(result, expected) - - empty = Series(data=[], dtype=np.float64) - expected = Series([], index=MultiIndex( - levels=index.levels, labels=[[], []], dtype=np.float64)) - result = x.loc[empty] - tm.assert_series_equal(result, expected) - def test_loc_getitem_bool(self): # boolean indexers b = [True, False, True, False] @@ -1700,136 +1433,6 @@ def test_iloc_getitem_labelled_frame(self): # trying to use a label self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) - def 
test_iloc_getitem_panel(self): - - # GH 7189 - p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), - items=['A', 'B', 'C', 'D'], - major_axis=['a', 'b', 'c'], - minor_axis=['one', 'two']) - - result = p.iloc[1] - expected = p.loc['B'] - tm.assert_frame_equal(result, expected) - - result = p.iloc[1, 1] - expected = p.loc['B', 'b'] - tm.assert_series_equal(result, expected) - - result = p.iloc[1, 1, 1] - expected = p.loc['B', 'b', 'two'] - self.assertEqual(result, expected) - - # slice - result = p.iloc[1:3] - expected = p.loc[['B', 'C']] - tm.assert_panel_equal(result, expected) - - result = p.iloc[:, 0:2] - expected = p.loc[:, ['a', 'b']] - tm.assert_panel_equal(result, expected) - - # list of integers - result = p.iloc[[0, 2]] - expected = p.loc[['A', 'C']] - tm.assert_panel_equal(result, expected) - - # neg indicies - result = p.iloc[[-1, 1], [-1, 1]] - expected = p.loc[['D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) - - # dups indicies - result = p.iloc[[-1, -1, 1], [-1, 1]] - expected = p.loc[['D', 'D', 'B'], ['c', 'b']] - tm.assert_panel_equal(result, expected) - - # combined - result = p.iloc[0, [True, True], [0, 1]] - expected = p.loc['A', ['a', 'b'], ['one', 'two']] - tm.assert_frame_equal(result, expected) - - # out-of-bounds exception - self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10, 5])) - - def f(): - p.iloc[0, [True, True], [0, 1, 2]] - - self.assertRaises(IndexError, f) - - # trying to use a label - self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j', 'D'])) - - # GH - p = Panel( - np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], - major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) - expected = p['A'] - - result = p.iloc[0, :, :] - tm.assert_frame_equal(result, expected) - - result = p.iloc[0, [True, True, True], :] - tm.assert_frame_equal(result, expected) - - result = p.iloc[0, [True, True, True], [0, 1]] - tm.assert_frame_equal(result, expected) - - def f(): - p.iloc[0, [True, True, True], [0, 1, 2]] - - self.assertRaises(IndexError, f) - - def f(): - p.iloc[0, [True, True, True], [2]] - - self.assertRaises(IndexError, f) - - def test_iloc_getitem_panel_multiindex(self): - # GH 7199 - # Panel with multi-index - multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'), - ('TWO', 'two'), - ('THREE', 'three')], - names=['UPPER', 'lower']) - - simple_index = [x[0] for x in multi_index] - wd1 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=multi_index) - - wd2 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], - minor_axis=simple_index) - - expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] - result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG - tm.assert_frame_equal(result1, expected1) - - expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] - result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] - tm.assert_frame_equal(result2, expected2) - - expected1 = DataFrame(index=['a'], columns=multi_index, - dtype='float64') - result1 = wd1.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result1, expected1) - - expected2 = DataFrame(index=['a'], columns=simple_index, - dtype='float64') - result2 = wd2.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result2, expected2) - - # GH 7516 - mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) - p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), - items=['a', 'b', 'c'], major_axis=mi, - minor_axis=['u', 'v', 'w']) - result = p.iloc[:, 1, 0] - expected = Series([3, 12, 21], index=['a', 'b', 
'c'], name='u') - tm.assert_series_equal(result, expected) - - result = p.loc[:, (1, 'y'), 'u'] - tm.assert_series_equal(result, expected) - def test_iloc_getitem_doc_issue(self): # multi axis slicing issue with single block @@ -1956,929 +1559,90 @@ def test_iloc_setitem_list_of_lists(self): expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) tm.assert_frame_equal(df, expected) - df = DataFrame( - dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [['x', 11], ['y', 13]] - expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], - B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - def test_iloc_getitem_multiindex(self): - mi_labels = DataFrame(np.random.randn(4, 3), - columns=[['i', 'i', 'j'], ['A', 'A', 'B']], - index=[['i', 'i', 'j', 'k'], - ['X', 'X', 'Y', 'Y']]) - - mi_int = DataFrame(np.random.randn(3, 3), - columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]]) - - # the first row - rs = mi_int.iloc[0] - with catch_warnings(record=True): - xp = mi_int.ix[4].ix[8] - tm.assert_series_equal(rs, xp, check_names=False) - self.assertEqual(rs.name, (4, 8)) - self.assertEqual(xp.name, 8) - - # 2nd (last) columns - rs = mi_int.iloc[:, 2] - with catch_warnings(record=True): - xp = mi_int.ix[:, 2] - tm.assert_series_equal(rs, xp) - - # corner column - rs = mi_int.iloc[2, 2] - with catch_warnings(record=True): - xp = mi_int.ix[:, 2].ix[2] - self.assertEqual(rs, xp) - - # this is basically regular indexing - rs = mi_labels.iloc[2, 2] - with catch_warnings(record=True): - xp = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0] - self.assertEqual(rs, xp) - - def test_loc_multiindex(self): - - mi_labels = DataFrame(np.random.randn(3, 3), - columns=[['i', 'i', 'j'], ['A', 'A', 'B']], - index=[['i', 'i', 'j'], ['X', 'X', 'Y']]) - - mi_int = DataFrame(np.random.randn(3, 3), - columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]]) - - # the first row - rs = mi_labels.loc['i'] - with catch_warnings(record=True): - xp = mi_labels.ix['i'] - tm.assert_frame_equal(rs, xp) - - # 2nd (last) columns - rs = mi_labels.loc[:, 'j'] - with catch_warnings(record=True): - xp = mi_labels.ix[:, 'j'] - tm.assert_frame_equal(rs, xp) - - # corner column - rs = mi_labels.loc['j'].loc[:, 'j'] - with catch_warnings(record=True): - xp = mi_labels.ix['j'].ix[:, 'j'] - tm.assert_frame_equal(rs, xp) - - # with a tuple - rs = mi_labels.loc[('i', 'X')] - with catch_warnings(record=True): - xp = mi_labels.ix[('i', 'X')] - tm.assert_frame_equal(rs, xp) - - rs = mi_int.loc[4] - with catch_warnings(record=True): - xp = mi_int.ix[4] - tm.assert_frame_equal(rs, xp) - - def test_loc_multiindex_indexer_none(self): - - # GH6788 - # multi-index indexer is None (meaning take all) - attributes = ['Attribute' + str(i) for i in range(1)] - attribute_values = ['Value' + str(i) for i in range(5)] - - index = MultiIndex.from_product([attributes, attribute_values]) - df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 - df = DataFrame(df, columns=index) - result = df[attributes] - tm.assert_frame_equal(result, df) - - # GH 7349 - # loc with a multi-index seems to be doing fallback - df = DataFrame(np.arange(12).reshape(-1, 1), - index=pd.MultiIndex.from_product([[1, 2, 3, 4], - [1, 2, 3]])) - - expected = df.loc[([1, 2], ), :] - result = df.loc[[1, 2]] - tm.assert_frame_equal(result, expected) - - def test_loc_multiindex_incomplete(self): - - # GH 7399 - # incomplete indexers - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) - expected = 
s.loc[:, 'a':'c'] - - result = s.loc[0:4, 'a':'c'] - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected) - - result = s.loc[:4, 'a':'c'] - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected) - - result = s.loc[0:, 'a':'c'] - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected) - - # GH 7400 - # multiindexer gettitem with list of indexers skips wrong element - s = pd.Series(np.arange(15, dtype='int64'), - MultiIndex.from_product([range(5), ['a', 'b', 'c']])) - expected = s.iloc[[6, 7, 8, 12, 13, 14]] - result = s.loc[2:4:2, 'a':'c'] - tm.assert_series_equal(result, expected) - - def test_multiindex_perf_warn(self): - - if sys.version_info < (2, 7): - raise nose.SkipTest('python version < 2.7') - - df = DataFrame({'jim': [0, 0, 1, 1], - 'joe': ['x', 'x', 'z', 'y'], - 'jolie': np.random.rand(4)}).set_index(['jim', 'joe']) - - with tm.assert_produces_warning(PerformanceWarning, - clear=[pd.core.index]): - df.loc[(1, 'z')] - - df = df.iloc[[2, 1, 3, 0]] - with tm.assert_produces_warning(PerformanceWarning): - df.loc[(0, )] - - def test_series_getitem_multiindex(self): - - # GH 6018 - # series regression getitem with a multi-index - - s = Series([1, 2, 3]) - s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) - - result = s[:, 0] - expected = Series([1], index=[0]) - tm.assert_series_equal(result, expected) - - result = s.loc[:, 1] - expected = Series([2, 3], index=[1, 2]) - tm.assert_series_equal(result, expected) - - # xs - result = s.xs(0, level=0) - expected = Series([1], index=[0]) - tm.assert_series_equal(result, expected) - - result = s.xs(1, level=1) - expected = Series([2, 3], index=[1, 2]) - tm.assert_series_equal(result, expected) - - # GH6258 - dt = list(date_range('20130903', periods=3)) - idx = MultiIndex.from_product([list('AB'), dt]) - s = Series([1, 3, 4, 1, 3, 4], index=idx) - - result = s.xs('20130903', level=1) - expected = Series([1, 1], index=list('AB')) - tm.assert_series_equal(result, expected) - - # GH5684 - idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), - ('b', 'two')]) - s = Series([1, 2, 3, 4], index=idx) - s.index.set_names(['L1', 'L2'], inplace=True) - result = s.xs('one', level='L2') - expected = Series([1, 3], index=['a', 'b']) - expected.index.set_names(['L1'], inplace=True) - tm.assert_series_equal(result, expected) - - def test_ix_general(self): - - # ix general issues - - # GH 2817 - data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} - df = DataFrame(data).set_index(keys=['col', 'year']) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with self.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - df.sort_index(inplace=True) - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3], - names=['col', 'year']) - expected = DataFrame({'amount': [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - - def test_ix_weird_slicing(self): - # http://stackoverflow.com/q/17056560/1240268 - df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], - 'two': [1, 2, 3, 4, 5]}) - df.loc[df['one'] > 1, 'two'] = -df['two'] - - expected = DataFrame({'one': {0: 1.0, - 1: 2.0, - 2: 3.0, - 3: nan, - 4: nan}, - 'two': {0: 1, - 1: -2, - 2: -3, - 3: 4, - 4: 5}}) - tm.assert_frame_equal(df, expected) - - def test_xs_multiindex(self): - - # GH2903 - columns = MultiIndex.from_tuples( - [('a', 'foo'), ('a', 'bar'), ('b', 'hello'), - ('b', 'world')], names=['lvl0', 'lvl1']) - df = DataFrame(np.random.randn(4, 4), columns=columns) - df.sort_index(axis=1, inplace=True) - result = df.xs('a', level='lvl0', axis=1) - expected = df.iloc[:, 0:2].loc[:, 'a'] - tm.assert_frame_equal(result, expected) - - result = df.xs('foo', level='lvl1', axis=1) - expected = df.iloc[:, 1:2].copy() - expected.columns = expected.columns.droplevel('lvl1') - tm.assert_frame_equal(result, expected) - - def test_per_axis_per_level_getitem(self): - - # GH6134 - # example test case - ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7), _mklbl( - 'C', 4), _mklbl('D', 2)]) - df = DataFrame(np.arange(len(ix.get_values())), index=ix) - - result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C2' or c == 'C3')]] - result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :] - tm.assert_frame_equal(result, expected) - - # test multi-index slicing with per axis and per index controls - index = MultiIndex.from_tuples([('A', 1), ('A', 2), - ('A', 3), ('B', 1)], - names=['one', 'two']) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - - df = DataFrame( - np.arange(16, dtype='int64').reshape( - 4, 4), index=index, columns=columns) - df = df.sort_index(axis=0).sort_index(axis=1) - - # identity - result = df.loc[(slice(None), slice(None)), :] - tm.assert_frame_equal(result, df) - result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] - tm.assert_frame_equal(result, df) - result = df.loc[:, (slice(None), slice(None))] - tm.assert_frame_equal(result, df) - - # index - result = df.loc[(slice(None), [1]), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(None), 1), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - # columns - result = df.loc[:, (slice(None), ['foo'])] - expected = df.iloc[:, [1, 3]] - tm.assert_frame_equal(result, expected) - - # both - result = df.loc[(slice(None), 1), (slice(None), ['foo'])] - expected = df.iloc[[0, 3], [1, 3]] - tm.assert_frame_equal(result, expected) - - result = df.loc['A', 'a'] - expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]), - index=Index([1, 2, 3], name='two'), - columns=Index(['bar', 'foo'], name='lvl1')) - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(None), [1, 2]), :] - expected = df.iloc[[0, 1, 3]] - tm.assert_frame_equal(result, expected) - - # multi-level series - s = 
Series(np.arange(len(ix.get_values())), index=ix) - result = s.loc['A1':'A3', :, ['C1', 'C3']] - expected = s.loc[[tuple([a, b, c, d]) - for a, b, c, d in s.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_series_equal(result, expected) - - # boolean indexers - result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] - expected = df.iloc[[2, 3]] - tm.assert_frame_equal(result, expected) - - def f(): - df.loc[(slice(None), np.array([True, False])), :] - - self.assertRaises(ValueError, f) - - # ambiguous cases - # these can be multiply interpreted (e.g. in this case - # as df.loc[slice(None),[1]] as well - self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]]) - - result = df.loc[(slice(None), [1]), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - # not lexsorted - self.assertEqual(df.index.lexsort_depth, 2) - df = df.sort_index(level=1, axis=0) - self.assertEqual(df.index.lexsort_depth, 0) - with tm.assertRaisesRegexp( - UnsortedIndexError, - 'MultiIndex Slicing requires the index to be fully ' - r'lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] - - def test_multiindex_slicers_non_unique(self): - - # GH 7106 - # non-unique mi index support - df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], - B=['a', 'a', 'a', 'a'], - C=[1, 2, 1, 3], - D=[1, 2, 3, 4])) - .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) - expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], - C=[1, 1], D=[1, 3])) - .set_index(['A', 'B', 'C']).sort_index()) - result = df.loc[(slice(None), slice(None), 1), :] - tm.assert_frame_equal(result, expected) - - # this is equivalent of an xs expression - result = df.xs(1, level=2, drop_level=False) - tm.assert_frame_equal(result, expected) - - df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], - B=['a', 'a', 'a', 'a'], - C=[1, 2, 1, 2], - D=[1, 2, 3, 4])) - .set_index(['A', 'B', 'C']).sort_index()) - self.assertFalse(df.index.is_unique) - expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], - C=[1, 1], D=[1, 3])) - .set_index(['A', 'B', 'C']).sort_index()) - result = df.loc[(slice(None), slice(None), 1), :] - self.assertFalse(result.index.is_unique) - tm.assert_frame_equal(result, expected) - - # GH12896 - # numpy-implementation dependent bug - ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16, - 17, 18, 19, 200000, 200000] - n = len(ints) - idx = MultiIndex.from_arrays([['a'] * n, ints]) - result = Series([1] * n, index=idx) - result = result.sort_index() - result = result.loc[(slice(None), slice(100000))] - expected = Series([1] * (n - 2), index=idx[:-2]).sort_index() - tm.assert_series_equal(result, expected) - - def test_multiindex_slicers_datetimelike(self): - - # GH 7429 - # buggy/inconsistent behavior when slicing with datetime-like - import datetime - dates = [datetime.datetime(2012, 1, 1, 12, 12, 12) + - datetime.timedelta(days=i) for i in range(6)] - freq = [1, 2] - index = MultiIndex.from_product( - [dates, freq], names=['date', 'frequency']) - - df = DataFrame( - np.arange(6 * 2 * 4, dtype='int64').reshape( - -1, 4), index=index, columns=list('ABCD')) - - # multi-axis slicing - idx = pd.IndexSlice - expected = df.iloc[[0, 2, 4], [0, 1]] - result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'), - Timestamp('2012-01-03 12:12:12')), - slice(1, 1)), slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - result = df.loc[(idx[Timestamp('2012-01-01 
12:12:12'):Timestamp( - '2012-01-03 12:12:12')], idx[1:1]), slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'), - Timestamp('2012-01-03 12:12:12')), 1), - slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - # with strings - result = df.loc[(slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'), - slice(1, 1)), slice('A', 'B')] - tm.assert_frame_equal(result, expected) - - result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1), - idx['A', 'B']] - tm.assert_frame_equal(result, expected) - - def test_multiindex_slicers_edges(self): - # GH 8132 - # various edge cases - df = DataFrame( - {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5, - 'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3, - 'DATE': ["2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30", - "2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09", - "2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01", - "2013-07-09", "2013-08-06", "2013-09-03"], - 'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]}) - - df['DATE'] = pd.to_datetime(df['DATE']) - df1 = df.set_index(['A', 'B', 'DATE']) - df1 = df1.sort_index() - - # A1 - Get all values under "A0" and "A1" - result = df1.loc[(slice('A1')), :] - expected = df1.iloc[0:10] - tm.assert_frame_equal(result, expected) - - # A2 - Get all values from the start to "A2" - result = df1.loc[(slice('A2')), :] - expected = df1 - tm.assert_frame_equal(result, expected) - - # A3 - Get all values under "B1" or "B2" - result = df1.loc[(slice(None), slice('B1', 'B2')), :] - expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] - tm.assert_frame_equal(result, expected) - - # A4 - Get all values between 2013-07-02 and 2013-07-09 - result = df1.loc[(slice(None), slice(None), - slice('20130702', '20130709')), :] - expected = df1.iloc[[1, 2, 6, 7, 12]] - tm.assert_frame_equal(result, expected) - - # B1 - Get all values in B0 that are also under A0, A1 and A2 - result = df1.loc[(slice('A2'), slice('B0')), :] - expected = df1.iloc[[0, 1, 5, 6, 10, 11]] - tm.assert_frame_equal(result, expected) - - # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for - # the As) - result = df1.loc[(slice(None), slice('B2')), :] - expected = df1 - tm.assert_frame_equal(result, expected) - - # B3 - Get all values from B1 to B2 and up to 2013-08-06 - result = df1.loc[(slice(None), slice('B1', 'B2'), - slice('2013-08-06')), :] - expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] - tm.assert_frame_equal(result, expected) - - # B4 - Same as A4 but the start of the date slice is not a key. 
- # shows indexing on a partial selection slice - result = df1.loc[(slice(None), slice(None), - slice('20130701', '20130709')), :] - expected = df1.iloc[[1, 2, 6, 7, 12]] - tm.assert_frame_equal(result, expected) - - def test_per_axis_per_level_doc_examples(self): - - # test index maker - idx = pd.IndexSlice - - # from indexing.rst / advanced - index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), - _mklbl('C', 4), _mklbl('D', 2)]) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') - .reshape((len(index), len(columns))), - index=index, columns=columns) - result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :] - tm.assert_frame_equal(result, expected) - - result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - result = df.loc[idx[:, :, ['C1', 'C3']], :] - tm.assert_frame_equal(result, expected) - - # not sorted - def f(): - df.loc['A1', (slice(None), 'foo')] - - self.assertRaises(UnsortedIndexError, f) - df = df.sort_index(axis=1) - - # slicing - df.loc['A1', (slice(None), 'foo')] - df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')] - - # setitem - df.loc(axis=0)[:, :, ['C1', 'C3']] = -10 - - def test_loc_axis_arguments(self): - - index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), - _mklbl('C', 4), _mklbl('D', 2)]) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') - .reshape((len(index), len(columns))), - index=index, - columns=columns).sort_index().sort_index(axis=1) - - # axis 0 - result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (a == 'A1' or a == 'A2' or a == 'A3') and ( - c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - - result = df.loc(axis='index')[:, :, ['C1', 'C3']] - expected = df.loc[[tuple([a, b, c, d]) - for a, b, c, d in df.index.values - if (c == 'C1' or c == 'C3')]] - tm.assert_frame_equal(result, expected) - - # axis 1 - result = df.loc(axis=1)[:, 'foo'] - expected = df.loc[:, (slice(None), 'foo')] - tm.assert_frame_equal(result, expected) - - result = df.loc(axis='columns')[:, 'foo'] - expected = df.loc[:, (slice(None), 'foo')] - tm.assert_frame_equal(result, expected) - - # invalid axis - def f(): - df.loc(axis=-1)[:, :, ['C1', 'C3']] - - self.assertRaises(ValueError, f) - - def f(): - df.loc(axis=2)[:, :, ['C1', 'C3']] - - self.assertRaises(ValueError, f) - - def f(): - df.loc(axis='foo')[:, :, ['C1', 'C3']] - - self.assertRaises(ValueError, f) - - def test_loc_coerceion(self): - - # 12411 - df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), - pd.NaT]}) - expected = df.dtypes - - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) - - # 12045 - import datetime - df = DataFrame({'date': [datetime.datetime(2012, 1, 1), - 
datetime.datetime(1012, 1, 2)]}) - expected = df.dtypes - - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) - - # 11594 - df = DataFrame({'text': ['some words'] + [None] * 9}) - expected = df.dtypes - - result = df.iloc[0:2] - tm.assert_series_equal(result.dtypes, expected) - - result = df.iloc[3:] - tm.assert_series_equal(result.dtypes, expected) - - def test_per_axis_per_level_setitem(self): - - # test index maker - idx = pd.IndexSlice - - # test multi-index slicing with per axis and per index controls - index = MultiIndex.from_tuples([('A', 1), ('A', 2), - ('A', 3), ('B', 1)], - names=['one', 'two']) - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), - ('b', 'foo'), ('b', 'bah')], - names=['lvl0', 'lvl1']) - - df_orig = DataFrame( - np.arange(16, dtype='int64').reshape( - 4, 4), index=index, columns=columns) - df_orig = df_orig.sort_index(axis=0).sort_index(axis=1) - - # identity - df = df_orig.copy() - df.loc[(slice(None), slice(None)), :] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc(axis=0)[:, :] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[:, (slice(None), slice(None))] = 100 - expected = df_orig.copy() - expected.iloc[:, :] = 100 - tm.assert_frame_equal(df, expected) - - # index - df = df_orig.copy() - df.loc[(slice(None), [1]), :] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[(slice(None), 1), :] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc(axis=0)[:, 1] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3]] = 100 - tm.assert_frame_equal(df, expected) - - # columns - df = df_orig.copy() - df.loc[:, (slice(None), ['foo'])] = 100 - expected = df_orig.copy() - expected.iloc[:, [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - # both - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[idx[:, 1], idx[:, ['foo']]] = 100 - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc['A', 'a'] = 100 - expected = df_orig.copy() - expected.iloc[0:3, 0:2] = 100 - tm.assert_frame_equal(df, expected) - - # setting with a list-like - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( - [[100, 100], [100, 100]], dtype='int64') - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = 100 - tm.assert_frame_equal(df, expected) - - # not enough values - df = df_orig.copy() - - def f(): - df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( - [[100], [100, 100]], dtype='int64') - - self.assertRaises(ValueError, f) - - def f(): - df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( - [100, 100, 100, 100], dtype='int64') - - self.assertRaises(ValueError, f) - - # with an alignable rhs - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] = 
df.loc[(slice( - None), 1), (slice(None), ['foo'])] * 5 - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 - tm.assert_frame_equal(df, expected) - - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[(slice( - None), 1), (slice(None), ['foo'])] - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] - tm.assert_frame_equal(df, expected) - - rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy() - rhs.loc[:, ('c', 'bah')] = 10 - df = df_orig.copy() - df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs - expected = df_orig.copy() - expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + df = DataFrame( + dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [['x', 11], ['y', 13]] + expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], + B=[5, 6, 11, 13, 9])) tm.assert_frame_equal(df, expected) - def test_multiindex_setitem(self): - - # GH 3738 - # setting with a multi-index right hand side - arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']), - np.array(['one', 'two', 'one', 'one', 'two', 'one']), - np.arange(0, 6, 1)] - - df_orig = pd.DataFrame(np.random.randn(6, 3), - index=arrays, - columns=['A', 'B', 'C']).sort_index() + def test_ix_general(self): - expected = df_orig.loc[['bar']] * 2 - df = df_orig.copy() - df.loc[['bar']] *= 2 - tm.assert_frame_equal(df.loc[['bar']], expected) + # ix general issues - # raise because these have differing levels - def f(): - df.loc['bar'] *= 2 + # GH 2817 + data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} + df = DataFrame(data).set_index(keys=['col', 'year']) + key = 4.0, 2012 - self.assertRaises(TypeError, f) + # emits a PerformanceWarning, ok + with self.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - # from SO - # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation - df_orig = DataFrame.from_dict({'price': { - ('DE', 'Coal', 'Stock'): 2, - ('DE', 'Gas', 'Stock'): 4, - ('DE', 'Elec', 'Demand'): 1, - ('FR', 'Gas', 'Stock'): 5, - ('FR', 'Solar', 'SupIm'): 0, - ('FR', 'Wind', 'SupIm'): 0 - }}) - df_orig.index = MultiIndex.from_tuples(df_orig.index, - names=['Sit', 'Com', 'Type']) + # this is ok + df.sort_index(inplace=True) + res = df.loc[key] - expected = df_orig.copy() - expected.iloc[[0, 2, 3]] *= 2 + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3], + names=['col', 'year']) + expected = DataFrame({'amount': [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) - idx = pd.IndexSlice - df = df_orig.copy() - df.loc[idx[:, :, 'Stock'], :] *= 2 - tm.assert_frame_equal(df, expected) + def test_ix_weird_slicing(self): + # http://stackoverflow.com/q/17056560/1240268 + df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], + 'two': [1, 2, 3, 4, 5]}) + df.loc[df['one'] > 1, 'two'] = -df['two'] - df = df_orig.copy() - df.loc[idx[:, :, 'Stock'], 'price'] *= 2 + expected = DataFrame({'one': {0: 1.0, + 1: 2.0, + 2: 3.0, + 3: nan, + 4: nan}, + 'two': {0: 1, + 1: -2, + 2: -3, + 3: 4, + 4: 5}}) tm.assert_frame_equal(df, expected) - def test_getitem_multiindex(self): - # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise - # the appropriate error, only in PY3 of course! 
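# Illustrative aside, not part of the diff: a minimal sketch of the GH 5725
# behaviour the surrounding test asserts -- selecting a first-level label from
# a Series with a two-level MultiIndex drops that level, and a label that is
# not present raises KeyError. The tiny index used here is invented.
import pandas as pd

index = pd.MultiIndex.from_tuples([('D', 26), ('D', 37), ('B', 57)],
                                  names=['tag', 'day'])
s = pd.Series([0.1, 0.2, 0.3], index=index, name='val')
s['D']    # Series indexed by 'day' only: 26 -> 0.1, 37 -> 0.2
# s['X']  # raises KeyError, as the assertions below expect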
- index = MultiIndex(levels=[['D', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - arr = np.random.randn(len(index), 1) - df = DataFrame(arr, index=index, columns=['val']) - result = df.val['D'] - expected = Series(arr.ravel()[0:3], name='val', index=Index( - [26, 37, 57], name='day')) - tm.assert_series_equal(result, expected) - - def f(): - df.val['A'] - - self.assertRaises(KeyError, f) - - def f(): - df.val['X'] - - self.assertRaises(KeyError, f) - - # A is treated as a special Timestamp - index = MultiIndex(levels=[['A', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - df = DataFrame(arr, index=index, columns=['val']) - result = df.val['A'] - expected = Series(arr.ravel()[0:3], name='val', index=Index( - [26, 37, 57], name='day')) - tm.assert_series_equal(result, expected) - - def f(): - df.val['X'] + def test_loc_coerceion(self): - self.assertRaises(KeyError, f) + # 12411 + df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), + pd.NaT]}) + expected = df.dtypes - # GH 7866 - # multi-index slicing with missing indexers - idx = pd.MultiIndex.from_product([['A', 'B', 'C'], - ['foo', 'bar', 'baz']], - names=['one', 'two']) - s = pd.Series(np.arange(9, dtype='int64'), index=idx).sort_index() + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) - exp_idx = pd.MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], - names=['one', 'two']) - expected = pd.Series(np.arange(3, dtype='int64'), - index=exp_idx).sort_index() + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) - result = s.loc[['A']] - tm.assert_series_equal(result, expected) - result = s.loc[['A', 'D']] - tm.assert_series_equal(result, expected) + # 12045 + import datetime + df = DataFrame({'date': [datetime.datetime(2012, 1, 1), + datetime.datetime(1012, 1, 2)]}) + expected = df.dtypes - # not any values found - self.assertRaises(KeyError, lambda: s.loc[['D']]) + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) - # empty ok - result = s.loc[[]] - expected = s.iloc[[]] - tm.assert_series_equal(result, expected) + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) - idx = pd.IndexSlice - expected = pd.Series([0, 3, 6], index=pd.MultiIndex.from_product( - [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() + # 11594 + df = DataFrame({'text': ['some words'] + [None] * 9}) + expected = df.dtypes - result = s.loc[idx[:, ['foo']]] - tm.assert_series_equal(result, expected) - result = s.loc[idx[:, ['foo', 'bah']]] - tm.assert_series_equal(result, expected) + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) - # GH 8737 - # empty indexer - multi_index = pd.MultiIndex.from_product((['foo', 'bar', 'baz'], - ['alpha', 'beta'])) - df = DataFrame( - np.random.randn(5, 6), index=range(5), columns=multi_index) - df = df.sort_index(level=0, axis=1) - - expected = DataFrame(index=range(5), - columns=multi_index.reindex([])[0]) - result1 = df.loc[:, ([], slice(None))] - result2 = df.loc[:, (['foo'], [])] - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - - # regression from < 0.14.0 - # GH 7914 - df = DataFrame([[np.mean, np.median], ['mean', 'median']], - columns=MultiIndex.from_tuples([('functs', 'mean'), - ('functs', 'median')]), - index=['function', 'name']) - result = 
df.loc['function', ('functs', 'mean')] - self.assertEqual(result, np.mean) + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) def test_setitem_dtype_upcast(self): @@ -3154,233 +1918,6 @@ def test_multi_nan_indexing(self): Index(['C1', 'C2', 'C3', 'C4'], name='b')]) tm.assert_frame_equal(result, expected) - def test_iloc_panel_issue(self): - - # GH 3617 - p = Panel(randn(4, 4, 4)) - - self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3)) - self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3)) - self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3)) - self.assertEqual(p.iloc[1, 1, :3].shape, (3, )) - self.assertEqual(p.iloc[1, :3, 1].shape, (3, )) - self.assertEqual(p.iloc[:3, 1, 1].shape, (3, )) - - def test_panel_getitem(self): - # GH4016, date selection returns a frame when a partial string - # selection - ind = date_range(start="2000", freq="D", periods=1000) - df = DataFrame( - np.random.randn( - len(ind), 5), index=ind, columns=list('ABCDE')) - panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) - - test2 = panel.ix[:, "2002":"2002-12-31"] - test1 = panel.ix[:, "2002"] - tm.assert_panel_equal(test1, test2) - - # GH8710 - # multi-element getting with a list - panel = tm.makePanel() - - expected = panel.iloc[[0, 1]] - - result = panel.loc[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) - - result = panel.loc[['ItemA', 'ItemB'], :, :] - tm.assert_panel_equal(result, expected) - - result = panel[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) - - result = panel.loc['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) - - result = panel.ix['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) - - result = panel.ix[['ItemA', 'ItemB']] - tm.assert_panel_equal(result, expected) - - # with an object-like - # GH 9140 - class TestObject: - - def __str__(self): - return "TestObject" - - obj = TestObject() - - p = Panel(np.random.randn(1, 5, 4), items=[obj], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - - expected = p.iloc[0] - result = p[obj] - tm.assert_frame_equal(result, expected) - - def test_panel_setitem(self): - - # GH 7763 - # loc and setitem have setting differences - np.random.seed(0) - index = range(3) - columns = list('abc') - - panel = Panel({'A': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'B': DataFrame(np.random.randn(3, 3), - index=index, columns=columns), - 'C': DataFrame(np.random.randn(3, 3), - index=index, columns=columns)}) - - replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) - expected = Panel({'A': replace, 'B': replace, 'C': replace}) - - p = panel.copy() - for idx in list('ABC'): - p[idx] = replace - tm.assert_panel_equal(p, expected) - - p = panel.copy() - for idx in list('ABC'): - p.loc[idx, :, :] = replace - tm.assert_panel_equal(p, expected) - - def test_panel_setitem_with_multiindex(self): - - # 10360 - # failing with a multi-index - arr = np.array([[[1, 2, 3], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]], - dtype=np.float64) - - # reg index - axes = dict(items=['A', 'B'], major_axis=[0, 1], - minor_axis=['X', 'Y', 'Z']) - p1 = Panel(0., **axes) - p1.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p1, expected) - - # multi-indexes - axes['items'] = pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')]) - p2 = Panel(0., **axes) - p2.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p2, expected) - - axes['major_axis'] = 
pd.MultiIndex.from_tuples([('A', 1), ('A', 2)]) - p3 = Panel(0., **axes) - p3.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p3, expected) - - axes['minor_axis'] = pd.MultiIndex.from_product([['X'], range(3)]) - p4 = Panel(0., **axes) - p4.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p4, expected) - - arr = np.array( - [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64) - p5 = Panel(0., **axes) - p5.iloc[0, :, 0] = [1, 2] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p5, expected) - - def test_panel_assignment(self): - # GH3777 - wp = Panel(randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - wp2 = Panel(randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - - # TODO: unused? - # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - - def f(): - wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ - ['Item1', 'Item2'], :, ['A', 'B']] - - self.assertRaises(NotImplementedError, f) - - # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] - # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign - # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - # tm.assert_panel_equal(result,expected) - - def test_multiindex_assignment(self): - - # GH3777 part 2 - - # mixed dtype - df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), - columns=list('abc'), - index=[[4, 4, 8], [8, 10, 12]]) - df['d'] = np.nan - arr = np.array([0., 1.]) - - df.ix[4, 'd'] = arr - tm.assert_series_equal(df.ix[4, 'd'], - Series(arr, index=[8, 10], name='d')) - - # single dtype - df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), - columns=list('abc'), - index=[[4, 4, 8], [8, 10, 12]]) - - df.ix[4, 'c'] = arr - exp = Series(arr, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) - - # scalar ok - df.ix[4, 'c'] = 10 - exp = Series(10, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) - - # invalid assignments - def f(): - df.ix[4, 'c'] = [0, 1, 2, 3] - - self.assertRaises(ValueError, f) - - def f(): - df.ix[4, 'c'] = [0] - - self.assertRaises(ValueError, f) - - # groupby example - NUM_ROWS = 100 - NUM_COLS = 10 - col_names = ['A' + num for num in - map(str, np.arange(NUM_COLS).tolist())] - index_cols = col_names[:5] - - df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), - dtype=np.int64, columns=col_names) - df = df.set_index(index_cols).sort_index() - grp = df.groupby(level=index_cols[:4]) - df['new_col'] = np.nan - - f_index = np.arange(5) - - def f(name, df2): - return Series(np.arange(df2.shape[0]), - name=df2.index.values[0]).reindex(f_index) - - # TODO(wesm): unused? 
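# Illustrative aside, not part of the diff: the partial-key assignment that the
# .ix calls above exercise, written as a small .loc sketch with invented data.
# The point is that a first-level key selects every row beneath it, so the
# assigned array has to match that length.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.zeros((3, 1)), columns=['c'],
                  index=pd.MultiIndex.from_tuples([(4, 8), (4, 10), (8, 12)]))
df.loc[4, 'c'] = np.array([1.0, 2.0])   # fills the two rows under key 4
# a length-3 array here would raise ValueError: wrong number of values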
- # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T - - # we are actually operating on a copy here - # but in this case, that's ok - for name, df2 in grp: - new_vals = np.arange(df2.shape[0]) - df.ix[name, 'new_col'] = new_vals - def test_multi_assign(self): # GH 3626, an assignement of a sub-df to a df @@ -4069,36 +2606,6 @@ def f(): dtype='float64') tm.assert_frame_equal(df, exp) - def test_partial_setting_with_datetimelike_dtype(self): - - # GH9478 - # a datetimeindex alignment issue with partial setting - df = pd.DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), - index=pd.date_range('1/1/2000', periods=3, - freq='1H')) - expected = df.copy() - expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT] - - mask = df.A < 1 - df.loc[mask, 'C'] = df.loc[mask].index - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_datetime(self): - - # GH 9516 - dt1 = Timestamp('20130101 09:00:00') - dt2 = Timestamp('20130101 10:00:00') - - for conv in [lambda x: x, lambda x: x.to_datetime64(), - lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]: - - df = pd.DataFrame() - df.loc[conv(dt1), 'one'] = 100 - df.loc[conv(dt2), 'one'] = 200 - - expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2]) - tm.assert_frame_equal(df, expected) - def test_series_partial_set(self): # partial set with new index # Regression from GH4825 @@ -4233,54 +2740,6 @@ def test_series_partial_set_with_name(self): result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) - def test_series_partial_set_datetime(self): - # GH 11497 - - idx = date_range('2011-01-01', '2011-01-02', freq='D', name='idx') - ser = Series([0.1, 0.2], index=idx, name='s') - - result = ser.loc[[Timestamp('2011-01-01'), Timestamp('2011-01-02')]] - exp = Series([0.1, 0.2], index=idx, name='s') - tm.assert_series_equal(result, exp, check_index_type=True) - - keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'), - Timestamp('2011-01-01')] - exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'), - name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) - - keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'), - Timestamp('2011-01-03')] - exp = Series([np.nan, 0.2, np.nan], - index=pd.DatetimeIndex(keys, name='idx'), name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) - - def test_series_partial_set_period(self): - # GH 11497 - - idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx') - ser = Series([0.1, 0.2], index=idx, name='s') - - result = ser.loc[[pd.Period('2011-01-01', freq='D'), - pd.Period('2011-01-02', freq='D')]] - exp = Series([0.1, 0.2], index=idx, name='s') - tm.assert_series_equal(result, exp, check_index_type=True) - - keys = [pd.Period('2011-01-02', freq='D'), - pd.Period('2011-01-02', freq='D'), - pd.Period('2011-01-01', freq='D')] - exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'), - name='s') - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) - - keys = [pd.Period('2011-01-03', freq='D'), - pd.Period('2011-01-02', freq='D'), - pd.Period('2011-01-03', freq='D')] - exp = Series([np.nan, 0.2, np.nan], - index=pd.PeriodIndex(keys, name='idx'), name='s') - result = ser.loc[keys] - tm.assert_series_equal(result, exp) - def test_partial_set_invalid(self): # GH 4940 @@ -4566,509 +3025,6 @@ def test_cache_updating(self): expected = Series([0, 0, 0, 2, 0], name='f') tm.assert_series_equal(df.f, expected) - def 
test_slice_consolidate_invalidate_item_cache(self): - - # this is chained assignment, but will 'work' - with option_context('chained_assignment', None): - - # #3970 - df = DataFrame({"aa": lrange(5), "bb": [2.2] * 5}) - - # Creates a second float block - df["cc"] = 0.0 - - # caches a reference to the 'bb' series - df["bb"] - - # repr machinery triggers consolidation - repr(df) - - # Assignment to wrong series - df['bb'].iloc[0] = 0.17 - df._clear_item_cache() - self.assertAlmostEqual(df['bb'][0], 0.17) - - def test_setitem_cache_updating(self): - # GH 5424 - cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven'] - - for do_ref in [False, False]: - df = DataFrame({'a': cont, - "b": cont[3:] + cont[:3], - 'c': np.arange(7)}) - - # ref the cache - if do_ref: - df.ix[0, "c"] - - # set it - df.ix[7, 'c'] = 1 - - self.assertEqual(df.ix[0, 'c'], 0.0) - self.assertEqual(df.ix[7, 'c'], 1.0) - - # GH 7084 - # not updating cache on series setting with slices - expected = DataFrame({'A': [600, 600, 600]}, - index=date_range('5/7/2014', '5/9/2014')) - out = DataFrame({'A': [0, 0, 0]}, - index=date_range('5/7/2014', '5/9/2014')) - df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]}) - - # loop through df to update out - six = Timestamp('5/7/2014') - eix = Timestamp('5/9/2014') - for ix, row in df.iterrows(): - out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D'] - - tm.assert_frame_equal(out, expected) - tm.assert_series_equal(out['A'], expected['A']) - - # try via a chain indexing - # this actually works - out = DataFrame({'A': [0, 0, 0]}, - index=date_range('5/7/2014', '5/9/2014')) - for ix, row in df.iterrows(): - v = out[row['C']][six:eix] + row['D'] - out[row['C']][six:eix] = v - - tm.assert_frame_equal(out, expected) - tm.assert_series_equal(out['A'], expected['A']) - - out = DataFrame({'A': [0, 0, 0]}, - index=date_range('5/7/2014', '5/9/2014')) - for ix, row in df.iterrows(): - out.loc[six:eix, row['C']] += row['D'] - - tm.assert_frame_equal(out, expected) - tm.assert_series_equal(out['A'], expected['A']) - - def test_setitem_chained_setfault(self): - - # GH6026 - # setfaults under numpy 1.7.1 (ok on 1.8) - data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] - mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] - - df = DataFrame({'response': np.array(data)}) - mask = df.response == 'timeout' - df.response[mask] = 'none' - tm.assert_frame_equal(df, DataFrame({'response': mdata})) - - recarray = np.rec.fromarrays([data], names=['response']) - df = DataFrame(recarray) - mask = df.response == 'timeout' - df.response[mask] = 'none' - tm.assert_frame_equal(df, DataFrame({'response': mdata})) - - df = DataFrame({'response': data, 'response1': data}) - mask = df.response == 'timeout' - df.response[mask] = 'none' - tm.assert_frame_equal(df, DataFrame({'response': mdata, - 'response1': data})) - - # GH 6056 - expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar'])) - df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) - df['A'].iloc[0] = np.nan - result = df.head() - tm.assert_frame_equal(result, expected) - - df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) - df.A.iloc[0] = np.nan - result = df.head() - tm.assert_frame_equal(result, expected) - - def test_detect_chained_assignment(self): - - pd.set_option('chained_assignment', 'raise') - - # work with the chain - expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB')) - df = DataFrame(np.arange(4).reshape(2, 2), - columns=list('AB'), 
dtype='int64') - self.assertIsNone(df.is_copy) - df['A'][0] = -5 - df['A'][1] = -6 - tm.assert_frame_equal(df, expected) - - # test with the chaining - df = DataFrame({'A': Series(range(2), dtype='int64'), - 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - self.assertIsNone(df.is_copy) - - def f(): - df['A'][0] = -5 - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df['A'][1] = np.nan - - self.assertRaises(com.SettingWithCopyError, f) - self.assertIsNone(df['A'].is_copy) - - # using a copy (the chain), fails - df = DataFrame({'A': Series(range(2), dtype='int64'), - 'B': np.array(np.arange(2, 4), dtype=np.float64)}) - - def f(): - df.loc[0]['A'] = -5 - - self.assertRaises(com.SettingWithCopyError, f) - - # doc example - df = DataFrame({'a': ['one', 'one', 'two', 'three', - 'two', 'one', 'six'], - 'c': Series(range(7), dtype='int64')}) - self.assertIsNone(df.is_copy) - expected = DataFrame({'a': ['one', 'one', 'two', 'three', - 'two', 'one', 'six'], - 'c': [42, 42, 2, 3, 4, 42, 6]}) - - def f(): - indexer = df.a.str.startswith('o') - df[indexer]['c'] = 42 - - self.assertRaises(com.SettingWithCopyError, f) - - expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) - df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) - - def f(): - df['A'][0] = 111 - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df.loc[0]['A'] = 111 - - self.assertRaises(com.SettingWithCopyError, f) - - df.loc[0, 'A'] = 111 - tm.assert_frame_equal(df, expected) - - # make sure that is_copy is picked up reconstruction - # GH5475 - df = DataFrame({"A": [1, 2]}) - self.assertIsNone(df.is_copy) - with tm.ensure_clean('__tmp__pickle') as path: - df.to_pickle(path) - df2 = pd.read_pickle(path) - df2["B"] = df2["A"] - df2["B"] = df2["A"] - - # a suprious raise as we are setting the entire column here - # GH5597 - from string import ascii_letters as letters - - def random_text(nobs=100): - df = [] - for i in range(nobs): - idx = np.random.randint(len(letters), size=2) - idx.sort() - df.append([letters[idx[0]:idx[1]]]) - - return DataFrame(df, columns=['letters']) - - df = random_text(100000) - - # always a copy - x = df.iloc[[0, 1, 2]] - self.assertIsNotNone(x.is_copy) - x = df.iloc[[0, 1, 2, 4]] - self.assertIsNotNone(x.is_copy) - - # explicity copy - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer].copy() - self.assertIsNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - - # implicity take - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - - # implicity take 2 - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] - self.assertIsNotNone(df.is_copy) - df.loc[:, 'letters'] = df['letters'].apply(str.lower) - - # should be ok even though it's a copy! 
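# Illustrative aside, not part of the diff: the chained-assignment pattern
# these tests guard against, next to the single-call form that avoids the
# SettingWithCopy machinery. Minimal sketch reusing the shape of the small
# frame from the doc example above.
import pandas as pd

df = pd.DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
# df['A'][0] = 111     # chained: two indexing calls, may write to a temporary
df.loc[0, 'A'] = 111   # one .loc call selects and sets on the original frame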
- self.assertIsNone(df.is_copy) - df['letters'] = df['letters'].apply(str.lower) - self.assertIsNone(df.is_copy) - - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) - - # an identical take, so no copy - df = DataFrame({'a': [1]}).dropna() - self.assertIsNone(df.is_copy) - df['a'] += 1 - - # inplace ops - # original from: - # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug - a = [12, 23] - b = [123, None] - c = [1234, 2345] - d = [12345, 23456] - tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), - ('ears', 'right')] - events = {('eyes', 'left'): a, - ('eyes', 'right'): b, - ('ears', 'left'): c, - ('ears', 'right'): d} - multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) - zed = DataFrame(events, index=['a', 'b'], columns=multiind) - - def f(): - zed['eyes']['right'].fillna(value=555, inplace=True) - - self.assertRaises(com.SettingWithCopyError, f) - - df = DataFrame(np.random.randn(10, 4)) - s = df.iloc[:, 0].sort_values() - tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) - tm.assert_series_equal(s, df[0].sort_values()) - - # false positives GH6025 - df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]}) - str(df) - df['column1'] = df['column1'] + 'b' - str(df) - df = df[df['column2'] != 8] - str(df) - df['column1'] = df['column1'] + 'c' - str(df) - - # from SO: - # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc - df = DataFrame(np.arange(0, 9), columns=['count']) - df['group'] = 'b' - - def f(): - df.iloc[0:5]['group'] = 'a' - - self.assertRaises(com.SettingWithCopyError, f) - - # mixed type setting - # same dtype & changing dtype - df = DataFrame(dict(A=date_range('20130101', periods=5), - B=np.random.randn(5), - C=np.arange(5, dtype='int64'), - D=list('abcde'))) - - def f(): - df.ix[2]['D'] = 'foo' - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df.ix[2]['C'] = 'foo' - - self.assertRaises(com.SettingWithCopyError, f) - - def f(): - df['C'][2] = 'foo' - - self.assertRaises(com.SettingWithCopyError, f) - - def test_setting_with_copy_bug(self): - - # operating on a copy - df = pd.DataFrame({'a': list(range(4)), - 'b': list('ab..'), - 'c': ['a', 'b', np.nan, 'd']}) - mask = pd.isnull(df.c) - - def f(): - df[['c']][mask] = df[['b']][mask] - - self.assertRaises(com.SettingWithCopyError, f) - - # invalid warning as we are returning a new object - # GH 8730 - df1 = DataFrame({'x': Series(['a', 'b', 'c']), - 'y': Series(['d', 'e', 'f'])}) - df2 = df1[['x']] - - # this should not raise - df2['y'] = ['g', 'h', 'i'] - - def test_detect_chained_assignment_warnings(self): - - # warnings - with option_context('chained_assignment', 'warn'): - df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) - with tm.assert_produces_warning( - expected_warning=com.SettingWithCopyWarning): - df.loc[0]['A'] = 111 - - def test_float64index_slicing_bug(self): - # GH 5557, related to slicing a float index - ser = {256: 2321.0, - 1: 78.0, - 2: 2716.0, - 3: 0.0, - 4: 369.0, - 5: 0.0, - 6: 269.0, - 7: 0.0, - 8: 0.0, - 9: 0.0, - 10: 3536.0, - 11: 0.0, - 12: 24.0, - 13: 0.0, - 14: 931.0, - 15: 0.0, - 16: 101.0, - 17: 78.0, - 18: 9643.0, - 19: 0.0, - 20: 0.0, - 21: 0.0, - 22: 63761.0, - 23: 0.0, - 24: 446.0, - 25: 0.0, - 26: 34773.0, - 27: 0.0, - 28: 729.0, - 29: 78.0, - 30: 0.0, - 31: 0.0, - 32: 3374.0, - 33: 0.0, - 34: 1391.0, - 35: 
0.0, - 36: 361.0, - 37: 0.0, - 38: 61808.0, - 39: 0.0, - 40: 0.0, - 41: 0.0, - 42: 6677.0, - 43: 0.0, - 44: 802.0, - 45: 0.0, - 46: 2691.0, - 47: 0.0, - 48: 3582.0, - 49: 0.0, - 50: 734.0, - 51: 0.0, - 52: 627.0, - 53: 70.0, - 54: 2584.0, - 55: 0.0, - 56: 324.0, - 57: 0.0, - 58: 605.0, - 59: 0.0, - 60: 0.0, - 61: 0.0, - 62: 3989.0, - 63: 10.0, - 64: 42.0, - 65: 0.0, - 66: 904.0, - 67: 0.0, - 68: 88.0, - 69: 70.0, - 70: 8172.0, - 71: 0.0, - 72: 0.0, - 73: 0.0, - 74: 64902.0, - 75: 0.0, - 76: 347.0, - 77: 0.0, - 78: 36605.0, - 79: 0.0, - 80: 379.0, - 81: 70.0, - 82: 0.0, - 83: 0.0, - 84: 3001.0, - 85: 0.0, - 86: 1630.0, - 87: 7.0, - 88: 364.0, - 89: 0.0, - 90: 67404.0, - 91: 9.0, - 92: 0.0, - 93: 0.0, - 94: 7685.0, - 95: 0.0, - 96: 1017.0, - 97: 0.0, - 98: 2831.0, - 99: 0.0, - 100: 2963.0, - 101: 0.0, - 102: 854.0, - 103: 0.0, - 104: 0.0, - 105: 0.0, - 106: 0.0, - 107: 0.0, - 108: 0.0, - 109: 0.0, - 110: 0.0, - 111: 0.0, - 112: 0.0, - 113: 0.0, - 114: 0.0, - 115: 0.0, - 116: 0.0, - 117: 0.0, - 118: 0.0, - 119: 0.0, - 120: 0.0, - 121: 0.0, - 122: 0.0, - 123: 0.0, - 124: 0.0, - 125: 0.0, - 126: 67744.0, - 127: 22.0, - 128: 264.0, - 129: 0.0, - 260: 197.0, - 268: 0.0, - 265: 0.0, - 269: 0.0, - 261: 0.0, - 266: 1198.0, - 267: 0.0, - 262: 2629.0, - 258: 775.0, - 257: 0.0, - 263: 0.0, - 259: 0.0, - 264: 163.0, - 250: 10326.0, - 251: 0.0, - 252: 1228.0, - 253: 0.0, - 254: 2769.0, - 255: 0.0} - - # smoke test for the repr - s = Series(ser) - result = s.value_counts() - str(result) - def test_set_ix_out_of_bounds_axis_0(self): df = pd.DataFrame( randn(2, 5), index=["row%s" % i for i in range(2)], @@ -5281,34 +3237,6 @@ def assert_slices_equivalent(l_slc, i_slc): assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1]) assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0]) - def test_multiindex_label_slicing_with_negative_step(self): - s = Series(np.arange(20), - MultiIndex.from_product([list('abcde'), np.arange(4)])) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) - - assert_slices_equivalent(SLC[::-1], SLC[::-1]) - - assert_slices_equivalent(SLC['d'::-1], SLC[15::-1]) - assert_slices_equivalent(SLC[('d', )::-1], SLC[15::-1]) - - assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1]) - assert_slices_equivalent(SLC[:('d', ):-1], SLC[:11:-1]) - - assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC[('d', ):'b':-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC['d':('b', ):-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC[('d', ):('b', ):-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC['b':'d':-1], SLC[:0]) - - assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1]) - assert_slices_equivalent(SLC[:('c', 2):-1], SLC[:9:-1]) - assert_slices_equivalent(SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]) - def test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', @@ -5390,23 +3318,6 @@ def test_maybe_numeric_slice(self): expected = [1] self.assertEqual(result, expected) - def test_multiindex_slice_first_level(self): - # GH 12697 - freq = ['a', 'b', 'c', 'd'] - idx = pd.MultiIndex.from_product([freq, np.arange(500)]) - df = pd.DataFrame(list(range(2000)), index=idx, columns=['Test']) - df_slice = df.loc[pd.IndexSlice[:, 30:70], :] - result = df_slice.loc['a'] - expected = pd.DataFrame(list(range(30, 71)), 
- columns=['Test'], - index=range(30, 71)) - tm.assert_frame_equal(result, expected) - result = df_slice.loc['d'] - expected = pd.DataFrame(list(range(1530, 1571)), - columns=['Test'], - index=range(30, 71)) - tm.assert_frame_equal(result, expected) - class TestSeriesNoneCoercion(tm.TestCase): EXPECTED_RESULTS = [ @@ -5511,22 +3422,3 @@ def test_none_coercion_mixed_dtypes(self): datetime(2000, 1, 3)], 'd': [None, 'b', 'c']}) tm.assert_frame_equal(start_dataframe, exp) - - -class TestTimedeltaIndexing(tm.TestCase): - - def test_boolean_indexing(self): - # GH 14946 - df = pd.DataFrame({'x': range(10)}) - df.index = pd.to_timedelta(range(10), unit='s') - conditions = [df['x'] > 3, df['x'] == 3, df['x'] < 3] - expected_data = [[0, 1, 2, 3, 10, 10, 10, 10, 10, 10], - [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], - [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]] - for cond, data in zip(conditions, expected_data): - result = df.copy() - result.loc[cond, 'x'] = 10 - expected = pd.DataFrame(data, - index=pd.to_timedelta(range(10), unit='s'), - columns=['x']) - tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py new file mode 100644 index 0000000000000..1e6ecbbcdc756 --- /dev/null +++ b/pandas/tests/indexing/test_multiindex.py @@ -0,0 +1,1206 @@ +from warnings import catch_warnings +import pytest +import numpy as np +import pandas as pd +from pandas import (Panel, Series, MultiIndex, DataFrame, + Timestamp, Index, date_range) +from pandas.util import testing as tm +from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.tests.indexing.common import _mklbl + + +class TestMultiIndexBasic(tm.TestCase): + + def test_iloc_getitem_multiindex2(self): + # TODO(wesm): fix this + pytest.skip('this test was being suppressed, ' + 'needs to be fixed') + + arr = np.random.randn(3, 3) + df = DataFrame(arr, columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]]) + + rs = df.iloc[2] + xp = Series(arr[2], index=df.columns) + tm.assert_series_equal(rs, xp) + + rs = df.iloc[:, 2] + xp = Series(arr[:, 2], index=df.index) + tm.assert_series_equal(rs, xp) + + rs = df.iloc[2, 2] + xp = df.values[2, 2] + self.assertEqual(rs, xp) + + # for multiple items + # GH 5528 + rs = df.iloc[[0, 1]] + xp = df.xs(4, drop_level=False) + tm.assert_frame_equal(rs, xp) + + tup = zip(*[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) + index = MultiIndex.from_tuples(tup) + df = DataFrame(np.random.randn(4, 4), index=index) + rs = df.iloc[[2, 3]] + xp = df.xs('b', drop_level=False) + tm.assert_frame_equal(rs, xp) + + def test_setitem_multiindex(self): + for index_fn in ('ix', 'loc'): + + def check(target, indexers, value, compare_fn, expected=None): + fn = getattr(target, index_fn) + fn.__setitem__(indexers, value) + result = fn.__getitem__(indexers) + if expected is None: + expected = value + compare_fn(result, expected) + # GH7190 + index = pd.MultiIndex.from_product([np.arange(0, 100), + np.arange(0, 80)], + names=['time', 'firm']) + t, n = 0, 2 + df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=0, + compare_fn=self.assertEqual) + + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=1, + compare_fn=self.assertEqual) + + df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=2, + 
compare_fn=self.assertEqual) + + # GH 7218, assinging with 0-dim arrays + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, + indexers=((t, n), 'X'), + value=np.array(3), + compare_fn=self.assertEqual, + expected=3, ) + + # GH5206 + df = pd.DataFrame(np.arange(25).reshape(5, 5), + columns='A,B,C,D,E'.split(','), dtype=float) + df['F'] = 99 + row_selection = df['A'] % 2 == 0 + col_selection = ['B', 'C'] + with catch_warnings(record=True): + df.ix[row_selection, col_selection] = df['F'] + output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) + with catch_warnings(record=True): + tm.assert_frame_equal(df.ix[row_selection, col_selection], + output) + check(target=df, + indexers=(row_selection, col_selection), + value=df['F'], + compare_fn=tm.assert_frame_equal, + expected=output, ) + + # GH11372 + idx = pd.MultiIndex.from_product([ + ['A', 'B', 'C'], + pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) + cols = pd.MultiIndex.from_product([ + ['foo', 'bar'], + pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) + + df = pd.DataFrame(np.random.random((12, 4)), + index=idx, columns=cols) + + subidx = pd.MultiIndex.from_tuples( + [('A', pd.Timestamp('2015-01-01')), + ('A', pd.Timestamp('2015-02-01'))]) + subcols = pd.MultiIndex.from_tuples( + [('foo', pd.Timestamp('2016-01-01')), + ('foo', pd.Timestamp('2016-02-01'))]) + + vals = pd.DataFrame(np.random.random((2, 2)), + index=subidx, columns=subcols) + check(target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # set all columns + vals = pd.DataFrame( + np.random.random((2, 4)), index=subidx, columns=cols) + check(target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # identity + copy = df.copy() + check(target=df, indexers=(df.index, df.columns), value=df, + compare_fn=tm.assert_frame_equal, expected=copy) + + def test_loc_getitem_series(self): + # GH14730 + # passing a series as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = Series([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]), + dtype=np.float64) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + result = x.loc[[1, 3]] + tm.assert_series_equal(result, expected) + + empty = Series(data=[], dtype=np.float64) + expected = Series([], index=MultiIndex( + levels=index.levels, labels=[[], []], dtype=np.float64)) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_multiindex(self): + mi_labels = DataFrame(np.random.randn(4, 3), + columns=[['i', 'i', 'j'], ['A', 'A', 'B']], + index=[['i', 'i', 'j', 'k'], + ['X', 'X', 'Y', 'Y']]) + + mi_int = DataFrame(np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]]) + + # the first row + rs = mi_int.iloc[0] + with catch_warnings(record=True): + xp = mi_int.ix[4].ix[8] + tm.assert_series_equal(rs, xp, check_names=False) + self.assertEqual(rs.name, (4, 8)) + self.assertEqual(xp.name, 8) + + # 2nd (last) columns + rs = mi_int.iloc[:, 2] + with catch_warnings(record=True): + xp = mi_int.ix[:, 2] + tm.assert_series_equal(rs, xp) + + # corner column + rs = mi_int.iloc[2, 2] + with catch_warnings(record=True): + xp = mi_int.ix[:, 2].ix[2] + self.assertEqual(rs, xp) + + # this is basically regular indexing + rs = mi_labels.iloc[2, 2] 
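# Illustrative aside, not part of the diff: .iloc stays purely positional on a
# MultiIndex frame, which is what this test's comparisons against .ix check.
# Minimal sketch with labels modelled on the mi_int fixture above.
import numpy as np
import pandas as pd

mi = pd.DataFrame(np.arange(9).reshape(3, 3),
                  columns=[[2, 2, 4], [6, 8, 10]],
                  index=[[4, 4, 8], [8, 10, 12]])
first_row = mi.iloc[0]      # the row labelled (4, 8), selected by position
last_col = mi.iloc[:, 2]    # the column labelled (4, 10), selected by position
corner = mi.iloc[2, 2]      # scalar at positional (2, 2)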
+ with catch_warnings(record=True): + xp = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0] + self.assertEqual(rs, xp) + + def test_loc_multiindex(self): + + mi_labels = DataFrame(np.random.randn(3, 3), + columns=[['i', 'i', 'j'], ['A', 'A', 'B']], + index=[['i', 'i', 'j'], ['X', 'X', 'Y']]) + + mi_int = DataFrame(np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]]) + + # the first row + rs = mi_labels.loc['i'] + with catch_warnings(record=True): + xp = mi_labels.ix['i'] + tm.assert_frame_equal(rs, xp) + + # 2nd (last) columns + rs = mi_labels.loc[:, 'j'] + with catch_warnings(record=True): + xp = mi_labels.ix[:, 'j'] + tm.assert_frame_equal(rs, xp) + + # corner column + rs = mi_labels.loc['j'].loc[:, 'j'] + with catch_warnings(record=True): + xp = mi_labels.ix['j'].ix[:, 'j'] + tm.assert_frame_equal(rs, xp) + + # with a tuple + rs = mi_labels.loc[('i', 'X')] + with catch_warnings(record=True): + xp = mi_labels.ix[('i', 'X')] + tm.assert_frame_equal(rs, xp) + + rs = mi_int.loc[4] + with catch_warnings(record=True): + xp = mi_int.ix[4] + tm.assert_frame_equal(rs, xp) + + def test_loc_multiindex_indexer_none(self): + + # GH6788 + # multi-index indexer is None (meaning take all) + attributes = ['Attribute' + str(i) for i in range(1)] + attribute_values = ['Value' + str(i) for i in range(5)] + + index = MultiIndex.from_product([attributes, attribute_values]) + df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 + df = DataFrame(df, columns=index) + result = df[attributes] + tm.assert_frame_equal(result, df) + + # GH 7349 + # loc with a multi-index seems to be doing fallback + df = DataFrame(np.arange(12).reshape(-1, 1), + index=pd.MultiIndex.from_product([[1, 2, 3, 4], + [1, 2, 3]])) + + expected = df.loc[([1, 2], ), :] + result = df.loc[[1, 2]] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_incomplete(self): + + # GH 7399 + # incomplete indexers + s = pd.Series(np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + expected = s.loc[:, 'a':'c'] + + result = s.loc[0:4, 'a':'c'] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + result = s.loc[:4, 'a':'c'] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + result = s.loc[0:, 'a':'c'] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + # GH 7400 + # multiindexer gettitem with list of indexers skips wrong element + s = pd.Series(np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + expected = s.iloc[[6, 7, 8, 12, 13, 14]] + result = s.loc[2:4:2, 'a':'c'] + tm.assert_series_equal(result, expected) + + def test_multiindex_perf_warn(self): + + df = DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}).set_index(['jim', 'joe']) + + with tm.assert_produces_warning(PerformanceWarning, + clear=[pd.core.index]): + df.loc[(1, 'z')] + + df = df.iloc[[2, 1, 3, 0]] + with tm.assert_produces_warning(PerformanceWarning): + df.loc[(0, )] + + def test_series_getitem_multiindex(self): + + # GH 6018 + # series regression getitem with a multi-index + + s = Series([1, 2, 3]) + s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) + + result = s[:, 0] + expected = Series([1], index=[0]) + tm.assert_series_equal(result, expected) + + result = s.loc[:, 1] + expected = Series([2, 3], index=[1, 2]) + tm.assert_series_equal(result, expected) + + # xs + result = s.xs(0, level=0) + expected = Series([1], 
index=[0]) + tm.assert_series_equal(result, expected) + + result = s.xs(1, level=1) + expected = Series([2, 3], index=[1, 2]) + tm.assert_series_equal(result, expected) + + # GH6258 + dt = list(date_range('20130903', periods=3)) + idx = MultiIndex.from_product([list('AB'), dt]) + s = Series([1, 3, 4, 1, 3, 4], index=idx) + + result = s.xs('20130903', level=1) + expected = Series([1, 1], index=list('AB')) + tm.assert_series_equal(result, expected) + + # GH5684 + idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), + ('b', 'two')]) + s = Series([1, 2, 3, 4], index=idx) + s.index.set_names(['L1', 'L2'], inplace=True) + result = s.xs('one', level='L2') + expected = Series([1, 3], index=['a', 'b']) + expected.index.set_names(['L1'], inplace=True) + tm.assert_series_equal(result, expected) + + def test_xs_multiindex(self): + + # GH2903 + columns = MultiIndex.from_tuples( + [('a', 'foo'), ('a', 'bar'), ('b', 'hello'), + ('b', 'world')], names=['lvl0', 'lvl1']) + df = DataFrame(np.random.randn(4, 4), columns=columns) + df.sort_index(axis=1, inplace=True) + result = df.xs('a', level='lvl0', axis=1) + expected = df.iloc[:, 0:2].loc[:, 'a'] + tm.assert_frame_equal(result, expected) + + result = df.xs('foo', level='lvl1', axis=1) + expected = df.iloc[:, 1:2].copy() + expected.columns = expected.columns.droplevel('lvl1') + tm.assert_frame_equal(result, expected) + + def test_multiindex_setitem(self): + + # GH 3738 + # setting with a multi-index right hand side + arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']), + np.array(['one', 'two', 'one', 'one', 'two', 'one']), + np.arange(0, 6, 1)] + + df_orig = pd.DataFrame(np.random.randn(6, 3), + index=arrays, + columns=['A', 'B', 'C']).sort_index() + + expected = df_orig.loc[['bar']] * 2 + df = df_orig.copy() + df.loc[['bar']] *= 2 + tm.assert_frame_equal(df.loc[['bar']], expected) + + # raise because these have differing levels + def f(): + df.loc['bar'] *= 2 + + self.assertRaises(TypeError, f) + + # from SO + # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation + df_orig = DataFrame.from_dict({'price': { + ('DE', 'Coal', 'Stock'): 2, + ('DE', 'Gas', 'Stock'): 4, + ('DE', 'Elec', 'Demand'): 1, + ('FR', 'Gas', 'Stock'): 5, + ('FR', 'Solar', 'SupIm'): 0, + ('FR', 'Wind', 'SupIm'): 0 + }}) + df_orig.index = MultiIndex.from_tuples(df_orig.index, + names=['Sit', 'Com', 'Type']) + + expected = df_orig.copy() + expected.iloc[[0, 2, 3]] *= 2 + + idx = pd.IndexSlice + df = df_orig.copy() + df.loc[idx[:, :, 'Stock'], :] *= 2 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, :, 'Stock'], 'price'] *= 2 + tm.assert_frame_equal(df, expected) + + def test_getitem_multiindex(self): + # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise + # the appropriate error, only in PY3 of course! 
+ index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + arr = np.random.randn(len(index), 1) + df = DataFrame(arr, index=index, columns=['val']) + result = df.val['D'] + expected = Series(arr.ravel()[0:3], name='val', index=Index( + [26, 37, 57], name='day')) + tm.assert_series_equal(result, expected) + + def f(): + df.val['A'] + + self.assertRaises(KeyError, f) + + def f(): + df.val['X'] + + self.assertRaises(KeyError, f) + + # A is treated as a special Timestamp + index = MultiIndex(levels=[['A', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + df = DataFrame(arr, index=index, columns=['val']) + result = df.val['A'] + expected = Series(arr.ravel()[0:3], name='val', index=Index( + [26, 37, 57], name='day')) + tm.assert_series_equal(result, expected) + + def f(): + df.val['X'] + + self.assertRaises(KeyError, f) + + # GH 7866 + # multi-index slicing with missing indexers + idx = pd.MultiIndex.from_product([['A', 'B', 'C'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + s = pd.Series(np.arange(9, dtype='int64'), index=idx).sort_index() + + exp_idx = pd.MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], + names=['one', 'two']) + expected = pd.Series(np.arange(3, dtype='int64'), + index=exp_idx).sort_index() + + result = s.loc[['A']] + tm.assert_series_equal(result, expected) + result = s.loc[['A', 'D']] + tm.assert_series_equal(result, expected) + + # not any values found + self.assertRaises(KeyError, lambda: s.loc[['D']]) + + # empty ok + result = s.loc[[]] + expected = s.iloc[[]] + tm.assert_series_equal(result, expected) + + idx = pd.IndexSlice + expected = pd.Series([0, 3, 6], index=pd.MultiIndex.from_product( + [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() + + result = s.loc[idx[:, ['foo']]] + tm.assert_series_equal(result, expected) + result = s.loc[idx[:, ['foo', 'bah']]] + tm.assert_series_equal(result, expected) + + # GH 8737 + # empty indexer + multi_index = pd.MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) + df = DataFrame( + np.random.randn(5, 6), index=range(5), columns=multi_index) + df = df.sort_index(level=0, axis=1) + + expected = DataFrame(index=range(5), + columns=multi_index.reindex([])[0]) + result1 = df.loc[:, ([], slice(None))] + result2 = df.loc[:, (['foo'], [])] + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + # regression from < 0.14.0 + # GH 7914 + df = DataFrame([[np.mean, np.median], ['mean', 'median']], + columns=MultiIndex.from_tuples([('functs', 'mean'), + ('functs', 'median')]), + index=['function', 'name']) + result = df.loc['function', ('functs', 'mean')] + self.assertEqual(result, np.mean) + + def test_multiindex_assignment(self): + + # GH3777 part 2 + + # mixed dtype + df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list('abc'), + index=[[4, 4, 8], [8, 10, 12]]) + df['d'] = np.nan + arr = np.array([0., 1.]) + + df.ix[4, 'd'] = arr + tm.assert_series_equal(df.ix[4, 'd'], + Series(arr, index=[8, 10], name='d')) + + # single dtype + df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list('abc'), + index=[[4, 4, 8], [8, 10, 12]]) + + df.ix[4, 'c'] = arr + exp = Series(arr, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) + + # scalar ok + df.ix[4, 'c'] = 10 + 
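# Illustrative aside, not part of the diff: slicing one MultiIndex level with
# pd.IndexSlice while taking everything on the other level, as the GH 7866
# assertions above do. Minimal sketch mirroring the small fixture used there.
import numpy as np
import pandas as pd

idx = pd.IndexSlice
index = pd.MultiIndex.from_product([['A', 'B', 'C'], ['foo', 'bar', 'baz']],
                                   names=['one', 'two'])
s = pd.Series(np.arange(9), index=index).sort_index()
s.loc[idx[:, ['foo']]]   # every 'one' label, only 'foo' on level 'two'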
exp = Series(10, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) + + # invalid assignments + def f(): + df.ix[4, 'c'] = [0, 1, 2, 3] + + self.assertRaises(ValueError, f) + + def f(): + df.ix[4, 'c'] = [0] + + self.assertRaises(ValueError, f) + + # groupby example + NUM_ROWS = 100 + NUM_COLS = 10 + col_names = ['A' + num for num in + map(str, np.arange(NUM_COLS).tolist())] + index_cols = col_names[:5] + + df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), + dtype=np.int64, columns=col_names) + df = df.set_index(index_cols).sort_index() + grp = df.groupby(level=index_cols[:4]) + df['new_col'] = np.nan + + f_index = np.arange(5) + + def f(name, df2): + return Series(np.arange(df2.shape[0]), + name=df2.index.values[0]).reindex(f_index) + + # TODO(wesm): unused? + # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T + + # we are actually operating on a copy here + # but in this case, that's ok + for name, df2 in grp: + new_vals = np.arange(df2.shape[0]) + df.ix[name, 'new_col'] = new_vals + + def test_multiindex_label_slicing_with_negative_step(self): + s = Series(np.arange(20), + MultiIndex.from_product([list('abcde'), np.arange(4)])) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + + assert_slices_equivalent(SLC[::-1], SLC[::-1]) + + assert_slices_equivalent(SLC['d'::-1], SLC[15::-1]) + assert_slices_equivalent(SLC[('d', )::-1], SLC[15::-1]) + + assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1]) + assert_slices_equivalent(SLC[:('d', ):-1], SLC[:11:-1]) + + assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[('d', ):'b':-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC['d':('b', ):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[('d', ):('b', ):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC['b':'d':-1], SLC[:0]) + + assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1]) + assert_slices_equivalent(SLC[:('c', 2):-1], SLC[:9:-1]) + assert_slices_equivalent(SLC[('e', 0):('c', 2):-1], SLC[16:9:-1]) + + def test_multiindex_slice_first_level(self): + # GH 12697 + freq = ['a', 'b', 'c', 'd'] + idx = pd.MultiIndex.from_product([freq, np.arange(500)]) + df = pd.DataFrame(list(range(2000)), index=idx, columns=['Test']) + df_slice = df.loc[pd.IndexSlice[:, 30:70], :] + result = df_slice.loc['a'] + expected = pd.DataFrame(list(range(30, 71)), + columns=['Test'], + index=range(30, 71)) + tm.assert_frame_equal(result, expected) + result = df_slice.loc['d'] + expected = pd.DataFrame(list(range(1530, 1571)), + columns=['Test'], + index=range(30, 71)) + tm.assert_frame_equal(result, expected) + + +class TestMultiIndexSlicers(tm.TestCase): + + def test_per_axis_per_level_getitem(self): + + # GH6134 + # example test case + ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7), _mklbl( + 'C', 4), _mklbl('D', 2)]) + df = DataFrame(np.arange(len(ix.get_values())), index=ix) + + result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] + tm.assert_frame_equal(result, expected) + + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C2' or c == 'C3')]] + result = 
df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :] + tm.assert_frame_equal(result, expected) + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples([('A', 1), ('A', 2), + ('A', 3), ('B', 1)], + names=['one', 'two']) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + + df = DataFrame( + np.arange(16, dtype='int64').reshape( + 4, 4), index=index, columns=columns) + df = df.sort_index(axis=0).sort_index(axis=1) + + # identity + result = df.loc[(slice(None), slice(None)), :] + tm.assert_frame_equal(result, df) + result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + result = df.loc[:, (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + + # index + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), 1), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # columns + result = df.loc[:, (slice(None), ['foo'])] + expected = df.iloc[:, [1, 3]] + tm.assert_frame_equal(result, expected) + + # both + result = df.loc[(slice(None), 1), (slice(None), ['foo'])] + expected = df.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc['A', 'a'] + expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]), + index=Index([1, 2, 3], name='two'), + columns=Index(['bar', 'foo'], name='lvl1')) + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), [1, 2]), :] + expected = df.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # multi-level series + s = Series(np.arange(len(ix.get_values())), index=ix) + result = s.loc['A1':'A3', :, ['C1', 'C3']] + expected = s.loc[[tuple([a, b, c, d]) + for a, b, c, d in s.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] + tm.assert_series_equal(result, expected) + + # boolean indexers + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + expected = df.iloc[[2, 3]] + tm.assert_frame_equal(result, expected) + + def f(): + df.loc[(slice(None), np.array([True, False])), :] + + self.assertRaises(ValueError, f) + + # ambiguous cases + # these can be multiply interpreted (e.g. 
in this case + # as df.loc[slice(None),[1]] as well + self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]]) + + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # not lexsorted + self.assertEqual(df.index.lexsort_depth, 2) + df = df.sort_index(level=1, axis=0) + self.assertEqual(df.index.lexsort_depth, 0) + with tm.assertRaisesRegexp( + UnsortedIndexError, + 'MultiIndex Slicing requires the index to be fully ' + r'lexsorted tuple len \(2\), lexsort depth \(0\)'): + df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + + def test_multiindex_slicers_non_unique(self): + + # GH 7106 + # non-unique mi index support + df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], + B=['a', 'a', 'a', 'a'], + C=[1, 2, 1, 3], + D=[1, 2, 3, 4])) + .set_index(['A', 'B', 'C']).sort_index()) + self.assertFalse(df.index.is_unique) + expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], + C=[1, 1], D=[1, 3])) + .set_index(['A', 'B', 'C']).sort_index()) + result = df.loc[(slice(None), slice(None), 1), :] + tm.assert_frame_equal(result, expected) + + # this is equivalent of an xs expression + result = df.xs(1, level=2, drop_level=False) + tm.assert_frame_equal(result, expected) + + df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], + B=['a', 'a', 'a', 'a'], + C=[1, 2, 1, 2], + D=[1, 2, 3, 4])) + .set_index(['A', 'B', 'C']).sort_index()) + self.assertFalse(df.index.is_unique) + expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], + C=[1, 1], D=[1, 3])) + .set_index(['A', 'B', 'C']).sort_index()) + result = df.loc[(slice(None), slice(None), 1), :] + self.assertFalse(result.index.is_unique) + tm.assert_frame_equal(result, expected) + + # GH12896 + # numpy-implementation dependent bug + ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16, + 17, 18, 19, 200000, 200000] + n = len(ints) + idx = MultiIndex.from_arrays([['a'] * n, ints]) + result = Series([1] * n, index=idx) + result = result.sort_index() + result = result.loc[(slice(None), slice(100000))] + expected = Series([1] * (n - 2), index=idx[:-2]).sort_index() + tm.assert_series_equal(result, expected) + + def test_multiindex_slicers_datetimelike(self): + + # GH 7429 + # buggy/inconsistent behavior when slicing with datetime-like + import datetime + dates = [datetime.datetime(2012, 1, 1, 12, 12, 12) + + datetime.timedelta(days=i) for i in range(6)] + freq = [1, 2] + index = MultiIndex.from_product( + [dates, freq], names=['date', 'frequency']) + + df = DataFrame( + np.arange(6 * 2 * 4, dtype='int64').reshape( + -1, 4), index=index, columns=list('ABCD')) + + # multi-axis slicing + idx = pd.IndexSlice + expected = df.iloc[[0, 2, 4], [0, 1]] + result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'), + Timestamp('2012-01-03 12:12:12')), + slice(1, 1)), slice('A', 'B')] + tm.assert_frame_equal(result, expected) + + result = df.loc[(idx[Timestamp('2012-01-01 12:12:12'):Timestamp( + '2012-01-03 12:12:12')], idx[1:1]), slice('A', 'B')] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'), + Timestamp('2012-01-03 12:12:12')), 1), + slice('A', 'B')] + tm.assert_frame_equal(result, expected) + + # with strings + result = df.loc[(slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'), + slice(1, 1)), slice('A', 'B')] + tm.assert_frame_equal(result, expected) + + result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1), + idx['A', 'B']] + tm.assert_frame_equal(result, expected) + + def 
test_multiindex_slicers_edges(self): + # GH 8132 + # various edge cases + df = DataFrame( + {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5, + 'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3, + 'DATE': ["2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30", + "2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09", + "2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01", + "2013-07-09", "2013-08-06", "2013-09-03"], + 'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]}) + + df['DATE'] = pd.to_datetime(df['DATE']) + df1 = df.set_index(['A', 'B', 'DATE']) + df1 = df1.sort_index() + + # A1 - Get all values under "A0" and "A1" + result = df1.loc[(slice('A1')), :] + expected = df1.iloc[0:10] + tm.assert_frame_equal(result, expected) + + # A2 - Get all values from the start to "A2" + result = df1.loc[(slice('A2')), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # A3 - Get all values under "B1" or "B2" + result = df1.loc[(slice(None), slice('B1', 'B2')), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] + tm.assert_frame_equal(result, expected) + + # A4 - Get all values between 2013-07-02 and 2013-07-09 + result = df1.loc[(slice(None), slice(None), + slice('20130702', '20130709')), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + tm.assert_frame_equal(result, expected) + + # B1 - Get all values in B0 that are also under A0, A1 and A2 + result = df1.loc[(slice('A2'), slice('B0')), :] + expected = df1.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for + # the As) + result = df1.loc[(slice(None), slice('B2')), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # B3 - Get all values from B1 to B2 and up to 2013-08-06 + result = df1.loc[(slice(None), slice('B1', 'B2'), + slice('2013-08-06')), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] + tm.assert_frame_equal(result, expected) + + # B4 - Same as A4 but the start of the date slice is not a key. 
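        # --- editorial note, not part of the original patch: '20130701' below does
        # not appear in the DATE level, but a missing endpoint is still accepted as
        # a slice bound on the lexsorted level and selects everything between the
        # bounds. A minimal equivalent sketch using pd.IndexSlice, assuming df1 as
        # constructed above:
        #
        #     result = df1.loc[pd.IndexSlice[:, :, '20130701':'20130709'], :]
        # ---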
+ # shows indexing on a partial selection slice + result = df1.loc[(slice(None), slice(None), + slice('20130701', '20130709')), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + tm.assert_frame_equal(result, expected) + + def test_per_axis_per_level_doc_examples(self): + + # test index maker + idx = pd.IndexSlice + + # from indexing.rst / advanced + index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), + _mklbl('C', 4), _mklbl('D', 2)]) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') + .reshape((len(index), len(columns))), + index=index, columns=columns) + result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] + tm.assert_frame_equal(result, expected) + result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (c == 'C1' or c == 'C3')]] + tm.assert_frame_equal(result, expected) + result = df.loc[idx[:, :, ['C1', 'C3']], :] + tm.assert_frame_equal(result, expected) + + # not sorted + def f(): + df.loc['A1', (slice(None), 'foo')] + + self.assertRaises(UnsortedIndexError, f) + df = df.sort_index(axis=1) + + # slicing + df.loc['A1', (slice(None), 'foo')] + df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')] + + # setitem + df.loc(axis=0)[:, :, ['C1', 'C3']] = -10 + + def test_loc_axis_arguments(self): + + index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), + _mklbl('C', 4), _mklbl('D', 2)]) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') + .reshape((len(index), len(columns))), + index=index, + columns=columns).sort_index().sort_index(axis=1) + + # axis 0 + result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis='index')[:, :, ['C1', 'C3']] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (c == 'C1' or c == 'C3')]] + tm.assert_frame_equal(result, expected) + + # axis 1 + result = df.loc(axis=1)[:, 'foo'] + expected = df.loc[:, (slice(None), 'foo')] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis='columns')[:, 'foo'] + expected = df.loc[:, (slice(None), 'foo')] + tm.assert_frame_equal(result, expected) + + # invalid axis + def f(): + df.loc(axis=-1)[:, :, ['C1', 'C3']] + + self.assertRaises(ValueError, f) + + def f(): + df.loc(axis=2)[:, :, ['C1', 'C3']] + + self.assertRaises(ValueError, f) + + def f(): + df.loc(axis='foo')[:, :, ['C1', 'C3']] + + self.assertRaises(ValueError, f) + + def test_per_axis_per_level_setitem(self): + + # test index maker + idx = pd.IndexSlice + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples([('A', 1), ('A', 2), + ('A', 3), ('B', 1)], + names=['one', 'two']) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + + df_orig = 
DataFrame( + np.arange(16, dtype='int64').reshape( + 4, 4), index=index, columns=columns) + df_orig = df_orig.sort_index(axis=0).sort_index(axis=1) + + # identity + df = df_orig.copy() + df.loc[(slice(None), slice(None)), :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[:, (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + # index + df = df_orig.copy() + df.loc[(slice(None), [1]), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), 1), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, 1] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # columns + df = df_orig.copy() + df.loc[:, (slice(None), ['foo'])] = 100 + expected = df_orig.copy() + expected.iloc[:, [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # both + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ['foo'])] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, 1], idx[:, ['foo']]] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc['A', 'a'] = 100 + expected = df_orig.copy() + expected.iloc[0:3, 0:2] = 100 + tm.assert_frame_equal(df, expected) + + # setting with a list-like + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( + [[100, 100], [100, 100]], dtype='int64') + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # not enough values + df = df_orig.copy() + + def f(): + df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( + [[100], [100, 100]], dtype='int64') + + self.assertRaises(ValueError, f) + + def f(): + df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( + [100, 100, 100, 100], dtype='int64') + + self.assertRaises(ValueError, f) + + # with an alignable rhs + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ['foo'])] = df.loc[(slice( + None), 1), (slice(None), ['foo'])] * 5 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[(slice( + None), 1), (slice(None), ['foo'])] + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy() + rhs.loc[:, ('c', 'bah')] = 10 + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + +class TestMultiIndexPanel(tm.TestCase): + + def 
test_iloc_getitem_panel_multiindex(self): + # GH 7199 + # Panel with multi-index + multi_index = pd.MultiIndex.from_tuples([('ONE', 'one'), + ('TWO', 'two'), + ('THREE', 'three')], + names=['UPPER', 'lower']) + + simple_index = [x[0] for x in multi_index] + wd1 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], + minor_axis=multi_index) + + wd2 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], + minor_axis=simple_index) + + expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] + result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG + tm.assert_frame_equal(result1, expected1) + + expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] + result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] + tm.assert_frame_equal(result2, expected2) + + expected1 = DataFrame(index=['a'], columns=multi_index, + dtype='float64') + result1 = wd1.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result1, expected1) + + expected2 = DataFrame(index=['a'], columns=simple_index, + dtype='float64') + result2 = wd2.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result2, expected2) + + # GH 7516 + mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) + p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), + items=['a', 'b', 'c'], major_axis=mi, + minor_axis=['u', 'v', 'w']) + result = p.iloc[:, 1, 0] + expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') + tm.assert_series_equal(result, expected) + + result = p.loc[:, (1, 'y'), 'u'] + tm.assert_series_equal(result, expected) + + def test_panel_setitem_with_multiindex(self): + + # 10360 + # failing with a multi-index + arr = np.array([[[1, 2, 3], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]], + dtype=np.float64) + + # reg index + axes = dict(items=['A', 'B'], major_axis=[0, 1], + minor_axis=['X', 'Y', 'Z']) + p1 = Panel(0., **axes) + p1.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p1, expected) + + # multi-indexes + axes['items'] = pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')]) + p2 = Panel(0., **axes) + p2.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p2, expected) + + axes['major_axis'] = pd.MultiIndex.from_tuples([('A', 1), ('A', 2)]) + p3 = Panel(0., **axes) + p3.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p3, expected) + + axes['minor_axis'] = pd.MultiIndex.from_product([['X'], range(3)]) + p4 = Panel(0., **axes) + p4.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p4, expected) + + arr = np.array( + [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64) + p5 = Panel(0., **axes) + p5.iloc[0, :, 0] = [1, 2] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p5, expected) diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py new file mode 100644 index 0000000000000..5ec3076af599a --- /dev/null +++ b/pandas/tests/indexing/test_panel.py @@ -0,0 +1,209 @@ +import numpy as np +from pandas.util import testing as tm +from pandas import Panel, date_range, DataFrame + + +class TestPanel(tm.TestCase): + + def test_iloc_getitem_panel(self): + + # GH 7189 + p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), + items=['A', 'B', 'C', 'D'], + major_axis=['a', 'b', 'c'], + minor_axis=['one', 'two']) + + result = p.iloc[1] + expected = p.loc['B'] + tm.assert_frame_equal(result, expected) + + result = p.iloc[1, 1] + expected = p.loc['B', 'b'] + tm.assert_series_equal(result, expected) + + result = 
p.iloc[1, 1, 1] + expected = p.loc['B', 'b', 'two'] + self.assertEqual(result, expected) + + # slice + result = p.iloc[1:3] + expected = p.loc[['B', 'C']] + tm.assert_panel_equal(result, expected) + + result = p.iloc[:, 0:2] + expected = p.loc[:, ['a', 'b']] + tm.assert_panel_equal(result, expected) + + # list of integers + result = p.iloc[[0, 2]] + expected = p.loc[['A', 'C']] + tm.assert_panel_equal(result, expected) + + # neg indicies + result = p.iloc[[-1, 1], [-1, 1]] + expected = p.loc[['D', 'B'], ['c', 'b']] + tm.assert_panel_equal(result, expected) + + # dups indicies + result = p.iloc[[-1, -1, 1], [-1, 1]] + expected = p.loc[['D', 'D', 'B'], ['c', 'b']] + tm.assert_panel_equal(result, expected) + + # combined + result = p.iloc[0, [True, True], [0, 1]] + expected = p.loc['A', ['a', 'b'], ['one', 'two']] + tm.assert_frame_equal(result, expected) + + # out-of-bounds exception + self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10, 5])) + + def f(): + p.iloc[0, [True, True], [0, 1, 2]] + + self.assertRaises(IndexError, f) + + # trying to use a label + self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j', 'D'])) + + # GH + p = Panel( + np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], + major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) + expected = p['A'] + + result = p.iloc[0, :, :] + tm.assert_frame_equal(result, expected) + + result = p.iloc[0, [True, True, True], :] + tm.assert_frame_equal(result, expected) + + result = p.iloc[0, [True, True, True], [0, 1]] + tm.assert_frame_equal(result, expected) + + def f(): + p.iloc[0, [True, True, True], [0, 1, 2]] + + self.assertRaises(IndexError, f) + + def f(): + p.iloc[0, [True, True, True], [2]] + + self.assertRaises(IndexError, f) + + def test_iloc_panel_issue(self): + + # GH 3617 + p = Panel(np.random.randn(4, 4, 4)) + + self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3)) + self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3)) + self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3)) + self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3)) + self.assertEqual(p.iloc[1, 1, :3].shape, (3, )) + self.assertEqual(p.iloc[1, :3, 1].shape, (3, )) + self.assertEqual(p.iloc[:3, 1, 1].shape, (3, )) + + def test_panel_getitem(self): + # GH4016, date selection returns a frame when a partial string + # selection + ind = date_range(start="2000", freq="D", periods=1000) + df = DataFrame( + np.random.randn( + len(ind), 5), index=ind, columns=list('ABCDE')) + panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) + + test2 = panel.ix[:, "2002":"2002-12-31"] + test1 = panel.ix[:, "2002"] + tm.assert_panel_equal(test1, test2) + + # GH8710 + # multi-element getting with a list + panel = tm.makePanel() + + expected = panel.iloc[[0, 1]] + + result = panel.loc[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) + + result = panel.loc[['ItemA', 'ItemB'], :, :] + tm.assert_panel_equal(result, expected) + + result = panel[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) + + result = panel.loc['ItemA':'ItemB'] + tm.assert_panel_equal(result, expected) + + result = panel.ix['ItemA':'ItemB'] + tm.assert_panel_equal(result, expected) + + result = panel.ix[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) + + # with an object-like + # GH 9140 + class TestObject: + + def __str__(self): + return "TestObject" + + obj = TestObject() + + p = Panel(np.random.randn(1, 5, 4), items=[obj], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + + expected = p.iloc[0] + result = p[obj] + 
tm.assert_frame_equal(result, expected) + + def test_panel_setitem(self): + + # GH 7763 + # loc and setitem have setting differences + np.random.seed(0) + index = range(3) + columns = list('abc') + + panel = Panel({'A': DataFrame(np.random.randn(3, 3), + index=index, columns=columns), + 'B': DataFrame(np.random.randn(3, 3), + index=index, columns=columns), + 'C': DataFrame(np.random.randn(3, 3), + index=index, columns=columns)}) + + replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) + expected = Panel({'A': replace, 'B': replace, 'C': replace}) + + p = panel.copy() + for idx in list('ABC'): + p[idx] = replace + tm.assert_panel_equal(p, expected) + + p = panel.copy() + for idx in list('ABC'): + p.loc[idx, :, :] = replace + tm.assert_panel_equal(p, expected) + + def test_panel_assignment(self): + # GH3777 + wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + wp2 = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + + # TODO: unused? + # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] + + def f(): + wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ + ['Item1', 'Item2'], :, ['A', 'B']] + + self.assertRaises(NotImplementedError, f) + + # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] + # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign + # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] + # tm.assert_panel_equal(result,expected) diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py new file mode 100644 index 0000000000000..e5ccd72cac20a --- /dev/null +++ b/pandas/tests/indexing/test_timedelta.py @@ -0,0 +1,21 @@ +import pandas as pd +from pandas.util import testing as tm + + +class TestTimedeltaIndexing(tm.TestCase): + + def test_boolean_indexing(self): + # GH 14946 + df = pd.DataFrame({'x': range(10)}) + df.index = pd.to_timedelta(range(10), unit='s') + conditions = [df['x'] > 3, df['x'] == 3, df['x'] < 3] + expected_data = [[0, 1, 2, 3, 10, 10, 10, 10, 10, 10], + [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], + [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]] + for cond, data in zip(conditions, expected_data): + result = df.copy() + result.loc[cond, 'x'] = 10 + expected = pd.DataFrame(data, + index=pd.to_timedelta(range(10), unit='s'), + columns=['x']) + tm.assert_frame_equal(expected, result) From 2100a3af599449efd3edf3dc020f60f8e6436227 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 11 Feb 2017 23:02:13 +0100 Subject: [PATCH 043/353] DOC: fix py3 compat (change lost in FAQ-gotchas merge) --- doc/source/advanced.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 21ae9f1eb8409..8833d73cb0a84 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -880,7 +880,7 @@ normal Python ``list``. Monotonicity of an index can be tested with the ``is_mon .. ipython:: python - df = pd.DataFrame(index=[2,3,3,4,5], columns=['data'], data=range(5)) + df = pd.DataFrame(index=[2,3,3,4,5], columns=['data'], data=list(range(5))) df.index.is_monotonic_increasing # no rows 0 or 1, but still returns rows 2, 3 (both of them), and 4: @@ -894,7 +894,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be .. 
ipython:: python - df = pd.DataFrame(index=[2,3,1,4,3,5], columns=['data'], data=range(6)) + df = pd.DataFrame(index=[2,3,1,4,3,5], columns=['data'], data=list(range(6))) df.index.is_monotonic_increasing # OK because 2 and 4 are in the index From 61a243b858fe41aac81d20a75e5f1e86baefd868 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 11 Feb 2017 18:56:19 -0500 Subject: [PATCH 044/353] TST: remove nose TST: raise nose.SkipTest -> pytest.skip TST: remove KnownFailure (unused), should be replaced by pytest.xfail anyhow xref #15341 Author: Jeff Reback Closes #15368 from jreback/skip and squashes the following commits: afdb5f9 [Jeff Reback] TST: raise nose.SkipTest -> pytest.skip --- .travis.yml | 2 +- ci/install_test.sh | 1 - ci/install_travis.sh | 13 +- ci/lint.sh | 2 +- ci/requirements_all.txt | 2 +- ci/requirements_dev.txt | 2 +- ci/script.sh | 6 +- doc/source/contributing.rst | 2 +- pandas/computation/tests/test_compat.py | 8 +- pandas/computation/tests/test_eval.py | 19 ++- pandas/io/tests/json/test_pandas.py | 6 +- pandas/io/tests/json/test_ujson.py | 12 +- pandas/io/tests/parser/c_parser_only.py | 4 +- pandas/io/tests/parser/common.py | 8 +- pandas/io/tests/parser/compression.py | 8 +- pandas/io/tests/parser/converters.py | 6 +- pandas/io/tests/parser/parse_dates.py | 8 +- pandas/io/tests/parser/python_parser_only.py | 6 +- pandas/io/tests/parser/test_network.py | 4 +- pandas/io/tests/parser/test_read_fwf.py | 8 +- pandas/io/tests/parser/usecols.py | 4 +- pandas/io/tests/test_clipboard.py | 7 +- pandas/io/tests/test_excel.py | 42 +++--- pandas/io/tests/test_feather.py | 9 +- pandas/io/tests/test_gbq.py | 26 ++-- pandas/io/tests/test_html.py | 12 +- pandas/io/tests/test_packers.py | 28 ++-- pandas/io/tests/test_pickle.py | 4 +- pandas/io/tests/test_pytables.py | 29 ++-- pandas/io/tests/test_sql.py | 38 ++--- pandas/io/tests/test_stata.py | 4 +- pandas/sparse/tests/test_indexing.py | 2 +- pandas/sparse/tests/test_libsparse.py | 4 +- pandas/sparse/tests/test_series.py | 4 +- pandas/tests/formats/test_format.py | 12 +- pandas/tests/formats/test_printing.py | 2 +- pandas/tests/formats/test_style.py | 18 +-- pandas/tests/frame/test_analytics.py | 6 +- pandas/tests/frame/test_constructors.py | 6 +- pandas/tests/frame/test_missing.py | 4 +- pandas/tests/frame/test_operators.py | 4 +- pandas/tests/frame/test_quantile.py | 6 +- pandas/tests/frame/test_query_eval.py | 8 +- pandas/tests/groupby/test_misc.py | 4 +- pandas/tests/indexes/datetimes/test_tools.py | 6 +- pandas/tests/indexes/test_multi.py | 6 +- pandas/tests/indexing/test_coercion.py | 12 +- pandas/tests/plotting/common.py | 4 +- pandas/tests/plotting/test_boxplot_method.py | 4 +- pandas/tests/plotting/test_datetimelike.py | 6 +- pandas/tests/plotting/test_frame.py | 4 +- pandas/tests/series/test_analytics.py | 16 +- pandas/tests/series/test_missing.py | 8 +- pandas/tests/series/test_quantile.py | 8 +- pandas/tests/test_base.py | 4 +- pandas/tests/test_expressions.py | 96 ++++++------ pandas/tests/test_generic.py | 6 +- pandas/tests/test_internals.py | 4 +- pandas/tests/test_msgpack/test_unpack.py | 4 +- pandas/tests/test_multilevel.py | 4 +- pandas/tests/test_panel.py | 12 +- pandas/tests/test_panel4d.py | 4 +- pandas/tests/test_testing.py | 33 +--- pandas/tests/test_window.py | 16 +- pandas/tests/tseries/test_converter.py | 7 +- pandas/tests/tseries/test_offsets.py | 8 +- pandas/tools/tests/test_util.py | 16 +- pandas/util/decorators.py | 59 -------- pandas/util/print_versions.py | 2 +- pandas/util/testing.py | 150 
++++++++----------- setup.cfg | 2 +- 71 files changed, 389 insertions(+), 522 deletions(-) diff --git a/.travis.yml b/.travis.yml index b38c99e3a5be9..2ff5d508d0371 100644 --- a/.travis.yml +++ b/.travis.yml @@ -331,5 +331,5 @@ after_script: - echo "after_script start" - ci/install_test.sh - source activate pandas && python -c "import pandas; pandas.show_versions();" - - ci/print_skipped.py /tmp/nosetests.xml + - ci/print_skipped.py /tmp/pytest.xml - echo "after_script done" diff --git a/ci/install_test.sh b/ci/install_test.sh index cbb84d8fa4b65..9ace633d7f39d 100755 --- a/ci/install_test.sh +++ b/ci/install_test.sh @@ -8,7 +8,6 @@ if [ "$INSTALL_TEST" ]; then conda uninstall cython || exit 1 python "$TRAVIS_BUILD_DIR"/setup.py sdist --formats=zip,gztar || exit 1 pip install "$TRAVIS_BUILD_DIR"/dist/*tar.gz || exit 1 - # nosetests --exe -A "$TEST_ARGS" pandas/tests/test_series.py --with-xunit --xunit-file=/tmp/nosetests_install.xml pytest pandas/tests/test_series.py --junitxml=/tmp/pytest_install.xml else echo "Skipping installation test." diff --git a/ci/install_travis.sh b/ci/install_travis.sh index f65176fb1147c..ad804b96a0d82 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -92,12 +92,7 @@ if [ -e ${INSTALL} ]; then time bash $INSTALL || exit 1 else # create new env - time conda create -n pandas python=$PYTHON_VERSION nose pytest || exit 1 - - if [ "$LINT" ]; then - conda install flake8 - pip install cpplint - fi + time conda create -n pandas python=$PYTHON_VERSION pytest || exit 1 fi # build deps @@ -116,6 +111,12 @@ fi source activate pandas +pip install pytest-xdist +if [ "$LINT" ]; then + conda install flake8 + pip install cpplint +fi + if [ "$COVERAGE" ]; then pip install coverage pytest-cov fi diff --git a/ci/lint.sh b/ci/lint.sh index 2cbfdadf486b8..2ffc68e5eb139 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -55,7 +55,7 @@ if [ "$LINT" ]; then echo "Linting *.c and *.h DONE" echo "Check for invalid testing" - grep -r -E --include '*.py' --exclude nosetester.py --exclude testing.py '(numpy|np)\.testing' pandas + grep -r -E --include '*.py' --exclude testing.py '(numpy|np)\.testing' pandas if [ $? = "0" ]; then RET=1 fi diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt index b64143fcd4ecd..4ff80a478f247 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements_all.txt @@ -1,6 +1,6 @@ -nose pytest pytest-cov +pytest-xdist flake8 sphinx ipython diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index b8af9d035de98..b0a8adc8df5cb 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -2,7 +2,7 @@ python-dateutil pytz numpy cython -nose pytest pytest-cov +pytest-xdist flake8 diff --git a/ci/script.sh b/ci/script.sh index 3eac3002d6805..c52fa0fdb33a3 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -18,10 +18,10 @@ if [ -n "$LOCALE_OVERRIDE" ]; then fi if [ "$BUILD_TEST" ]; then - echo "We are not running nosetests as this is simply a build test." + echo "We are not running pytest as this is simply a build test." 
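# --- editorial note, not part of the patch: the mechanical translation applied
# throughout this commit is nose.SkipTest -> pytest.skip, i.e. (sketch)
#
#     import pytest            # instead of: import nose
#     pytest.skip('no zlib')   # instead of: raise nose.SkipTest('no zlib')
#
# and module-level hard dependencies become pytest.importorskip('tables'),
# as in the test_feather.py / test_pytables.py hunks below.
# ---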
elif [ "$COVERAGE" ]; then - echo pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas - pytest -s --cov=pandas --cov-report xml:/tmp/nosetests.xml $TEST_ARGS pandas + echo pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas else echo pytest $TEST_ARGS pandas pytest $TEST_ARGS pandas # TODO: doctest diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 3ef9ed8962a23..5c2bb9b73d618 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -734,7 +734,7 @@ gbq integration tests on a forked repository: the status by visiting your Travis branches page which exists at the following location: https://travis-ci.org/your-user-name/pandas/branches . Click on a build job for your branch. Expand the following line in the - build log: ``ci/print_skipped.py /tmp/nosetests.xml`` . Search for the + build log: ``ci/print_skipped.py /tmp/pytest.xml`` . Search for the term ``test_gbq`` and confirm that gbq integration tests are not skipped. Running the vbench performance test suite (phasing out) diff --git a/pandas/computation/tests/test_compat.py b/pandas/computation/tests/test_compat.py index 900dd2c28b4c5..599d0c10336dc 100644 --- a/pandas/computation/tests/test_compat.py +++ b/pandas/computation/tests/test_compat.py @@ -1,7 +1,7 @@ # flake8: noqa -import nose +import pytest from itertools import product from distutils.version import LooseVersion @@ -31,7 +31,7 @@ def test_compat(): assert _NUMEXPR_INSTALLED except ImportError: - raise nose.SkipTest("not testing numexpr version compat") + pytest.skip("not testing numexpr version compat") def test_invalid_numexpr_version(): @@ -49,14 +49,14 @@ def testit(): try: import numexpr as ne except ImportError: - raise nose.SkipTest("no numexpr") + pytest.skip("no numexpr") else: if ne.__version__ < LooseVersion('2.1'): with tm.assertRaisesRegexp(ImportError, "'numexpr' version is " ".+, must be >= 2.1"): testit() elif ne.__version__ == LooseVersion('2.4.4'): - raise nose.SkipTest("numexpr version==2.4.4") + pytest.skip("numexpr version==2.4.4") else: testit() else: diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index a4bb81ce7263c..ada714c8ac52e 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -6,8 +6,7 @@ from itertools import product from distutils.version import LooseVersion -import nose -from nose.tools import assert_raises +import pytest from numpy.random import randn, rand, randint import numpy as np @@ -319,7 +318,7 @@ def get_expected_pow_result(self, lhs, rhs): except ValueError as e: if str(e).startswith('negative number cannot be raised to a fractional power'): if self.engine == 'python': - raise nose.SkipTest(str(e)) + pytest.skip(str(e)) else: expected = np.nan else: @@ -1174,13 +1173,15 @@ def test_bool_ops_with_constants(self): def test_panel_fails(self): x = Panel(randn(3, 4, 5)) y = Series(randn(10)) - assert_raises(NotImplementedError, self.eval, 'x + y', + with pytest.raises(NotImplementedError): + self.eval('x + y', local_dict={'x': x, 'y': y}) def test_4d_ndarray_fails(self): x = randn(3, 4, 5, 6) y = Series(randn(10)) - assert_raises(NotImplementedError, self.eval, 'x + y', + with pytest.raises(NotImplementedError): + self.eval('x + y', local_dict={'x': x, 'y': y}) def test_constant(self): @@ -1705,7 +1706,7 @@ def test_result_types(self): def test_result_types2(self): # xref 
https://github.com/pandas-dev/pandas/issues/12293 - raise nose.SkipTest("unreliable tests on complex128") + pytest.skip("unreliable tests on complex128") # Did not test complex64 because DataFrame is converting it to # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952 @@ -1822,7 +1823,8 @@ def check_disallowed_nodes(engine, parser): inst = VisitorClass('x + 1', engine, parser) for ops in uns_ops: - assert_raises(NotImplementedError, getattr(inst, ops)) + with pytest.raises(NotImplementedError): + getattr(inst, ops)() def test_disallowed_nodes(): @@ -1833,7 +1835,8 @@ def test_disallowed_nodes(): def check_syntax_error_exprs(engine, parser): tm.skip_if_no_ne(engine) e = 's +' - assert_raises(SyntaxError, pd.eval, e, engine=engine, parser=parser) + with pytest.raises(SyntaxError): + pd.eval(e, engine=engine, parser=parser) def test_syntax_error_exprs(): diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index 440f5c13d5121..c298b3841096c 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import nose +import pytest from pandas.compat import range, lrange, StringIO, OrderedDict import os @@ -1009,8 +1009,8 @@ def test_latin_encoding(self): return # GH 13774 - raise nose.SkipTest("encoding not implemented in .to_json(), " - "xref #13774") + pytest.skip("encoding not implemented in .to_json(), " + "xref #13774") values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'], [b'E\xc9, 17', b'a', b'b', b'c'], diff --git a/pandas/io/tests/json/test_ujson.py b/pandas/io/tests/json/test_ujson.py index 3da61b7696fdc..6a986710ae444 100644 --- a/pandas/io/tests/json/test_ujson.py +++ b/pandas/io/tests/json/test_ujson.py @@ -7,7 +7,7 @@ except ImportError: import simplejson as json import math -import nose +import pytest import platform import sys import time @@ -28,7 +28,7 @@ def _skip_if_python_ver(skip_major, skip_minor=None): major, minor = sys.version_info[:2] if major == skip_major and (skip_minor is None or minor == skip_minor): - raise nose.SkipTest("skipping Python version %d.%d" % (major, minor)) + pytest.skip("skipping Python version %d.%d" % (major, minor)) json_unicode = (json.dumps if compat.PY3 @@ -95,7 +95,7 @@ def test_encodeNonCLocale(self): try: locale.setlocale(locale.LC_NUMERIC, 'Italian_Italy') except: - raise nose.SkipTest('Could not set locale for testing') + pytest.skip('Could not set locale for testing') self.assertEqual(ujson.loads(ujson.dumps(4.78e60)), 4.78e60) self.assertEqual(ujson.loads('4.78', precise_float=True), 4.78) locale.setlocale(locale.LC_NUMERIC, savedlocale) @@ -113,7 +113,7 @@ def test_decimalDecodeTestPrecise(self): def test_encodeDoubleTinyExponential(self): if compat.is_platform_windows() and not compat.PY3: - raise nose.SkipTest("buggy on win-64 for py2") + pytest.skip("buggy on win-64 for py2") num = 1e-40 self.assertEqual(num, ujson.decode(ujson.encode(num))) @@ -393,8 +393,8 @@ def test_nat(self): def test_npy_nat(self): from distutils.version import LooseVersion if LooseVersion(np.__version__) < '1.7.0': - raise nose.SkipTest("numpy version < 1.7.0, is " - "{0}".format(np.__version__)) + pytest.skip("numpy version < 1.7.0, is " + "{0}".format(np.__version__)) input = np.datetime64('NaT') assert ujson.encode(input) == 'null', "Expected null" diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py index 11073f3f108ba..ffbd904843bfc 100644 --- 
a/pandas/io/tests/parser/c_parser_only.py +++ b/pandas/io/tests/parser/c_parser_only.py @@ -7,7 +7,7 @@ further arguments when parsing. """ -import nose +import pytest import numpy as np import pandas as pd @@ -159,7 +159,7 @@ def error(val): def test_pass_dtype_as_recarray(self): if compat.is_platform_windows() and self.low_memory: - raise nose.SkipTest( + pytest.skip( "segfaults on win-64, only when all tests are run") data = """\ diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 9655c481b763a..0671901fc170a 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -9,7 +9,7 @@ import sys from datetime import datetime -import nose +import pytest import numpy as np from pandas.lib import Timestamp @@ -635,8 +635,8 @@ def test_file(self): url_table = self.read_table('file://localhost/' + localtable) except URLError: # fails on some systems - raise nose.SkipTest("failing on %s" % - ' '.join(platform.uname()).strip()) + pytest.skip("failing on %s" % + ' '.join(platform.uname()).strip()) tm.assert_frame_equal(url_table, local_table) @@ -1262,7 +1262,7 @@ def test_verbose_import(self): def test_iteration_open_handle(self): if PY3: - raise nose.SkipTest( + pytest.skip( "won't work in Python 3 {0}".format(sys.version_info)) with tm.ensure_clean() as path: diff --git a/pandas/io/tests/parser/compression.py b/pandas/io/tests/parser/compression.py index 308ca6e8a5a2c..bdcd10fc64aa5 100644 --- a/pandas/io/tests/parser/compression.py +++ b/pandas/io/tests/parser/compression.py @@ -5,7 +5,7 @@ of the parsers defined in parsers.py """ -import nose +import pytest import pandas.util.testing as tm @@ -16,7 +16,7 @@ def test_zip(self): try: import zipfile except ImportError: - raise nose.SkipTest('need zipfile to run') + pytest.skip('need zipfile to run') with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -67,7 +67,7 @@ def test_gzip(self): try: import gzip except ImportError: - raise nose.SkipTest('need gzip to run') + pytest.skip('need gzip to run') with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -96,7 +96,7 @@ def test_bz2(self): try: import bz2 except ImportError: - raise nose.SkipTest('need bz2 to run') + pytest.skip('need bz2 to run') with open(self.csv1, 'rb') as data_file: data = data_file.read() diff --git a/pandas/io/tests/parser/converters.py b/pandas/io/tests/parser/converters.py index 2ceaff9291e7e..859d2e19bd56a 100644 --- a/pandas/io/tests/parser/converters.py +++ b/pandas/io/tests/parser/converters.py @@ -7,7 +7,7 @@ from datetime import datetime -import nose +import pytest import numpy as np import pandas as pd @@ -84,8 +84,8 @@ def test_converter_return_string_bug(self): def test_converters_corner_with_nas(self): # skip aberration observed on Win64 Python 3.2.2 if hash(np.int64(-1)) != -2: - raise nose.SkipTest("skipping because of windows hash on Python" - " 3.2.2") + pytest.skip("skipping because of windows hash on Python" + " 3.2.2") data = """id,score,days 1,2,12 diff --git a/pandas/io/tests/parser/parse_dates.py b/pandas/io/tests/parser/parse_dates.py index ad3d5f2382a49..6197d07d4eafa 100644 --- a/pandas/io/tests/parser/parse_dates.py +++ b/pandas/io/tests/parser/parse_dates.py @@ -8,7 +8,7 @@ from distutils.version import LooseVersion from datetime import datetime -import nose +import pytest import numpy as np import pandas.lib as lib from pandas.lib import Timestamp @@ -268,9 +268,9 @@ def test_yy_format_with_yearfirst(self): # See gh-217 import dateutil if dateutil.__version__ 
>= LooseVersion('2.5.0'): - raise nose.SkipTest("testing yearfirst=True not-support" - "on datetutil < 2.5.0 this works but" - "is wrong") + pytest.skip("testing yearfirst=True not-support" + "on datetutil < 2.5.0 this works but" + "is wrong") rs = self.read_csv(StringIO(data), index_col=0, parse_dates=[['date', 'time']]) diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py index 283ff366b5efd..bd76070933c47 100644 --- a/pandas/io/tests/parser/python_parser_only.py +++ b/pandas/io/tests/parser/python_parser_only.py @@ -9,7 +9,7 @@ import csv import sys -import nose +import pytest import pandas.util.testing as tm from pandas import DataFrame, Index @@ -79,7 +79,7 @@ def test_sniff_delimiter(self): def test_BytesIO_input(self): if not compat.PY3: - raise nose.SkipTest( + pytest.skip( "Bytes-related test - only needs to work on Python 3") data = BytesIO("שלום::1234\n562::123".encode('cp1255')) @@ -130,7 +130,7 @@ def test_decompression_regex_sep(self): import gzip import bz2 except ImportError: - raise nose.SkipTest('need gzip and bz2 to run') + pytest.skip('need gzip and bz2 to run') with open(self.csv1, 'rb') as f: data = f.read() diff --git a/pandas/io/tests/parser/test_network.py b/pandas/io/tests/parser/test_network.py index 533b7733bde28..4d75b59b09560 100644 --- a/pandas/io/tests/parser/test_network.py +++ b/pandas/io/tests/parser/test_network.py @@ -6,7 +6,7 @@ """ import os -import nose +import pytest import functools from itertools import product @@ -59,7 +59,7 @@ def setUp(self): try: import s3fs # noqa except ImportError: - raise nose.SkipTest("s3fs not installed") + pytest.skip("s3fs not installed") @tm.network def test_parse_public_s3_bucket(self): diff --git a/pandas/io/tests/parser/test_read_fwf.py b/pandas/io/tests/parser/test_read_fwf.py index a423355081ac3..dccae06afe4d1 100644 --- a/pandas/io/tests/parser/test_read_fwf.py +++ b/pandas/io/tests/parser/test_read_fwf.py @@ -8,7 +8,7 @@ from datetime import datetime -import nose +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm @@ -75,7 +75,7 @@ def test_fwf(self): def test_BytesIO_input(self): if not compat.PY3: - raise nose.SkipTest( + pytest.skip( "Bytes-related test - only needs to work on Python 3") result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')), widths=[ @@ -192,7 +192,7 @@ def test_fwf_compression(self): import gzip import bz2 except ImportError: - raise nose.SkipTest("Need gzip and bz2 to run this test") + pytest.skip("Need gzip and bz2 to run this test") data = """1111111111 2222222222 @@ -333,7 +333,7 @@ def test_multiple_delimiters(self): def test_variable_width_unicode(self): if not compat.PY3: - raise nose.SkipTest( + pytest.skip( 'Bytes-related test - only needs to work on Python 3') test = """ שלום שלום diff --git a/pandas/io/tests/parser/usecols.py b/pandas/io/tests/parser/usecols.py index 4875282067fb3..95df077dae997 100644 --- a/pandas/io/tests/parser/usecols.py +++ b/pandas/io/tests/parser/usecols.py @@ -5,7 +5,7 @@ for all of the parsers defined in parsers.py """ -import nose +import pytest import numpy as np import pandas.util.testing as tm @@ -377,7 +377,7 @@ def test_usecols_with_multibyte_characters(self): tm.assert_frame_equal(df, expected) def test_usecols_with_multibyte_unicode_characters(self): - raise nose.SkipTest('TODO: see gh-13253') + pytest.skip('TODO: see gh-13253') s = '''あああ,いい,ううう,ええええ 0.056674973,8,True,a diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py index 
98a4152754b55..3abd1093362f4 100644 --- a/pandas/io/tests/test_clipboard.py +++ b/pandas/io/tests/test_clipboard.py @@ -2,7 +2,7 @@ import numpy as np from numpy.random import randint -import nose +import pytest import pandas as pd from pandas import DataFrame @@ -15,10 +15,13 @@ try: DataFrame({'A': [1, 2]}).to_clipboard() + _DEPS_INSTALLED = 1 except PyperclipException: - raise nose.SkipTest("clipboard primitives not installed") + _DEPS_INSTALLED = 0 +@pytest.mark.skipif(not _DEPS_INSTALLED, + reason="clipboard primitives not installed") class TestClipboard(tm.TestCase): @classmethod diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 2791e397d5b86..a22c89184f20d 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -9,7 +9,7 @@ import warnings import operator import functools -import nose +import pytest from numpy import nan import numpy as np @@ -32,30 +32,30 @@ def _skip_if_no_xlrd(): import xlrd ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) if ver < (0, 9): - raise nose.SkipTest('xlrd < 0.9, skipping') + pytest.skip('xlrd < 0.9, skipping') except ImportError: - raise nose.SkipTest('xlrd not installed, skipping') + pytest.skip('xlrd not installed, skipping') def _skip_if_no_xlwt(): try: import xlwt # NOQA except ImportError: - raise nose.SkipTest('xlwt not installed, skipping') + pytest.skip('xlwt not installed, skipping') def _skip_if_no_openpyxl(): try: import openpyxl # NOQA except ImportError: - raise nose.SkipTest('openpyxl not installed, skipping') + pytest.skip('openpyxl not installed, skipping') def _skip_if_no_xlsxwriter(): try: import xlsxwriter # NOQA except ImportError: - raise nose.SkipTest('xlsxwriter not installed, skipping') + pytest.skip('xlsxwriter not installed, skipping') def _skip_if_no_excelsuite(): @@ -68,7 +68,7 @@ def _skip_if_no_s3fs(): try: import s3fs # noqa except ImportError: - raise nose.SkipTest('s3fs not installed, skipping') + pytest.skip('s3fs not installed, skipping') _seriesd = tm.getSeriesData() @@ -600,7 +600,7 @@ def test_read_from_file_url(self): # FILE if sys.version_info[:2] < (2, 6): - raise nose.SkipTest("file:// not supported with Python < 2.6") + pytest.skip("file:// not supported with Python < 2.6") localtable = os.path.join(self.dirpath, 'test1' + self.ext) local_table = read_excel(localtable) @@ -610,8 +610,8 @@ def test_read_from_file_url(self): except URLError: # fails on some systems import platform - raise nose.SkipTest("failing on %s" % - ' '.join(platform.uname()).strip()) + pytest.skip("failing on %s" % + ' '.join(platform.uname()).strip()) tm.assert_frame_equal(url_table, local_table) @@ -1513,7 +1513,7 @@ def test_to_excel_unicode_filename(self): try: f = open(filename, 'wb') except UnicodeEncodeError: - raise nose.SkipTest('no unicode file names on this system') + pytest.skip('no unicode file names on this system') else: f.close() @@ -1555,7 +1555,7 @@ def test_to_excel_unicode_filename(self): # import xlwt # import xlrd # except ImportError: - # raise nose.SkipTest + # pytest.skip # filename = '__tmp_to_excel_header_styling_xls__.xls' # pdf.to_excel(filename, 'test1') @@ -1601,9 +1601,9 @@ def test_to_excel_unicode_filename(self): # import openpyxl # from openpyxl.cell import get_column_letter # except ImportError: - # raise nose.SkipTest + # pytest.skip # if openpyxl.__version__ < '1.6.1': - # raise nose.SkipTest + # pytest.skip # # test xlsx_styling # filename = '__tmp_to_excel_header_styling_xlsx__.xlsx' # pdf.to_excel(filename, 'test1') @@ -1635,7 +1635,7 
@@ def test_excel_010_hemstring(self): _skip_if_no_xlrd() if self.merge_cells: - raise nose.SkipTest('Skip tests for merged MI format.') + pytest.skip('Skip tests for merged MI format.') from pandas.util.testing import makeCustomDataframe as mkdf # ensure limited functionality in 0.10 @@ -1690,7 +1690,7 @@ def test_excel_010_hemstring_raises_NotImplementedError(self): _skip_if_no_xlrd() if self.merge_cells: - raise nose.SkipTest('Skip tests for merged MI format.') + pytest.skip('Skip tests for merged MI format.') from pandas.util.testing import makeCustomDataframe as mkdf # ensure limited functionality in 0.10 @@ -1873,7 +1873,7 @@ class OpenpyxlTests(ExcelWriterBase, tm.TestCase): def test_to_excel_styleconverter(self): _skip_if_no_openpyxl() if not openpyxl_compat.is_compat(major_ver=1): - raise nose.SkipTest('incompatiable openpyxl version') + pytest.skip('incompatiable openpyxl version') import openpyxl @@ -1910,7 +1910,7 @@ def setUpClass(cls): ver = openpyxl.__version__ if (not (LooseVersion(ver) >= LooseVersion('2.0.0') and LooseVersion(ver) < LooseVersion('2.2.0'))): - raise nose.SkipTest("openpyxl %s >= 2.2" % str(ver)) + pytest.skip("openpyxl %s >= 2.2" % str(ver)) cls.setUpClass = setUpClass return cls @@ -2026,7 +2026,7 @@ def setUpClass(cls): import openpyxl ver = openpyxl.__version__ if LooseVersion(ver) < LooseVersion('2.2.0'): - raise nose.SkipTest("openpyxl %s < 2.2" % str(ver)) + pytest.skip("openpyxl %s < 2.2" % str(ver)) cls.setUpClass = setUpClass return cls @@ -2095,7 +2095,7 @@ def test_to_excel_styleconverter(self): def test_write_cells_merge_styled(self): if not openpyxl_compat.is_compat(major_ver=2): - raise nose.SkipTest('incompatiable openpyxl version') + pytest.skip('incompatiable openpyxl version') from pandas.formats.format import ExcelCell @@ -2278,7 +2278,7 @@ def test_ExcelWriter_dispatch(self): except ImportError: _skip_if_no_openpyxl() if not openpyxl_compat.is_compat(major_ver=1): - raise nose.SkipTest('incompatible openpyxl version') + pytest.skip('incompatible openpyxl version') writer_klass = _Openpyxl1Writer with ensure_clean('.xlsx') as path: diff --git a/pandas/io/tests/test_feather.py b/pandas/io/tests/test_feather.py index 218175e5ef527..6e2c28a0f68de 100644 --- a/pandas/io/tests/test_feather.py +++ b/pandas/io/tests/test_feather.py @@ -1,17 +1,12 @@ """ test feather-format compat """ -import nose +import pytest +feather = pytest.importorskip('feather') import numpy as np import pandas as pd - from pandas.io.feather_format import to_feather, read_feather -try: - import feather # noqa -except ImportError: - raise nose.SkipTest('no feather-format installed') - from feather import FeatherError import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, ensure_clean diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 1157482d7ae67..0868edd2147b5 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -1,6 +1,6 @@ import re from datetime import datetime -import nose +import pytest import pytz import platform from time import sleep @@ -42,25 +42,25 @@ def _skip_if_no_project_id(): if not _get_project_id(): - raise nose.SkipTest( + pytest.skip( "Cannot run integration tests without a project id") def _skip_local_auth_if_in_travis_env(): if _in_travis_environment(): - raise nose.SkipTest("Cannot run local auth in travis environment") + pytest.skip("Cannot run local auth in travis environment") def _skip_if_no_private_key_path(): if not _get_private_key_path(): - raise 
nose.SkipTest("Cannot run integration tests without a " - "private key json file path") + pytest.skip("Cannot run integration tests without a " + "private key json file path") def _skip_if_no_private_key_contents(): if not _get_private_key_contents(): - raise nose.SkipTest("Cannot run integration tests without a " - "private key json contents") + pytest.skip("Cannot run integration tests without a " + "private key json contents") def _in_travis_environment(): @@ -184,7 +184,7 @@ def _setup_common(): try: _test_imports() except (ImportError, NotImplementedError) as import_exception: - raise nose.SkipTest(import_exception) + pytest.skip(import_exception) if _in_travis_environment(): logging.getLogger('oauth2client').setLevel(logging.ERROR) @@ -284,15 +284,15 @@ def test_should_be_able_to_get_results_from_query(self): def test_get_application_default_credentials_does_not_throw_error(self): if _check_if_can_get_correct_default_credentials(): - raise nose.SkipTest("Can get default_credentials " - "from the environment!") + pytest.skip("Can get default_credentials " + "from the environment!") credentials = self.sut.get_application_default_credentials() self.assertIsNone(credentials) def test_get_application_default_credentials_returns_credentials(self): if not _check_if_can_get_correct_default_credentials(): - raise nose.SkipTest("Cannot get default_credentials " - "from the environment!") + pytest.skip("Cannot get default_credentials " + "from the environment!") from oauth2client.client import GoogleCredentials credentials = self.sut.get_application_default_credentials() self.assertTrue(isinstance(credentials, GoogleCredentials)) @@ -1015,7 +1015,7 @@ def test_upload_data_if_table_exists_append(self): def test_upload_data_if_table_exists_replace(self): - raise nose.SkipTest("buggy test") + pytest.skip("buggy test") destination_table = DESTINATION_TABLE + "4" diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 356adb92829c6..232e68a87f16e 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -12,7 +12,7 @@ from distutils.version import LooseVersion -import nose +import pytest import numpy as np from numpy.random import rand @@ -39,7 +39,7 @@ def _have_module(module_name): def _skip_if_no(module_name): if not _have_module(module_name): - raise nose.SkipTest("{0!r} not found".format(module_name)) + pytest.skip("{0!r} not found".format(module_name)) def _skip_if_none_of(module_names): @@ -48,16 +48,16 @@ def _skip_if_none_of(module_names): if module_names == 'bs4': import bs4 if bs4.__version__ == LooseVersion('4.2.0'): - raise nose.SkipTest("Bad version of bs4: 4.2.0") + pytest.skip("Bad version of bs4: 4.2.0") else: not_found = [module_name for module_name in module_names if not _have_module(module_name)] if set(not_found) & set(module_names): - raise nose.SkipTest("{0!r} not found".format(not_found)) + pytest.skip("{0!r} not found".format(not_found)) if 'bs4' in module_names: import bs4 if bs4.__version__ == LooseVersion('4.2.0'): - raise nose.SkipTest("Bad version of bs4: 4.2.0") + pytest.skip("Bad version of bs4: 4.2.0") DATA_PATH = tm.get_data_path() @@ -685,7 +685,7 @@ def test_decimal_rows(self): ''') expected = DataFrame(data={'Header': 1100.101}, index=[0]) result = self.read_html(data, decimal='#')[0] - nose.tools.assert_equal(result['Header'].dtype, np.dtype('float64')) + assert result['Header'].dtype == np.dtype('float64') tm.assert_frame_equal(result, expected) def test_bool_header_arg(self): diff --git 
a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 2ee36d85f674c..4bb6f4a69bab3 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -1,4 +1,4 @@ -import nose +import pytest import os import datetime @@ -168,7 +168,7 @@ def test_list_numpy_float(self): def test_list_numpy_float_complex(self): if not hasattr(np, 'complex128'): - raise nose.SkipTest('numpy cant handle complex128') + pytest.skip('numpy cant handle complex128') x = [np.float32(np.random.rand()) for i in range(5)] + \ [np.complex128(np.random.rand() + 1j * np.random.rand()) @@ -261,7 +261,7 @@ def test_datetimes(self): # fails under 2.6/win32 (np.datetime64 seems broken) if LooseVersion(sys.version) < '2.7': - raise nose.SkipTest('2.6 with np.datetime64 is broken') + pytest.skip('2.6 with np.datetime64 is broken') for i in [datetime.datetime(2013, 1, 1), datetime.datetime(2013, 1, 1, 5, 1), @@ -589,12 +589,12 @@ def _test_compression(self, compress): def test_compression_zlib(self): if not _ZLIB_INSTALLED: - raise nose.SkipTest('no zlib') + pytest.skip('no zlib') self._test_compression('zlib') def test_compression_blosc(self): if not _BLOSC_INSTALLED: - raise nose.SkipTest('no blosc') + pytest.skip('no blosc') self._test_compression('blosc') def _test_compression_warns_when_decompress_caches(self, compress): @@ -653,12 +653,12 @@ def decompress(ob): def test_compression_warns_when_decompress_caches_zlib(self): if not _ZLIB_INSTALLED: - raise nose.SkipTest('no zlib') + pytest.skip('no zlib') self._test_compression_warns_when_decompress_caches('zlib') def test_compression_warns_when_decompress_caches_blosc(self): if not _BLOSC_INSTALLED: - raise nose.SkipTest('no blosc') + pytest.skip('no blosc') self._test_compression_warns_when_decompress_caches('blosc') def _test_small_strings_no_warn(self, compress): @@ -690,18 +690,18 @@ def _test_small_strings_no_warn(self, compress): def test_small_strings_no_warn_zlib(self): if not _ZLIB_INSTALLED: - raise nose.SkipTest('no zlib') + pytest.skip('no zlib') self._test_small_strings_no_warn('zlib') def test_small_strings_no_warn_blosc(self): if not _BLOSC_INSTALLED: - raise nose.SkipTest('no blosc') + pytest.skip('no blosc') self._test_small_strings_no_warn('blosc') def test_readonly_axis_blosc(self): # GH11880 if not _BLOSC_INSTALLED: - raise nose.SkipTest('no blosc') + pytest.skip('no blosc') df1 = DataFrame({'A': list('abcd')}) df2 = DataFrame(df1, index=[1., 2., 3., 4.]) self.assertTrue(1 in self.encode_decode(df1['A'], compress='blosc')) @@ -717,9 +717,9 @@ def test_readonly_axis_zlib(self): def test_readonly_axis_blosc_to_sql(self): # GH11880 if not _BLOSC_INSTALLED: - raise nose.SkipTest('no blosc') + pytest.skip('no blosc') if not self._SQLALCHEMY_INSTALLED: - raise nose.SkipTest('no sqlalchemy') + pytest.skip('no sqlalchemy') expected = DataFrame({'A': list('abcd')}) df = self.encode_decode(expected, compress='blosc') eng = self._create_sql_engine("sqlite:///:memory:") @@ -731,9 +731,9 @@ def test_readonly_axis_blosc_to_sql(self): def test_readonly_axis_zlib_to_sql(self): # GH11880 if not _ZLIB_INSTALLED: - raise nose.SkipTest('no zlib') + pytest.skip('no zlib') if not self._SQLALCHEMY_INSTALLED: - raise nose.SkipTest('no sqlalchemy') + pytest.skip('no sqlalchemy') expected = DataFrame({'A': list('abcd')}) df = self.encode_decode(expected, compress='zlib') eng = self._create_sql_engine("sqlite:///:memory:") diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index 89827817a85fb..588b2d5f04888 100644 --- 
a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -2,7 +2,7 @@ """ manage legacy pickle tests """ -import nose +import pytest import os from distutils.version import LooseVersion @@ -172,7 +172,7 @@ def compare_sp_frame_float(self, result, expected, typ, version): def read_pickles(self, version): if not is_platform_little_endian(): - raise nose.SkipTest("known failure on non-little endian") + pytest.skip("known failure on non-little endian") pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version))) n = 0 diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 501e744ad308c..3fa0eb2ef52dc 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1,4 +1,4 @@ -import nose +import pytest import sys import os import warnings @@ -17,17 +17,14 @@ from pandas.compat import is_platform_windows, PY3, PY35 from pandas.formats.printing import pprint_thing -from pandas.io.pytables import _tables, TableIterator -try: - _tables() -except ImportError as e: - raise nose.SkipTest(e) - +tables = pytest.importorskip('tables') +from pandas.io.pytables import TableIterator from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, IncompatibilityWarning, PerformanceWarning, AttributeConflictWarning, DuplicateWarning, PossibleDataLossError, ClosedFileError) + from pandas.io import pytables as pytables import pandas.util.testing as tm from pandas.util.testing import (assert_panel4d_equal, @@ -43,7 +40,7 @@ try: import tables except ImportError: - raise nose.SkipTest('no pytables') + pytest.skip('no pytables') from distutils.version import LooseVersion @@ -738,7 +735,7 @@ def test_put_compression(self): def test_put_compression_blosc(self): tm.skip_if_no_package('tables', '2.2', app='blosc support') if skip_compression: - raise nose.SkipTest("skipping on windows/PY3") + pytest.skip("skipping on windows/PY3") df = tm.makeTimeDataFrame() @@ -968,7 +965,7 @@ def check(format, index): def test_encoding(self): if sys.byteorder != 'little': - raise nose.SkipTest('system byteorder is not little') + pytest.skip('system byteorder is not little') with ensure_clean_store(self.path) as store: df = DataFrame(dict(A='foo', B='bar'), index=range(5)) @@ -2830,14 +2827,14 @@ def test_index_types(self): def test_timeseries_preepoch(self): if sys.version_info[0] == 2 and sys.version_info[1] < 7: - raise nose.SkipTest("won't work on Python < 2.7") + pytest.skip("won't work on Python < 2.7") dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: self._check_roundtrip(ts, tm.assert_series_equal) except OverflowError: - raise nose.SkipTest('known failer on some windows platforms') + pytest.skip('known failer on some windows platforms') def test_frame(self): @@ -4202,8 +4199,8 @@ def test_nan_selection_bug_4858(self): # GH 4858; nan selection bug, only works for pytables >= 3.1 if LooseVersion(tables.__version__) < '3.1.0': - raise nose.SkipTest('tables version does not support fix for nan ' - 'selection bug: GH 4858') + pytest.skip('tables version does not support fix for nan ' + 'selection bug: GH 4858') with ensure_clean_store(self.path) as store: @@ -4453,7 +4450,7 @@ def test_pytables_native_read(self): def test_pytables_native2_read(self): # fails on win/3.5 oddly if PY35 and is_platform_windows(): - raise nose.SkipTest("native2 read fails oddly on windows / 3.5") + pytest.skip("native2 read fails oddly on windows / 3.5") with ensure_clean_store( 
tm.get_data_path('legacy_hdf/pytables_native2.h5'), @@ -4585,7 +4582,7 @@ def do_copy(f=None, new_f=None, keys=None, safe_remove(path) def test_legacy_table_write(self): - raise nose.SkipTest("cannot write legacy tables") + pytest.skip("cannot write legacy tables") store = HDFStore(tm.get_data_path( 'legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a') diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index ddda65c5bafc8..a6f4d96001021 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -24,7 +24,7 @@ import os import sys -import nose +import pytest import warnings import numpy as np import pandas as pd @@ -854,7 +854,7 @@ def connect(self): if SQLALCHEMY_INSTALLED: return sqlalchemy.create_engine('sqlite:///:memory:') else: - raise nose.SkipTest('SQLAlchemy not installed') + pytest.skip('SQLAlchemy not installed') def test_read_table_columns(self): # test columns argument in read_table @@ -1063,7 +1063,7 @@ def test_con_string_import_error(self): self.assertRaises(ImportError, sql.read_sql, "SELECT * FROM iris", conn) else: - raise nose.SkipTest('SQLAlchemy is installed') + pytest.skip('SQLAlchemy is installed') def test_read_sql_delegate(self): iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn) @@ -1128,7 +1128,7 @@ def setUpClass(cls): conn.connect() except sqlalchemy.exc.OperationalError: msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) - raise nose.SkipTest(msg) + pytest.skip(msg) def setUp(self): self.setup_connect() @@ -1141,7 +1141,7 @@ def setUp(self): def setup_import(cls): # Skip this test if SQLAlchemy not available if not SQLALCHEMY_INSTALLED: - raise nose.SkipTest('SQLAlchemy not installed') + pytest.skip('SQLAlchemy not installed') @classmethod def setup_driver(cls): @@ -1158,7 +1158,7 @@ def setup_connect(self): # to test if connection can be made: self.conn.connect() except sqlalchemy.exc.OperationalError: - raise nose.SkipTest( + pytest.skip( "Can't connect to {0} server".format(self.flavor)) def test_aread_sql(self): @@ -1304,7 +1304,7 @@ def check(col): # GH11216 df = pd.read_sql_query("select * from types_test_data", self.conn) if not hasattr(df, 'DateColWithTz'): - raise nose.SkipTest("no column with datetime with time zone") + pytest.skip("no column with datetime with time zone") # this is parsed on Travis (linux), but not on macosx for some reason # even with the same versions of psycopg2 & sqlalchemy, possibly a @@ -1319,7 +1319,7 @@ def check(col): df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz']) if not hasattr(df, 'DateColWithTz'): - raise nose.SkipTest("no column with datetime with time zone") + pytest.skip("no column with datetime with time zone") check(df.DateColWithTz) df = pd.concat(list(pd.read_sql_query("select * from types_test_data", @@ -1665,7 +1665,7 @@ class Temporary(Base): class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy): def test_transactions(self): - raise nose.SkipTest( + pytest.skip( "Nested transactions rollbacks don't work with Pandas") @@ -1739,7 +1739,7 @@ def setup_driver(cls): import pymysql # noqa cls.driver = 'pymysql' except ImportError: - raise nose.SkipTest('pymysql not installed') + pytest.skip('pymysql not installed') def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) @@ -1808,7 +1808,7 @@ def setup_driver(cls): import psycopg2 # noqa cls.driver = 'psycopg2' except ImportError: - raise nose.SkipTest('psycopg2 not installed') + 
pytest.skip('psycopg2 not installed') def test_schema_support(self): # only test this for postgresql (schema's not supported in @@ -2007,7 +2007,7 @@ def test_to_sql_save_index(self): def test_transactions(self): if PY36: - raise nose.SkipTest("not working on python > 3.5") + pytest.skip("not working on python > 3.5") self._transaction_test() def _get_sqlite_column_type(self, table, column): @@ -2019,7 +2019,7 @@ def _get_sqlite_column_type(self, table, column): def test_dtype(self): if self.flavor == 'mysql': - raise nose.SkipTest('Not applicable to MySQL legacy') + pytest.skip('Not applicable to MySQL legacy') cols = ['A', 'B'] data = [(0.8, True), (0.9, None)] @@ -2045,7 +2045,7 @@ def test_dtype(self): def test_notnull_dtype(self): if self.flavor == 'mysql': - raise nose.SkipTest('Not applicable to MySQL legacy') + pytest.skip('Not applicable to MySQL legacy') cols = {'Bool': Series([True, None]), 'Date': Series([datetime(2012, 5, 1), None]), @@ -2130,7 +2130,7 @@ def _skip_if_no_pymysql(): try: import pymysql # noqa except ImportError: - raise nose.SkipTest('pymysql not installed, skipping') + pytest.skip('pymysql not installed, skipping') class TestXSQLite(SQLiteMixIn, tm.TestCase): @@ -2389,12 +2389,12 @@ def setUpClass(cls): try: pymysql.connect(read_default_group='pandas') except pymysql.ProgrammingError: - raise nose.SkipTest( + pytest.skip( "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") except pymysql.Error: - raise nose.SkipTest( + pytest.skip( "Cannot connect to database. " "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " @@ -2415,12 +2415,12 @@ def setUp(self): try: self.conn = pymysql.connect(read_default_group='pandas') except pymysql.ProgrammingError: - raise nose.SkipTest( + pytest.skip( "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") except pymysql.Error: - raise nose.SkipTest( + pytest.skip( "Cannot connect to database. 
" "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index fcb935925e61f..ae09e671dbca3 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -9,7 +9,7 @@ from datetime import datetime from distutils.version import LooseVersion -import nose +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm @@ -128,7 +128,7 @@ def test_read_dta1(self): def test_read_dta2(self): if LooseVersion(sys.version) < '2.7': - raise nose.SkipTest('datetime interp under 2.6 is faulty') + pytest.skip('datetime interp under 2.6 is faulty') expected = DataFrame.from_records( [ diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/sparse/tests/test_indexing.py index c400b68c8a7d8..357a7103f4027 100644 --- a/pandas/sparse/tests/test_indexing.py +++ b/pandas/sparse/tests/test_indexing.py @@ -1,6 +1,6 @@ # pylint: disable-msg=E1101,W0612 -import nose # noqa +import pytest # noqa import numpy as np import pandas as pd import pandas.util.testing as tm diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index 491005db2ae79..4d5a93d77cf14 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -1,6 +1,6 @@ from pandas import Series -import nose +import pytest import numpy as np import operator import pandas.util.testing as tm @@ -213,7 +213,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): longer_index.to_int_index()) if compat.is_platform_windows(): - raise nose.SkipTest("segfaults on win-64 when all tests are run") + pytest.skip("segfaults on win-64 when all tests are run") check_cases(_check_case) def test_intersect_empty(self): diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index db6ae14b096d3..d4543b97af4dd 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -577,8 +577,8 @@ def check(a, b): def test_binary_operators(self): # skipping for now ##### - import nose - raise nose.SkipTest("skipping sparse binary operators test") + import pytest + pytest.skip("skipping sparse binary operators test") def _check_inplace_op(iop, op): tmp = self.bseries.copy() diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 9a24ae332f7c5..476c6a636ae5a 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -44,7 +44,7 @@ reset_option) from datetime import datetime -import nose +import pytest use_32bit_repr = is_platform_windows() or is_platform_32bit() @@ -287,7 +287,7 @@ def test_repr_non_interactive(self): def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() if term_width < 10 or term_height < 10: - raise nose.SkipTest("terminal size too small, " + pytest.skip("terminal size too small, " "{0} x {1}".format(term_width, term_height)) def mkframe(n): @@ -1871,7 +1871,7 @@ def test_to_html_regression_GH6098(self): df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() def test_to_html_truncate(self): - raise nose.SkipTest("unreliable on travis") + pytest.skip("unreliable on travis") index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) df = DataFrame(index=index, columns=range(20)) fmt.set_option('display.max_rows', 8) @@ -1972,7 +1972,7 @@ def test_to_html_truncate(self): self.assertEqual(result, expected) def 
test_to_html_truncate_multi_index(self): - raise nose.SkipTest("unreliable on travis") + pytest.skip("unreliable on travis") arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] df = DataFrame(index=arrays, columns=arrays) @@ -2089,7 +2089,7 @@ def test_to_html_truncate_multi_index(self): self.assertEqual(result, expected) def test_to_html_truncate_multi_index_sparse_off(self): - raise nose.SkipTest("unreliable on travis") + pytest.skip("unreliable on travis") arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] df = DataFrame(index=arrays, columns=arrays) @@ -2250,7 +2250,7 @@ def test_pprint_thing(self): from pandas.formats.printing import pprint_thing as pp_t if PY3: - raise nose.SkipTest("doesn't work on Python 3") + pytest.skip("doesn't work on Python 3") self.assertEqual(pp_t('a'), u('a')) self.assertEqual(pp_t(u('a')), u('a')) diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index 1e6794c1c9c69..52f3e06c6cbd0 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -126,7 +126,7 @@ def test_ambiguous_width(self): # common.console_encode should encode things as utf-8. # """ # if compat.PY3: -# raise nose.SkipTest +# pytest.skip # with tm.stdin_encoding(encoding=None): # result = printing.console_encode(u"\u05d0") diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index eaa209178b2e9..53bb3f9010f7e 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -1,5 +1,4 @@ -import os -from nose import SkipTest +import pytest import copy import numpy as np @@ -8,20 +7,7 @@ from pandas.util.testing import TestCase import pandas.util.testing as tm -# Getting failures on a python 2.7 build with -# whenever we try to import jinja, whether it's installed or not. -# so we're explicitly skipping that one *before* we try to import -# jinja. We still need to export the imports as globals, -# since importing Styler tries to import jinja2. 
-job_name = os.environ.get('JOB_NAME', None) -if job_name == '27_slow_nnet_LOCALE': - raise SkipTest("No jinja") -try: - # Do try except on just jinja, so the only reason - # We skip is if jinja can't import, not something else - import jinja2 # noqa -except ImportError: - raise SkipTest("No Jinja2") +jinja2 = pytest.importorskip('jinja2') from pandas.formats.style import Styler, _get_level_lengths # noqa diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index a55d2cfb2fb2b..1f0d16e959cd7 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -5,7 +5,7 @@ from datetime import timedelta, datetime from distutils.version import LooseVersion import sys -import nose +import pytest from numpy import nan from numpy.random import randn @@ -2066,8 +2066,8 @@ def test_round_issue(self): def test_built_in_round(self): if not compat.PY3: - raise nose.SkipTest("build in round cannot be overriden " - "prior to Python 3") + pytest.skip("build in round cannot be overriden " + "prior to Python 3") # GH11763 # Here's the test frame we'll be working with diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 66a235e1260bd..76eb61bd81110 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -6,7 +6,7 @@ import functools import itertools -import nose +import pytest from numpy.random import randn import numpy as np @@ -1702,7 +1702,7 @@ def test_from_records_with_datetimes(self): # this may fail on certain platforms because of a numpy issue # related GH6140 if not is_platform_little_endian(): - raise nose.SkipTest("known failure of test on non-little endian") + pytest.skip("known failure of test on non-little endian") # construction with a null in a recarray # GH 6140 @@ -1714,7 +1714,7 @@ def test_from_records_with_datetimes(self): try: recarray = np.core.records.fromarrays(arrdata, dtype=dtypes) except (ValueError): - raise nose.SkipTest("known failure of numpy rec array creation") + pytest.skip("known failure of numpy rec array creation") result = DataFrame.from_records(recarray) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index ef800f0dface3..80ea01d3a05aa 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -23,8 +23,8 @@ def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate.pchip missing') + import pytest + pytest.skip('scipy.interpolate.pchip missing') class TestDataFrameMissingData(tm.TestCase, TestData): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index ec73689088035..d6a3592446fd5 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -5,7 +5,7 @@ from datetime import datetime import operator -import nose +import pytest from numpy import nan, random import numpy as np @@ -323,7 +323,7 @@ def test_logical_typeerror(self): self.assertRaises(TypeError, self.frame.__gt__, 'foo') self.assertRaises(TypeError, self.frame.__ne__, 'foo') else: - raise nose.SkipTest('test_logical_typeerror not tested on PY3') + pytest.skip('test_logical_typeerror not tested on PY3') def test_logical_with_nas(self): d = DataFrame({'a': [np.nan, False], 'b': [True, True]}) diff --git a/pandas/tests/frame/test_quantile.py 
b/pandas/tests/frame/test_quantile.py index 400ead788aa7c..909a1a6a4c917 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -3,7 +3,7 @@ from __future__ import print_function -import nose +import pytest import numpy as np from pandas import (DataFrame, Series, Timestamp, _np_version_under1p11) @@ -106,7 +106,7 @@ def test_quantile_axis_parameter(self): def test_quantile_interpolation(self): # GH #10174 if _np_version_under1p9: - raise nose.SkipTest("Numpy version under 1.9") + pytest.skip("Numpy version under 1.9") from numpy import percentile @@ -171,7 +171,7 @@ def test_quantile_interpolation(self): def test_quantile_interpolation_np_lt_1p9(self): # GH #10174 if not _np_version_under1p9: - raise nose.SkipTest("Numpy version is greater than 1.9") + pytest.skip("Numpy version is greater than 1.9") from numpy import percentile diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index aed02b7323f85..647af92b42273 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -3,7 +3,7 @@ from __future__ import print_function import operator -import nose +import pytest from itertools import product from pandas.compat import (zip, range, lrange, StringIO) @@ -30,14 +30,14 @@ def skip_if_no_pandas_parser(parser): if parser != 'pandas': - raise nose.SkipTest("cannot evaluate with parser {0!r}".format(parser)) + pytest.skip("cannot evaluate with parser {0!r}".format(parser)) def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr': if not _NUMEXPR_INSTALLED: - raise nose.SkipTest("cannot query engine numexpr when numexpr not " - "installed") + pytest.skip("cannot query engine numexpr when numexpr not " + "installed") class TestCompat(tm.TestCase): diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py index c9d8ad4231cfb..9395304385681 100644 --- a/pandas/tests/groupby/test_misc.py +++ b/pandas/tests/groupby/test_misc.py @@ -1,6 +1,6 @@ """ misc non-groupby routines, as they are defined in core/groupby.py """ -import nose +import pytest import numpy as np from numpy import nan from pandas.util import testing as tm @@ -42,7 +42,7 @@ def test_nargsort(self): np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) np.argsort(items2, kind='mergesort') except TypeError: - raise nose.SkipTest('requested sort not available for type') + pytest.skip('requested sort not available for type') # mergesort is the most difficult to get right because we want it to be # stable. 
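The hunks before and after this point all apply the same migration idiom: module-level or in-test `raise nose.SkipTest(...)` guards become `pytest.skip(...)`, optional-dependency imports become `pytest.importorskip(...)`, and `nose.tools.assert_raises` becomes the `pytest.raises` context manager. A minimal illustrative sketch of that idiom, not taken from any file in this series (the module name `fictional_dep` and the test body are placeholders):

import pytest

# Skip the whole module at collection time unless the optional
# dependency imports cleanly; importorskip returns the module on success.
fictional_dep = pytest.importorskip('fictional_dep')


def test_optional_feature():
    # Runtime skip, mirroring the converted nose.SkipTest guards.
    if not hasattr(fictional_dep, 'feature'):
        pytest.skip('feature not available in this build')
    # pytest.raises replaces nose.tools.assert_raises.
    with pytest.raises(ValueError):
        fictional_dep.feature(-1)
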
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index af749963146c6..1b67ffce63b10 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,7 +1,7 @@ """ test to_datetime """ import sys -import nose +import pytest import locale import calendar import numpy as np @@ -139,7 +139,7 @@ def test_to_datetime_with_non_exact(self): # 8904 # exact kw if sys.version_info < (2, 7): - raise nose.SkipTest('on python version < 2.7') + pytest.skip('on python version < 2.7') s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', '19MAY11 00:00:00Z']) @@ -277,7 +277,7 @@ def test_to_datetime_tz_psycopg2(self): try: import psycopg2 except ImportError: - raise nose.SkipTest("no psycopg2 installed") + pytest.skip("no psycopg2 installed") # misc cases tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 365236f72e80e..702c4758da245 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -6,7 +6,7 @@ from datetime import timedelta from itertools import product -import nose +import pytest import numpy as np @@ -988,8 +988,8 @@ def test_iter(self): def test_legacy_pickle(self): if PY3: - raise nose.SkipTest("testing for legacy pickles not " - "support on py3") + pytest.skip("testing for legacy pickles not " + "support on py3") path = tm.get_data_path('multiindex_v1.pickle') obj = pd.read_pickle(path) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index b9a746cd25c7a..38f8bb5355a69 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import nose +import pytest import numpy as np import pandas as pd @@ -1172,8 +1172,8 @@ def _assert_replace_conversion(self, from_key, to_key, how): if (from_key == 'bool' and to_key == 'int64' and tm.is_platform_windows()): - raise nose.SkipTest("windows platform buggy: {0} -> {1}".format - (from_key, to_key)) + pytest.skip("windows platform buggy: {0} -> {1}".format + (from_key, to_key)) if ((from_key == 'float64' and to_key in ('bool', 'int64')) or @@ -1189,8 +1189,8 @@ def _assert_replace_conversion(self, from_key, to_key, how): # buggy on 32-bit if tm.is_platform_32bit(): - raise nose.SkipTest("32-bit platform buggy: {0} -> {1}".format - (from_key, to_key)) + pytest.skip("32-bit platform buggy: {0} -> {1}".format + (from_key, to_key)) # Expected: do not downcast by replacement exp = pd.Series(self.rep[to_key], index=index, @@ -1243,7 +1243,7 @@ def test_replace_series_bool(self): if compat.PY3: # doesn't work in PY3, though ...dict_from_bool works fine - raise nose.SkipTest("doesn't work as in PY3") + pytest.skip("doesn't work as in PY3") self._assert_replace_conversion(from_key, to_key, how='series') diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 9fe1d7cacd38f..92e2dc7b5d934 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -import nose +import pytest import os import warnings @@ -28,7 +28,7 @@ def _skip_if_no_scipy_gaussian_kde(): try: from scipy.stats import gaussian_kde # noqa except ImportError: - raise nose.SkipTest("scipy version doesn't support gaussian_kde") + pytest.skip("scipy version doesn't support gaussian_kde") def _ok_for_gaussian_kde(kind): diff --git 
a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index f7fd6a8519533..31c150bc1e64f 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -1,6 +1,6 @@ # coding: utf-8 -import nose +import pytest import itertools import string from distutils.version import LooseVersion @@ -28,7 +28,7 @@ def _skip_if_mpl_14_or_dev_boxplot(): # Don't need try / except since that's done at class level import matplotlib if str(matplotlib.__version__) >= LooseVersion('1.4'): - raise nose.SkipTest("Matplotlib Regression in 1.4 and current dev.") + pytest.skip("Matplotlib Regression in 1.4 and current dev.") @tm.mplskip diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index bcc9c7ceea8b5..25568f7eb61dc 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta, date, time -import nose +import pytest from pandas.compat import lrange, zip import numpy as np @@ -161,8 +161,8 @@ def check_format_of_first_point(ax, expected_string): self.assertEqual(expected_string, ax.format_coord(first_x, first_y)) except (ValueError): - raise nose.SkipTest("skipping test because issue forming " - "test comparison GH7664") + pytest.skip("skipping test because issue forming " + "test comparison GH7664") annual = Series(1, index=date_range('2014-01-01', periods=3, freq='A-DEC')) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 81a54bd38b3f8..48af366f24ea4 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2,7 +2,7 @@ """ Test cases for DataFrame.plot """ -import nose +import pytest import string import warnings @@ -1275,7 +1275,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import Rectangle if self.mpl_le_1_2_1: - raise nose.SkipTest("not supported in matplotlib <= 1.2.x") + pytest.skip("not supported in matplotlib <= 1.2.x") df = DataFrame(randn(100, 4)) series = df[0] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 52b85c89a7009..222165e9d3633 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -4,7 +4,7 @@ from itertools import product from distutils.version import LooseVersion -import nose +import pytest from numpy import nan import numpy as np @@ -476,8 +476,8 @@ def test_cummax_timedelta64(self): self.assert_series_equal(expected, result) def test_npdiff(self): - raise nose.SkipTest("skipping due to Series no longer being an " - "ndarray") + pytest.skip("skipping due to Series no longer being an " + "ndarray") # no longer works as the return type of np.diff is now nd.array s = Series(np.arange(5)) @@ -622,7 +622,7 @@ def test_numpy_round(self): def test_built_in_round(self): if not compat.PY3: - raise nose.SkipTest( + pytest.skip( 'build in round cannot be overriden prior to Python 3') s = Series([1.123, 2.123, 3.123], index=lrange(3)) @@ -785,8 +785,8 @@ def test_corr_rank(self): # these methods got rewritten in 0.8 if scipy.__version__ < LooseVersion('0.9'): - raise nose.SkipTest("skipping corr rank because of scipy version " - "{0}".format(scipy.__version__)) + pytest.skip("skipping corr rank because of scipy version " + "{0}".format(scipy.__version__)) # results from R A = Series( @@ -1063,8 +1063,8 @@ def test_rank_signature(self): 
self.assertRaises(ValueError, s.rank, 'average') def test_rank_inf(self): - raise nose.SkipTest('DataFrame.rank does not currently rank ' - 'np.inf and -np.inf properly') + pytest.skip('DataFrame.rank does not currently rank ' + 'np.inf and -np.inf properly') values = np.array( [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 702fa2acb5106..405d6c98a5d37 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -22,16 +22,16 @@ def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate.pchip missing') + import pytest + pytest.skip('scipy.interpolate.pchip missing') def _skip_if_no_akima(): try: from scipy.interpolate import Akima1DInterpolator # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') + import pytest + pytest.skip('scipy.interpolate.Akima1DInterpolator missing') def _simple_ts(start, end, freq='D'): diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 76db6c90a685f..b8d1b92081858 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -1,7 +1,7 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import nose +import pytest import numpy as np import pandas as pd @@ -73,7 +73,7 @@ def test_quantile_multi(self): def test_quantile_interpolation(self): # GH #10174 if _np_version_under1p9: - raise nose.SkipTest("Numpy version is under 1.9") + pytest.skip("Numpy version is under 1.9") from numpy import percentile @@ -89,7 +89,7 @@ def test_quantile_interpolation(self): def test_quantile_interpolation_dtype(self): # GH #10174 if _np_version_under1p9: - raise nose.SkipTest("Numpy version is under 1.9") + pytest.skip("Numpy version is under 1.9") from numpy import percentile @@ -105,7 +105,7 @@ def test_quantile_interpolation_dtype(self): def test_quantile_interpolation_np_lt_1p9(self): # GH #10174 if not _np_version_under1p9: - raise nose.SkipTest("Numpy version is greater than 1.9") + pytest.skip("Numpy version is greater than 1.9") from numpy import percentile diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 1d1ef1a08859c..473f1d81c9532 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -4,7 +4,7 @@ import re import sys from datetime import datetime, timedelta -import nose +import pytest import numpy as np import pandas as pd @@ -32,7 +32,7 @@ def test_string_methods_dont_fail(self): def test_tricky_container(self): if not hasattr(self, 'unicode_container'): - raise nose.SkipTest('Need unicode_container to test with this') + pytest.skip('Need unicode_container to test with this') repr(self.unicode_container) str(self.unicode_container) bytes(self.unicode_container) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 136786ecff0a0..0318757f76a11 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -4,7 +4,7 @@ import re import operator -import nose +import pytest from numpy.random import randn @@ -21,13 +21,7 @@ if not expr._USE_NUMEXPR: - try: - import numexpr # noqa - except ImportError: - msg = "don't have" - else: - msg = "not using" - raise nose.SkipTest("{0} numexpr".format(msg)) + numexpr = pytest.importorskip('numexpr') _frame = DataFrame(randn(10000, 4), columns=list('ABCD'), 
dtype='float64') _frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64') @@ -70,9 +64,8 @@ def setUp(self): def tearDown(self): expr._MIN_ELEMENTS = self._MIN_ELEMENTS - @nose.tools.nottest - def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, - test_flex=True): + def run_arithmetic(self, df, other, assert_func, check_dtype=False, + test_flex=True): expr._MIN_ELEMENTS = 0 operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow'] if not compat.PY3: @@ -109,15 +102,14 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, raise def test_integer_arithmetic(self): - self.run_arithmetic_test(self.integer, self.integer, - assert_frame_equal) - self.run_arithmetic_test(self.integer.iloc[:, 0], - self.integer.iloc[:, 0], assert_series_equal, - check_dtype=True) - - @nose.tools.nottest - def run_binary_test(self, df, other, assert_func, test_flex=False, - numexpr_ops=set(['gt', 'lt', 'ge', 'le', 'eq', 'ne'])): + self.run_arithmetic(self.integer, self.integer, + assert_frame_equal) + self.run_arithmetic(self.integer.iloc[:, 0], + self.integer.iloc[:, 0], assert_series_equal, + check_dtype=True) + + def run_binary(self, df, other, assert_func, test_flex=False, + numexpr_ops=set(['gt', 'lt', 'ge', 'le', 'eq', 'ne'])): """ tests solely that the result is the same whether or not numexpr is enabled. Need to test whether the function does the correct thing @@ -151,46 +143,46 @@ def run_binary_test(self, df, other, assert_func, test_flex=False, def run_frame(self, df, other, binary_comp=None, run_binary=True, **kwargs): - self.run_arithmetic_test(df, other, assert_frame_equal, - test_flex=False, **kwargs) - self.run_arithmetic_test(df, other, assert_frame_equal, test_flex=True, - **kwargs) + self.run_arithmetic(df, other, assert_frame_equal, + test_flex=False, **kwargs) + self.run_arithmetic(df, other, assert_frame_equal, test_flex=True, + **kwargs) if run_binary: if binary_comp is None: expr.set_use_numexpr(False) binary_comp = other + 1 expr.set_use_numexpr(True) - self.run_binary_test(df, binary_comp, assert_frame_equal, - test_flex=False, **kwargs) - self.run_binary_test(df, binary_comp, assert_frame_equal, - test_flex=True, **kwargs) + self.run_binary(df, binary_comp, assert_frame_equal, + test_flex=False, **kwargs) + self.run_binary(df, binary_comp, assert_frame_equal, + test_flex=True, **kwargs) def run_series(self, ser, other, binary_comp=None, **kwargs): - self.run_arithmetic_test(ser, other, assert_series_equal, - test_flex=False, **kwargs) - self.run_arithmetic_test(ser, other, assert_almost_equal, - test_flex=True, **kwargs) + self.run_arithmetic(ser, other, assert_series_equal, + test_flex=False, **kwargs) + self.run_arithmetic(ser, other, assert_almost_equal, + test_flex=True, **kwargs) # series doesn't uses vec_compare instead of numexpr... 
# if binary_comp is None: # binary_comp = other + 1 - # self.run_binary_test(ser, binary_comp, assert_frame_equal, + # self.run_binary(ser, binary_comp, assert_frame_equal, # test_flex=False, **kwargs) - # self.run_binary_test(ser, binary_comp, assert_frame_equal, + # self.run_binary(ser, binary_comp, assert_frame_equal, # test_flex=True, **kwargs) def run_panel(self, panel, other, binary_comp=None, run_binary=True, assert_func=assert_panel_equal, **kwargs): - self.run_arithmetic_test(panel, other, assert_func, test_flex=False, - **kwargs) - self.run_arithmetic_test(panel, other, assert_func, test_flex=True, - **kwargs) + self.run_arithmetic(panel, other, assert_func, test_flex=False, + **kwargs) + self.run_arithmetic(panel, other, assert_func, test_flex=True, + **kwargs) if run_binary: if binary_comp is None: binary_comp = other + 1 - self.run_binary_test(panel, binary_comp, assert_func, - test_flex=False, **kwargs) - self.run_binary_test(panel, binary_comp, assert_func, - test_flex=True, **kwargs) + self.run_binary(panel, binary_comp, assert_func, + test_flex=False, **kwargs) + self.run_binary(panel, binary_comp, assert_func, + test_flex=True, **kwargs) def test_integer_arithmetic_frame(self): self.run_frame(self.integer, self.integer) @@ -234,22 +226,22 @@ def test_mixed_panel(self): binary_comp=-2) def test_float_arithemtic(self): - self.run_arithmetic_test(self.frame, self.frame, assert_frame_equal) - self.run_arithmetic_test(self.frame.iloc[:, 0], self.frame.iloc[:, 0], - assert_series_equal, check_dtype=True) + self.run_arithmetic(self.frame, self.frame, assert_frame_equal) + self.run_arithmetic(self.frame.iloc[:, 0], self.frame.iloc[:, 0], + assert_series_equal, check_dtype=True) def test_mixed_arithmetic(self): - self.run_arithmetic_test(self.mixed, self.mixed, assert_frame_equal) + self.run_arithmetic(self.mixed, self.mixed, assert_frame_equal) for col in self.mixed.columns: - self.run_arithmetic_test(self.mixed[col], self.mixed[col], - assert_series_equal) + self.run_arithmetic(self.mixed[col], self.mixed[col], + assert_series_equal) def test_integer_with_zeros(self): self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) - self.run_arithmetic_test(self.integer, self.integer, - assert_frame_equal) - self.run_arithmetic_test(self.integer.iloc[:, 0], - self.integer.iloc[:, 0], assert_series_equal) + self.run_arithmetic(self.integer, self.integer, + assert_frame_equal) + self.run_arithmetic(self.integer.iloc[:, 0], + self.integer.iloc[:, 0], assert_series_equal) def test_invalid(self): diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index e84e2d6809e7b..28f1dc61533c1 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -2,7 +2,7 @@ # pylint: disable-msg=E1101,W0612 from operator import methodcaller -import nose +import pytest import numpy as np from numpy import nan import pandas as pd @@ -367,7 +367,7 @@ def test_head_tail(self): try: o.head() except (NotImplementedError): - raise nose.SkipTest('not implemented on {0}'.format( + pytest.skip('not implemented on {0}'.format( o.__class__.__name__)) self._compare(o.head(), o.iloc[:5]) @@ -1567,7 +1567,7 @@ class TestPanel4D(tm.TestCase, Generic): _comparator = lambda self, x, y: assert_panel4d_equal(x, y, by_blocks=True) def test_sample(self): - raise nose.SkipTest("sample on Panel4D") + pytest.skip("sample on Panel4D") def test_to_xarray(self): diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 1dfea168c067c..f086935df6dc8 100644 --- 
a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -3,7 +3,7 @@ from datetime import datetime, date -import nose +import pytest import numpy as np import re @@ -276,7 +276,7 @@ def test_split_block_at(self): # with dup column support this method was taken out # GH3679 - raise nose.SkipTest("skipping for now") + pytest.skip("skipping for now") bs = list(self.fblock.split_block_at('a')) self.assertEqual(len(bs), 1) diff --git a/pandas/tests/test_msgpack/test_unpack.py b/pandas/tests/test_msgpack/test_unpack.py index a182c676adb3b..ae8227ab276fb 100644 --- a/pandas/tests/test_msgpack/test_unpack.py +++ b/pandas/tests/test_msgpack/test_unpack.py @@ -2,7 +2,7 @@ import sys from pandas.msgpack import Unpacker, packb, OutOfData, ExtType import pandas.util.testing as tm -import nose +import pytest class TestUnpack(tm.TestCase): @@ -19,7 +19,7 @@ def test_unpack_array_header_from_file(self): def test_unpacker_hook_refcnt(self): if not hasattr(sys, 'getrefcount'): - raise nose.SkipTest('no sys.getrefcount()') + pytest.skip('no sys.getrefcount()') result = [] def hook(x): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 1fe2d701f5a41..8e0628eefa392 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -3,7 +3,7 @@ from warnings import catch_warnings import datetime import itertools -import nose +import pytest from numpy.random import randn import numpy as np @@ -1733,7 +1733,7 @@ def test_getitem_lowerdim_corner(self): # AMBIGUOUS CASES! def test_partial_ix_missing(self): - raise nose.SkipTest("skipping for now") + pytest.skip("skipping for now") result = self.ymd.loc[2000, 0] expected = self.ymd.loc[2000]['A'] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 4f56419b1323a..2f329f241a5b8 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -4,7 +4,7 @@ from datetime import datetime import operator -import nose +import pytest import numpy as np import pandas as pd @@ -97,7 +97,7 @@ def test_skew(self): try: from scipy.stats import skew except ImportError: - raise nose.SkipTest("no scipy.stats.skew") + pytest.skip("no scipy.stats.skew") def this_skew(x): if len(x) < 3: @@ -2059,7 +2059,7 @@ def test_to_excel(self): import openpyxl # noqa from pandas.io.excel import ExcelFile except ImportError: - raise nose.SkipTest("need xlwt xlrd openpyxl") + pytest.skip("need xlwt xlrd openpyxl") for ext in ['xls', 'xlsx']: with ensure_clean('__tmp__.' + ext) as path: @@ -2067,7 +2067,7 @@ def test_to_excel(self): try: reader = ExcelFile(path) except ImportError: - raise nose.SkipTest("need xlwt xlrd openpyxl") + pytest.skip("need xlwt xlrd openpyxl") for item, df in self.panel.iteritems(): recdf = reader.parse(str(item), index_col=0) @@ -2079,14 +2079,14 @@ def test_to_excel_xlsxwriter(self): import xlsxwriter # noqa from pandas.io.excel import ExcelFile except ImportError: - raise nose.SkipTest("Requires xlrd and xlsxwriter. Skipping test.") + pytest.skip("Requires xlrd and xlsxwriter. 
Skipping test.") with ensure_clean('__tmp__.xlsx') as path: self.panel.to_excel(path, engine='xlsxwriter') try: reader = ExcelFile(path) except ImportError as e: - raise nose.SkipTest("cannot write excel file: %s" % e) + pytest.skip("cannot write excel file: %s" % e) for item, df in self.panel.iteritems(): recdf = reader.parse(str(item), index_col=0) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 96864c626ba7f..902b42e7d77d7 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -2,7 +2,7 @@ from datetime import datetime from pandas.compat import range, lrange import operator -import nose +import pytest import numpy as np @@ -66,7 +66,7 @@ def test_skew(self): try: from scipy.stats import skew except ImportError: - raise nose.SkipTest("no scipy.stats.skew") + pytest.skip("no scipy.stats.skew") def this_skew(x): if len(x) < 3: diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 466e9ee5a30b8..07bfdc8fc9078 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import pandas as pd import unittest -import nose +import pytest import numpy as np import sys from pandas import Series, DataFrame @@ -10,8 +10,7 @@ raise_with_traceback, assert_index_equal, assert_series_equal, assert_frame_equal, assert_numpy_array_equal, - RNGContext, assertRaises, - skip_if_no_package_deco) + RNGContext) from pandas.compat import is_platform_windows # let's get meta. @@ -167,8 +166,8 @@ class TestAssertNumpyArrayEqual(tm.TestCase): def test_numpy_array_equal_message(self): if is_platform_windows(): - raise nose.SkipTest("windows has incomparable line-endings " - "and uses L on the shape") + pytest.skip("windows has incomparable line-endings " + "and uses L on the shape") expected = """numpy array are different @@ -295,8 +294,8 @@ def test_numpy_array_equal_message(self): def test_numpy_array_equal_object_message(self): if is_platform_windows(): - raise nose.SkipTest("windows has incomparable line-endings " - "and uses L on the shape") + pytest.skip("windows has incomparable line-endings " + "and uses L on the shape") a = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-01')]) b = np.array([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]) @@ -772,27 +771,9 @@ class TestLocale(tm.TestCase): def test_locale(self): if sys.platform == 'win32': - raise nose.SkipTest( + pytest.skip( "skipping on win platforms as locale not available") # GH9744 locales = tm.get_locales() self.assertTrue(len(locales) >= 1) - - -def test_skiptest_deco(): - from nose import SkipTest - - @skip_if_no_package_deco("fakepackagename") - def f(): - pass - with assertRaises(SkipTest): - f() - - @skip_if_no_package_deco("numpy") - def f(): - pass - # hack to ensure that SkipTest is *not* raised - with assertRaises(ValueError): - f() - raise ValueError diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 48861fc6a9528..3add568c1ea99 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1,9 +1,8 @@ from itertools import product -import nose +import pytest import sys import warnings -from nose.tools import assert_raises from datetime import datetime from numpy.random import randn import numpy as np @@ -726,7 +725,8 @@ def check_dtypes(self, f, f_name, d, d_name, exp): else: # other methods not Implemented ATM - assert_raises(NotImplementedError, f, roll) + with pytest.raises(NotImplementedError): + f(roll) class 
TestDtype_timedelta(DatetimeLike): @@ -741,8 +741,8 @@ class TestDtype_datetime64UTC(DatetimeLike): dtype = 'datetime64[ns, UTC]' def _create_data(self): - raise nose.SkipTest("direct creation of extension dtype " - "datetime64[ns, UTC] is not supported ATM") + pytest.skip("direct creation of extension dtype " + "datetime64[ns, UTC] is not supported ATM") class TestMoments(Base): @@ -1160,7 +1160,7 @@ def test_rolling_skew(self): try: from scipy.stats import skew except ImportError: - raise nose.SkipTest('no scipy') + pytest.skip('no scipy') self._check_moment_func(mom.rolling_skew, lambda x: skew(x, bias=False), name='skew') @@ -1168,14 +1168,14 @@ def test_rolling_kurt(self): try: from scipy.stats import kurtosis except ImportError: - raise nose.SkipTest('no scipy') + pytest.skip('no scipy') self._check_moment_func(mom.rolling_kurt, lambda x: kurtosis(x, bias=False), name='kurt') def test_fperr_robustness(self): # TODO: remove this once python 2.5 out of picture if PY3: - raise nose.SkipTest("doesn't work on python 3") + pytest.skip("doesn't work on python 3") # #2114 data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? 
\x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>' # noqa diff --git a/pandas/tests/tseries/test_converter.py b/pandas/tests/tseries/test_converter.py index b934aaed7d41f..5351e26f0e62b 100644 --- a/pandas/tests/tseries/test_converter.py +++ b/pandas/tests/tseries/test_converter.py @@ -1,3 +1,4 @@ +import pytest from datetime import datetime, date import numpy as np @@ -7,11 +8,7 @@ from pandas.tseries.offsets import Second, Milli, Micro, Day from pandas.compat.numpy import np_datetime64_compat -try: - import pandas.tseries.converter as converter -except ImportError: - import nose - raise nose.SkipTest("no pandas.tseries.converter, skipping") +converter = pytest.importorskip('pandas.tseries.converter') def test_timtetonum_accepts_unicode(): diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 7c5a4c3df28b2..dfa1e94e4dc11 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -3,8 +3,7 @@ from datetime import date, datetime, timedelta from dateutil.relativedelta import relativedelta -import nose -from nose.tools import assert_raises +import pytest from pandas.compat import range, iteritems from pandas import compat @@ -59,7 +58,8 @@ def test_ole2datetime(): actual = ole2datetime(60000) assert actual == datetime(2064, 4, 8) - assert_raises(ValueError, ole2datetime, 60) + with pytest.raises(ValueError): + ole2datetime(60) def test_to_datetime1(): @@ -159,7 +159,7 @@ def test_apply_out_of_range(self): except (tslib.OutOfBoundsDatetime): raise except (ValueError, KeyError) as e: - raise nose.SkipTest( + pytest.skip( "cannot create out_of_range offset: {0} {1}".format( str(self).split('.')[-1], e)) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 0716a13fac3fe..2672db13a959f 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -1,7 +1,7 @@ import os import locale import codecs -import nose +import pytest import decimal import numpy as np @@ -68,7 +68,7 @@ def setUpClass(cls): cls.locales = tm.get_locales() if not cls.locales: - raise nose.SkipTest("No locales found") + pytest.skip("No locales found") tm._skip_if_windows() @@ -83,20 +83,20 @@ def test_get_locales(self): def test_get_locales_prefix(self): if len(self.locales) == 1: - raise nose.SkipTest("Only a single locale found, no point in " - "trying to test filtering locale prefixes") + pytest.skip("Only a single locale found, no point in " + "trying to test filtering locale prefixes") first_locale = self.locales[0] assert len(tm.get_locales(prefix=first_locale[:2])) > 0 def test_set_locale(self): if len(self.locales) == 1: - raise nose.SkipTest("Only a single locale found, no point in " - "trying to test setting another locale") + pytest.skip("Only a single locale found, no point in " + "trying to test setting another locale") if all(x is None for x in CURRENT_LOCALE): # Not sure why, but on some travis runs with pytest, # getlocale() returned (None, None). - raise nose.SkipTest("CURRENT_LOCALE is not set.") + pytest.skip("CURRENT_LOCALE is not set.") if LOCALE_OVERRIDE is None: lang, enc = 'it_CH', 'UTF-8' @@ -456,7 +456,7 @@ def test_downcast_limits(self): # Test the limits of each downcast. Bug: #14401. # Check to make sure numpy is new enough to run this test. 
if _np_version_under1p9: - raise nose.SkipTest("Numpy version is under 1.9") + pytest.skip("Numpy version is under 1.9") i = 'integer' u = 'unsigned' diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 85d77c2f6f57c..1b501eb1d9bda 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -206,65 +206,6 @@ def wrapped(*args, **kwargs): return wrapped -class KnownFailureTest(Exception): - """Raise this exception to mark a test as a known failing test.""" - pass - - -def knownfailureif(fail_condition, msg=None): - """ - Make function raise KnownFailureTest exception if given condition is true. - - If the condition is a callable, it is used at runtime to dynamically - make the decision. This is useful for tests that may require costly - imports, to delay the cost until the test suite is actually executed. - - Parameters - ---------- - fail_condition : bool or callable - Flag to determine whether to mark the decorated test as a known - failure (if True) or not (if False). - msg : str, optional - Message to give on raising a KnownFailureTest exception. - Default is None. - - Returns - ------- - decorator : function - Decorator, which, when applied to a function, causes SkipTest - to be raised when `skip_condition` is True, and the function - to be called normally otherwise. - - Notes - ----- - The decorator itself is decorated with the ``nose.tools.make_decorator`` - function in order to transmit function name, and various other metadata. - - """ - if msg is None: - msg = 'Test skipped due to known failure' - - # Allow for both boolean or callable known failure conditions. - if callable(fail_condition): - fail_val = fail_condition - else: - fail_val = lambda: fail_condition - - def knownfail_decorator(f): - # Local import to avoid a hard nose dependency and only incur the - # import time overhead at actual test-time. 
- import nose - - def knownfailer(*args, **kwargs): - if fail_val(): - raise KnownFailureTest(msg) - else: - return f(*args, **kwargs) - return nose.tools.make_decorator(f)(knownfailer) - - return knownfail_decorator - - def make_signature(func): """ Returns a string repr of the arg list of a func call, with any defaults diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index c3962ad9c823c..7c5148caf7e74 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -63,7 +63,7 @@ def show_versions(as_json=False): deps = [ # (MODULE_NAME, f(mod) -> mod version) ("pandas", lambda mod: mod.__version__), - ("nose", lambda mod: mod.__version__), + ("pytest", lambda mod: mod.__version__), ("pip", lambda mod: mod.__version__), ("setuptools", lambda mod: mod.__version__), ("Cython", lambda mod: mod.__version__), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c3633c945f60a..566ceec027b2b 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -248,9 +248,9 @@ def close(fignum=None): def _skip_if_32bit(): - import nose + import pytest if is_platform_32bit(): - raise nose.SkipTest("skipping for 32 bit") + pytest.skip("skipping for 32 bit") def mplskip(cls): @@ -262,8 +262,8 @@ def setUpClass(cls): import matplotlib as mpl mpl.use("Agg", warn=False) except ImportError: - import nose - raise nose.SkipTest("matplotlib not installed") + import pytest + pytest.skip("matplotlib not installed") cls.setUpClass = setUpClass return cls @@ -273,102 +273,102 @@ def _skip_if_no_mpl(): try: import matplotlib # noqa except ImportError: - import nose - raise nose.SkipTest("matplotlib not installed") + import pytest + pytest.skip("matplotlib not installed") def _skip_if_mpl_1_5(): import matplotlib v = matplotlib.__version__ if v > LooseVersion('1.4.3') or v[0] == '0': - import nose - raise nose.SkipTest("matplotlib 1.5") + import pytest + pytest.skip("matplotlib 1.5") def _skip_if_no_scipy(): try: import scipy.stats # noqa except ImportError: - import nose - raise nose.SkipTest("no scipy.stats module") + import pytest + pytest.skip("no scipy.stats module") try: import scipy.interpolate # noqa except ImportError: - import nose - raise nose.SkipTest('scipy.interpolate missing') + import pytest + pytest.skip('scipy.interpolate missing') def _skip_if_scipy_0_17(): import scipy v = scipy.__version__ if v >= LooseVersion("0.17.0"): - import nose - raise nose.SkipTest("scipy 0.17") + import pytest + pytest.skip("scipy 0.17") def _skip_if_no_lzma(): try: return compat.import_lzma() except ImportError: - import nose - raise nose.SkipTest('need backports.lzma to run') + import pytest + pytest.skip('need backports.lzma to run') def _skip_if_no_xarray(): try: import xarray except ImportError: - import nose - raise nose.SkipTest("xarray not installed") + import pytest + pytest.skip("xarray not installed") v = xarray.__version__ if v < LooseVersion('0.7.0'): - import nose - raise nose.SkipTest("xarray not version is too low: {0}".format(v)) + import pytest + pytest.skip("xarray not version is too low: {0}".format(v)) def _skip_if_no_pytz(): try: import pytz # noqa except ImportError: - import nose - raise nose.SkipTest("pytz not installed") + import pytest + pytest.skip("pytz not installed") def _skip_if_no_dateutil(): try: import dateutil # noqa except ImportError: - import nose - raise nose.SkipTest("dateutil not installed") + import pytest + pytest.skip("dateutil not installed") def _skip_if_windows_python_3(): if PY3 and is_platform_windows(): - 
import nose - raise nose.SkipTest("not used on python 3/win32") + import pytest + pytest.skip("not used on python 3/win32") def _skip_if_windows(): if is_platform_windows(): - import nose - raise nose.SkipTest("Running on Windows") + import pytest + pytest.skip("Running on Windows") def _skip_if_no_pathlib(): try: from pathlib import Path # noqa except ImportError: - import nose - raise nose.SkipTest("pathlib not available") + import pytest + pytest.skip("pathlib not available") def _skip_if_no_localpath(): try: from py.path import local as LocalPath # noqa except ImportError: - import nose - raise nose.SkipTest("py.path not installed") + import pytest + pytest.skip("py.path not installed") def _incompat_bottleneck_version(method): @@ -392,27 +392,27 @@ def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr': if not _USE_NUMEXPR: - import nose - raise nose.SkipTest("numexpr enabled->{enabled}, " - "installed->{installed}".format( - enabled=_USE_NUMEXPR, - installed=_NUMEXPR_INSTALLED)) + import pytest + pytest.skip("numexpr enabled->{enabled}, " + "installed->{installed}".format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED)) def _skip_if_has_locale(): import locale lang, _ = locale.getlocale() if lang is not None: - import nose - raise nose.SkipTest("Specific locale is set {0}".format(lang)) + import pytest + pytest.skip("Specific locale is set {0}".format(lang)) def _skip_if_not_us_locale(): import locale lang, _ = locale.getlocale() if lang != 'en_US': - import nose - raise nose.SkipTest("Specific locale is set {0}".format(lang)) + import pytest + pytest.skip("Specific locale is set {0}".format(lang)) # ----------------------------------------------------------------------------- # locale utilities @@ -662,8 +662,8 @@ def ensure_clean(filename=None, return_filelike=False): try: fd, filename = tempfile.mkstemp(suffix=filename) except UnicodeEncodeError: - import nose - raise nose.SkipTest('no unicode file names on this system') + import pytest + pytest.skip('no unicode file names on this system') try: yield filename @@ -1997,9 +1997,7 @@ def __init__(self, *args, **kwargs): # Dependency checks. Copied this from Nipy/Nipype (Copyright of # respective developers, license: BSD-3) -def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, - exc_failed_import=ImportError, - exc_failed_check=RuntimeError): +def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): """Check that the minimal version of the required package is installed. Parameters @@ -2015,10 +2013,6 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, checker : object, optional The class that will perform the version checking. Default is distutils.version.LooseVersion. - exc_failed_import : Exception, optional - Class of the exception to be thrown if import failed. - exc_failed_check : Exception, optional - Class of the exception to be thrown if version check failed. 
Examples -------- @@ -2027,6 +2021,7 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, """ + import pytest if app: msg = '%s requires %s' % (app, pkg_name) else: @@ -2036,46 +2031,24 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion, try: mod = __import__(pkg_name) except ImportError: - raise exc_failed_import(msg) - if not version: - return + mod = None try: have_version = mod.__version__ except AttributeError: - raise exc_failed_check('Cannot find version for %s' % pkg_name) - if checker(have_version) < checker(version): - raise exc_failed_check(msg) + pytest.skip('Cannot find version for %s' % pkg_name) + if version and checker(have_version) < checker(version): + pytest.skip(msg) def skip_if_no_package(*args, **kwargs): - """Raise SkipTest if package_check fails + """pytest.skip() if package_check fails Parameters ---------- *args Positional parameters passed to `package_check` *kwargs Keyword parameters passed to `package_check` """ - from nose import SkipTest - package_check(exc_failed_import=SkipTest, - exc_failed_check=SkipTest, - *args, **kwargs) - - -def skip_if_no_package_deco(pkg_name, version=None, app='pandas'): - from nose import SkipTest - - def deco(func): - @wraps(func) - def wrapper(*args, **kwargs): - package_check(pkg_name, version=version, app=app, - exc_failed_import=SkipTest, - exc_failed_check=SkipTest) - return func(*args, **kwargs) - return wrapper - return deco -# -# Additional tags decorators for nose -# + package_check(*args, **kwargs) def optional_args(decorator): @@ -2255,18 +2228,17 @@ def network(t, url="http://www.google.com", >>> test_something() Traceback (most recent call last): ... - SkipTest Errors not related to networking will always be raised. """ - from nose import SkipTest + from pytest import skip t.network = True @wraps(t) def wrapper(*args, **kwargs): if check_before_test and not raise_on_error: if not can_connect(url, error_classes): - raise SkipTest + skip() try: return t(*args, **kwargs) except Exception as e: @@ -2275,8 +2247,8 @@ def wrapper(*args, **kwargs): errno = getattr(e.reason, 'errno', None) if errno in skip_errnos: - raise SkipTest("Skipping test due to known errno" - " and error %s" % e) + skip("Skipping test due to known errno" + " and error %s" % e) try: e_str = traceback.format_exc(e) @@ -2284,8 +2256,8 @@ def wrapper(*args, **kwargs): e_str = str(e) if any([m.lower() in e_str.lower() for m in _skip_on_messages]): - raise SkipTest("Skipping test because exception " - "message is known and error %s" % e) + skip("Skipping test because exception " + "message is known and error %s" % e) if not isinstance(e, error_classes): raise @@ -2293,8 +2265,8 @@ def wrapper(*args, **kwargs): if raise_on_error or can_connect(url, error_classes): raise else: - raise SkipTest("Skipping test due to lack of connectivity" - " and error %s" % e) + skip("Skipping test due to lack of connectivity" + " and error %s" % e) return wrapper @@ -2775,8 +2747,8 @@ def set_timezone(tz): 'EDT' """ if is_platform_windows(): - import nose - raise nose.SkipTest("timezone setting not supported on windows") + import pytest + pytest.skip("timezone setting not supported on windows") import os import time diff --git a/setup.cfg b/setup.cfg index 143470f7ee350..45d98dd733f1f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ tag_prefix = v parentdir_prefix = pandas- [flake8] -ignore = E731 +ignore = E731,E402 [yapf] based_on_style = pep8 From 2f971a23a67ef9bc51453d94ae7b9626e12be006 Mon Sep 17 00:00:00 
2001 From: "John W. O'Brien" Date: Sat, 11 Feb 2017 21:21:56 -0500 Subject: [PATCH 045/353] BUG: Avoid grafting missing examples directory (#15373) --- MANIFEST.in | 1 - 1 file changed, 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 2d26fbfd6adaf..b7a7e6039ac9a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,7 +7,6 @@ include setup.py graft doc prune doc/build -graft examples graft pandas global-exclude *.so From 1bad601641cc024cc4d0c1215b12c9d0066b8103 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 11 Feb 2017 21:53:44 -0500 Subject: [PATCH 046/353] CLN: remove pandas/io/auth.py, from ga.py (now removed) (#15374) --- pandas/io/auth.py | 126 ---------------------------------------------- 1 file changed, 126 deletions(-) delete mode 100644 pandas/io/auth.py diff --git a/pandas/io/auth.py b/pandas/io/auth.py deleted file mode 100644 index e42df6a7309b7..0000000000000 --- a/pandas/io/auth.py +++ /dev/null @@ -1,126 +0,0 @@ -from __future__ import print_function -# see LICENSES directory for copyright and license -import os -import sys -import logging - -import httplib2 - -import apiclient.discovery as gapi -import gflags -import oauth2client.file as auth_file -import oauth2client.client as oauth -import oauth2client.tools as tools -OOB_CALLBACK_URN = oauth.OOB_CALLBACK_URN - - -class AuthenticationConfigError(ValueError): - pass - -FLOWS = {} -FLAGS = gflags.FLAGS -DEFAULT_SECRETS = os.path.join( - os.path.dirname(__file__), 'client_secrets.json') -DEFAULT_SCOPE = 'https://www.googleapis.com/auth/analytics.readonly' -DEFAULT_TOKEN_FILE = os.path.join(os.path.dirname(__file__), 'analytics.dat') -MISSING_CLIENT_MSG = """ -WARNING: Please configure OAuth 2.0 - -You need to populate the client_secrets.json file found at: - - %s - -with information from the APIs Console -. - -""" -DOC_URL = ('https://developers.google.com/api-client-library/python/guide/' - 'aaa_client_secrets') - -gflags.DEFINE_enum('logging_level', 'ERROR', - ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], - 'Set the level of logging detail.') - -# Name of file that will store the access and refresh tokens to access -# the API without having to login each time. Make sure this file is in -# a secure place. - - -def process_flags(flags=None): - """Uses the command-line flags to set the logging level. - - Args: - argv: List of command line arguments passed to the python script. - """ - if flags is None: - flags = [] - - # Let the gflags module process the command-line arguments. - try: - FLAGS(flags) - except gflags.FlagsError as e: - print('%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS)) - sys.exit(1) - - # Set the logging according to the command-line flag. 
- logging.getLogger().setLevel(getattr(logging, FLAGS.logging_level)) - - -def get_flow(secret, scope, redirect): - """ - Retrieve an authentication flow object based on the given - configuration in the secret file name, the authentication scope, - and a redirect URN - """ - key = (secret, scope, redirect) - flow = FLOWS.get(key, None) - if flow is None: - msg = MISSING_CLIENT_MSG % secret - if not os.path.exists(secret): - raise AuthenticationConfigError(msg) - flow = oauth.flow_from_clientsecrets(secret, scope, - redirect_uri=redirect, - message=msg) - FLOWS[key] = flow - return flow - - -def make_token_store(fpath=None): - """create token storage from give file name""" - if fpath is None: - fpath = DEFAULT_TOKEN_FILE - return auth_file.Storage(fpath) - - -def authenticate(flow, storage=None): - """ - Try to retrieve a valid set of credentials from the token store if possible - Otherwise use the given authentication flow to obtain new credentials - and return an authenticated http object - - Parameters - ---------- - flow : authentication workflow - storage: token storage, default None - """ - http = httplib2.Http() - - # Prepare credentials, and authorize HTTP object with them. - credentials = storage.get() - if credentials is None or credentials.invalid: - credentials = tools.run(flow, storage) - - http = credentials.authorize(http) - return http - - -def init_service(http): - """ - Use the given http object to build the analytics service object - """ - return gapi.build('analytics', 'v3', http=http) - - -def reset_default_token_store(): - import os - os.remove(DEFAULT_TOKEN_FILE) From 5fb5228988832ff0328c4d830cb4e2609b882ab1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 09:50:55 -0500 Subject: [PATCH 047/353] TST: consolidate remaining tests under pandas.tests move all remaining tests so that ALL tests are now under pandas/tests Author: Jeff Reback Closes #15371 from jreback/tests and squashes the following commits: 43039e4 [Jeff Reback] add in data 118127b [Jeff Reback] wip bfa6a9c [Jeff Reback] fix data locations 79a79e6 [Jeff Reback] fix import 57437bf [Jeff Reback] fixes b407586 [Jeff Reback] move io e13bfe3 [Jeff Reback] move tools 0194e31 [Jeff Reback] move computation 0e6bcb4 [Jeff Reback] rename test_msgpack -> msgpack c5e4ab8 [Jeff Reback] move sparse 42e60e2 [Jeff Reback] move api tests --- pandas/{api/tests => tests/api}/__init__.py | 0 pandas/{api/tests => tests/api}/test_api.py | 2 +- .../tests => tests/computation}/__init__.py | 0 .../computation}/test_compat.py | 0 .../tests => tests/computation}/test_eval.py | 0 pandas/tests/indexes/datetimes/test_ops.py | 3 +- pandas/{io/tests => tests/io}/__init__.py | 0 .../{io/tests => tests/io}/data/S4_EDUC1.dta | Bin .../{io/tests => tests/io}/data/banklist.csv | 0 .../{io/tests => tests/io}/data/banklist.html | 0 pandas/{io/tests => tests/io}/data/blank.xls | Bin pandas/{io/tests => tests/io}/data/blank.xlsm | Bin pandas/{io/tests => tests/io}/data/blank.xlsx | Bin .../io}/data/blank_with_header.xls | Bin .../io}/data/blank_with_header.xlsm | Bin .../io}/data/blank_with_header.xlsx | Bin .../io}/data/categorical_0_14_1.pickle | 0 .../io}/data/categorical_0_15_2.pickle | Bin .../io}/data/computer_sales_page.html | 0 .../tests => tests/io}/data/gbq_fake_job.txt | 0 .../data/html_encoding/chinese_utf-16.html | Bin .../data/html_encoding/chinese_utf-32.html | Bin .../io}/data/html_encoding/chinese_utf-8.html | 0 .../io}/data/html_encoding/letz_latin1.html | 0 pandas/{io/tests => tests/io}/data/iris.csv | 0 
.../io}/data/legacy_hdf/datetimetz_object.h5 | Bin .../io}/data/legacy_hdf/legacy.h5 | Bin .../io}/data/legacy_hdf/legacy_0.10.h5 | Bin .../io}/data/legacy_hdf/legacy_table.h5 | Bin .../io}/data/legacy_hdf/legacy_table_0.11.h5 | Bin .../io}/data/legacy_hdf/pytables_native.h5 | Bin .../io}/data/legacy_hdf/pytables_native2.h5 | Bin .../0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack | Bin .../0.16.2_AMD64_windows_2.7.10.msgpack | Bin .../0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack | Bin .../0.16.2_x86_64_darwin_2.7.10.msgpack | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack | Bin .../0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack | Bin .../0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack | Bin .../0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack | Bin .../0.17.0_AMD64_windows_2.7.11.msgpack | Bin .../0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack | Bin .../0.17.0_x86_64_darwin_2.7.11.msgpack | Bin .../0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack | Bin .../0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack | Bin .../0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack | Bin .../0.17.1_AMD64_windows_2.7.11.msgpack | Bin .../0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack | Bin .../0.17.1_AMD64_windows_2.7.11.msgpack | Bin .../0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack | Bin .../0.17.1_x86_64_darwin_2.7.11.msgpack | Bin .../0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack | Bin .../0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack | Bin .../0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack | Bin .../0.18.0_AMD64_windows_2.7.11.msgpack | Bin .../0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack | Bin .../0.18.0_x86_64_darwin_2.7.11.msgpack | Bin .../0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack | Bin .../0.18.1_x86_64_darwin_2.7.12.msgpack | Bin .../0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack | Bin .../0.10.1/AMD64_windows_2.7.3.pickle | Bin .../0.10.1/x86_64_linux_2.7.3.pickle | Bin .../0.11.0/0.11.0_x86_64_linux_3.3.0.pickle | Bin .../0.11.0/x86_64_linux_2.7.3.pickle | Bin .../0.11.0/x86_64_linux_3.3.0.pickle | Bin .../0.12.0/0.12.0_AMD64_windows_2.7.3.pickle | Bin .../0.12.0/0.12.0_x86_64_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_AMD64_windows_2.7.3.pickle | Bin .../0.13.0/0.13.0_i686_linux_2.6.5.pickle | Bin .../0.13.0/0.13.0_i686_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_i686_linux_3.2.3.pickle | Bin .../0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle | Bin .../0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_2.7.3.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_2.7.8.pickle | Bin .../0.13.0/0.13.0_x86_64_linux_3.3.0.pickle | Bin .../0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle | Bin .../0.14.0/0.14.0_x86_64_linux_2.7.8.pickle | Bin .../0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle | Bin .../0.14.1/0.14.1_x86_64_linux_2.7.8.pickle | Bin .../0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle | Bin .../0.15.0/0.15.0_x86_64_linux_2.7.8.pickle | Bin .../0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle | Bin .../0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle | Bin .../0.16.2/0.16.2_AMD64_windows_2.7.10.pickle | Bin .../0.16.2/0.16.2_AMD64_windows_3.4.3.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle | Bin .../0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle | Bin .../0.16.2/0.16.2_x86_64_linux_2.7.10.pickle | Bin .../0.16.2/0.16.2_x86_64_linux_3.4.3.pickle | Bin .../0.17.0/0.17.0_AMD64_windows_2.7.11.pickle | Bin .../0.17.0/0.17.0_AMD64_windows_3.4.4.pickle | Bin .../0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle | Bin .../0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle | Bin .../0.17.0/0.17.0_x86_64_linux_2.7.11.pickle | Bin 
.../0.17.0/0.17.0_x86_64_linux_3.4.4.pickle | Bin .../0.17.0/0.17.1_AMD64_windows_2.7.11.pickle | Bin .../0.17.1/0.17.1_AMD64_windows_2.7.11.pickle | Bin .../0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle | Bin .../0.18.0/0.18.0_AMD64_windows_2.7.11.pickle | Bin .../0.18.0/0.18.0_AMD64_windows_3.5.1.pickle | Bin .../0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle | Bin .../0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle | Bin .../0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle | Bin .../0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle | Bin pandas/{io/tests => tests/io}/data/macau.html | 0 .../{io/tests => tests/io}/data/nyse_wsj.html | 0 pandas/{io/tests => tests/io}/data/spam.html | 0 .../tests => tests/io}/data/stata10_115.dta | Bin .../tests => tests/io}/data/stata10_117.dta | Bin .../tests => tests/io}/data/stata11_115.dta | Bin .../tests => tests/io}/data/stata11_117.dta | Bin .../tests => tests/io}/data/stata12_117.dta | Bin .../tests => tests/io}/data/stata14_118.dta | Bin .../{io/tests => tests/io}/data/stata15.dta | Bin .../tests => tests/io}/data/stata1_114.dta | Bin .../tests => tests/io}/data/stata1_117.dta | Bin .../io}/data/stata1_encoding.dta | Bin .../tests => tests/io}/data/stata2_113.dta | Bin .../tests => tests/io}/data/stata2_114.dta | Bin .../tests => tests/io}/data/stata2_115.dta | Bin .../tests => tests/io}/data/stata2_117.dta | Bin pandas/{io/tests => tests/io}/data/stata3.csv | 0 .../tests => tests/io}/data/stata3_113.dta | Bin .../tests => tests/io}/data/stata3_114.dta | Bin .../tests => tests/io}/data/stata3_115.dta | Bin .../tests => tests/io}/data/stata3_117.dta | Bin .../tests => tests/io}/data/stata4_113.dta | Bin .../tests => tests/io}/data/stata4_114.dta | Bin .../tests => tests/io}/data/stata4_115.dta | Bin .../tests => tests/io}/data/stata4_117.dta | Bin pandas/{io/tests => tests/io}/data/stata5.csv | 0 .../tests => tests/io}/data/stata5_113.dta | Bin .../tests => tests/io}/data/stata5_114.dta | Bin .../tests => tests/io}/data/stata5_115.dta | Bin .../tests => tests/io}/data/stata5_117.dta | Bin pandas/{io/tests => tests/io}/data/stata6.csv | 0 .../tests => tests/io}/data/stata6_113.dta | Bin .../tests => tests/io}/data/stata6_114.dta | Bin .../tests => tests/io}/data/stata6_115.dta | Bin .../tests => tests/io}/data/stata6_117.dta | Bin .../tests => tests/io}/data/stata7_111.dta | Bin .../tests => tests/io}/data/stata7_115.dta | Bin .../tests => tests/io}/data/stata7_117.dta | Bin .../tests => tests/io}/data/stata8_113.dta | Bin .../tests => tests/io}/data/stata8_115.dta | Bin .../tests => tests/io}/data/stata8_117.dta | Bin .../tests => tests/io}/data/stata9_115.dta | Bin .../tests => tests/io}/data/stata9_117.dta | Bin pandas/{io/tests => tests/io}/data/test1.csv | 0 pandas/{io/tests => tests/io}/data/test1.xls | Bin pandas/{io/tests => tests/io}/data/test1.xlsm | Bin pandas/{io/tests => tests/io}/data/test1.xlsx | Bin pandas/{io/tests => tests/io}/data/test2.xls | Bin pandas/{io/tests => tests/io}/data/test2.xlsm | Bin pandas/{io/tests => tests/io}/data/test2.xlsx | Bin pandas/{io/tests => tests/io}/data/test3.xls | Bin pandas/{io/tests => tests/io}/data/test3.xlsm | Bin pandas/{io/tests => tests/io}/data/test3.xlsx | Bin pandas/{io/tests => tests/io}/data/test4.xls | Bin pandas/{io/tests => tests/io}/data/test4.xlsm | Bin pandas/{io/tests => tests/io}/data/test4.xlsx | Bin pandas/{io/tests => tests/io}/data/test5.xls | Bin pandas/{io/tests => tests/io}/data/test5.xlsm | Bin pandas/{io/tests => tests/io}/data/test5.xlsx | Bin .../io}/data/test_converters.xls | Bin 
.../io}/data/test_converters.xlsm | Bin .../io}/data/test_converters.xlsx | Bin .../io}/data/test_index_name_pre17.xls | Bin .../io}/data/test_index_name_pre17.xlsm | Bin .../io}/data/test_index_name_pre17.xlsx | Bin .../{io/tests => tests/io}/data/test_mmap.csv | 0 .../io}/data/test_multisheet.xls | Bin .../io}/data/test_multisheet.xlsm | Bin .../io}/data/test_multisheet.xlsx | Bin .../tests => tests/io}/data/test_squeeze.xls | Bin .../tests => tests/io}/data/test_squeeze.xlsm | Bin .../tests => tests/io}/data/test_squeeze.xlsx | Bin .../tests => tests/io}/data/test_types.xls | Bin .../tests => tests/io}/data/test_types.xlsm | Bin .../tests => tests/io}/data/test_types.xlsx | Bin .../io}/data/testdateoverflow.xls | Bin .../io}/data/testdateoverflow.xlsm | Bin .../io}/data/testdateoverflow.xlsx | Bin .../{io/tests => tests/io}/data/testdtype.xls | Bin .../tests => tests/io}/data/testdtype.xlsm | Bin .../tests => tests/io}/data/testdtype.xlsx | Bin .../io}/data/testmultiindex.xls | Bin .../io}/data/testmultiindex.xlsm | Bin .../io}/data/testmultiindex.xlsx | Bin .../tests => tests/io}/data/testskiprows.xls | Bin .../tests => tests/io}/data/testskiprows.xlsm | Bin .../tests => tests/io}/data/testskiprows.xlsx | Bin .../tests => tests/io}/data/times_1900.xls | Bin .../tests => tests/io}/data/times_1900.xlsm | Bin .../tests => tests/io}/data/times_1900.xlsx | Bin .../tests => tests/io}/data/times_1904.xls | Bin .../tests => tests/io}/data/times_1904.xlsm | Bin .../tests => tests/io}/data/times_1904.xlsx | Bin pandas/{io/tests => tests/io}/data/tips.csv | 0 .../tests => tests/io}/data/valid_markup.html | 0 .../io}/data/wikipedia_states.html | 0 .../io}/generate_legacy_storage_files.py | 0 .../{io/tests => tests/io}/json/__init__.py | 0 .../io}/json/data/tsframe_iso_v012.json | 0 .../io}/json/data/tsframe_v012.json | 0 .../tests => tests/io}/json/test_normalize.py | 0 .../tests => tests/io}/json/test_pandas.py | 0 .../{io/tests => tests/io}/json/test_ujson.py | 0 .../{io/tests => tests/io}/parser/__init__.py | 0 .../io}/parser/c_parser_only.py | 0 .../{io/tests => tests/io}/parser/comment.py | 0 .../{io/tests => tests/io}/parser/common.py | 0 .../tests => tests/io}/parser/compression.py | 0 .../tests => tests/io}/parser/converters.py | 0 .../tests => tests/io}/parser/data/iris.csv | 0 .../io}/parser/data/salaries.csv | 0 .../io}/parser/data/salaries.csv.bz2 | Bin .../io}/parser/data/salaries.csv.gz | Bin .../io}/parser/data/salaries.csv.xz | Bin .../io}/parser/data/salaries.csv.zip | Bin .../io}/parser/data/sauron.SHIFT_JIS.csv | 0 .../tests => tests/io}/parser/data/test1.csv | 0 .../io}/parser/data/test1.csv.bz2 | Bin .../io}/parser/data/test1.csv.gz | Bin .../tests => tests/io}/parser/data/test2.csv | 0 .../io}/parser/data/test_mmap.csv | 0 .../tests => tests/io}/parser/data/tips.csv | 0 .../io}/parser/data/unicode_series.csv | 0 .../io}/parser/data/utf16_ex.txt | Bin .../{io/tests => tests/io}/parser/dialect.py | 0 .../{io/tests => tests/io}/parser/dtypes.py | 0 .../{io/tests => tests/io}/parser/header.py | 0 .../tests => tests/io}/parser/index_col.py | 0 .../tests => tests/io}/parser/multithread.py | 0 .../tests => tests/io}/parser/na_values.py | 0 .../tests => tests/io}/parser/parse_dates.py | 0 .../io}/parser/python_parser_only.py | 0 .../{io/tests => tests/io}/parser/quoting.py | 0 .../{io/tests => tests/io}/parser/skiprows.py | 0 .../tests => tests/io}/parser/test_network.py | 0 .../tests => tests/io}/parser/test_parsers.py | 0 .../io}/parser/test_read_fwf.py | 0 
.../io}/parser/test_textreader.py | 0 .../io}/parser/test_unsupported.py | 0 .../{io/tests => tests/io}/parser/usecols.py | 0 .../tests => tests/io}/sas/data/DEMO_G.csv | 0 .../tests => tests/io}/sas/data/DEMO_G.xpt | Bin .../tests => tests/io}/sas/data/DRXFCD_G.csv | 0 .../tests => tests/io}/sas/data/DRXFCD_G.xpt | Bin .../tests => tests/io}/sas/data/SSHSV1_A.csv | 0 .../tests => tests/io}/sas/data/SSHSV1_A.xpt | Bin .../tests => tests/io}/sas/data/airline.csv | 0 .../io}/sas/data/airline.sas7bdat | Bin .../io}/sas/data/paxraw_d_short.csv | 0 .../io}/sas/data/paxraw_d_short.xpt | Bin .../io}/sas/data/productsales.csv | 0 .../io}/sas/data/productsales.sas7bdat | Bin .../io}/sas/data/test1.sas7bdat | Bin .../io}/sas/data/test10.sas7bdat | Bin .../io}/sas/data/test11.sas7bdat | Bin .../io}/sas/data/test12.sas7bdat | Bin .../io}/sas/data/test13.sas7bdat | Bin .../io}/sas/data/test14.sas7bdat | Bin .../io}/sas/data/test15.sas7bdat | Bin .../io}/sas/data/test16.sas7bdat | Bin .../io}/sas/data/test2.sas7bdat | Bin .../io}/sas/data/test3.sas7bdat | Bin .../io}/sas/data/test4.sas7bdat | Bin .../io}/sas/data/test5.sas7bdat | Bin .../io}/sas/data/test6.sas7bdat | Bin .../io}/sas/data/test7.sas7bdat | Bin .../io}/sas/data/test8.sas7bdat | Bin .../io}/sas/data/test9.sas7bdat | Bin .../io}/sas/data/test_12659.csv | 0 .../io}/sas/data/test_12659.sas7bdat | Bin .../io}/sas/data/test_sas7bdat_1.csv | 0 .../io}/sas/data/test_sas7bdat_2.csv | 0 pandas/{io/tests => tests/io}/sas/test_sas.py | 0 .../tests => tests/io}/sas/test_sas7bdat.py | 0 .../{io/tests => tests/io}/sas/test_xport.py | 0 .../{io/tests => tests/io}/test_clipboard.py | 0 pandas/{io/tests => tests/io}/test_common.py | 0 .../io}/test_date_converters.py | 0 pandas/{io/tests => tests/io}/test_excel.py | 0 pandas/{io/tests => tests/io}/test_feather.py | 0 pandas/{io/tests => tests/io}/test_gbq.py | 0 pandas/{io/tests => tests/io}/test_html.py | 0 pandas/{io/tests => tests/io}/test_packers.py | 2 +- pandas/{io/tests => tests/io}/test_pickle.py | 2 +- .../{io/tests => tests/io}/test_pytables.py | 0 pandas/{io/tests => tests/io}/test_s3.py | 0 pandas/{io/tests => tests/io}/test_sql.py | 0 pandas/{io/tests => tests/io}/test_stata.py | 0 .../tests => tests/msgpack}/__init__.py | 0 .../{test_msgpack => msgpack}/test_buffer.py | 0 .../{test_msgpack => msgpack}/test_case.py | 0 .../{test_msgpack => msgpack}/test_except.py | 0 .../test_extension.py | 0 .../{test_msgpack => msgpack}/test_format.py | 0 .../{test_msgpack => msgpack}/test_limits.py | 0 .../{test_msgpack => msgpack}/test_newspec.py | 0 .../{test_msgpack => msgpack}/test_obj.py | 0 .../{test_msgpack => msgpack}/test_pack.py | 0 .../test_read_size.py | 0 .../{test_msgpack => msgpack}/test_seq.py | 0 .../test_sequnpack.py | 0 .../{test_msgpack => msgpack}/test_subtype.py | 0 .../{test_msgpack => msgpack}/test_unpack.py | 0 .../test_unpack_raw.py | 0 .../{test_msgpack => sparse}/__init__.py | 0 .../sparse}/test_arithmetics.py | 0 .../tests => tests/sparse}/test_array.py | 0 .../sparse}/test_combine_concat.py | 0 .../tests => tests/sparse}/test_format.py | 0 .../tests => tests/sparse}/test_frame.py | 0 .../tests => tests/sparse}/test_groupby.py | 0 .../tests => tests/sparse}/test_indexing.py | 0 .../tests => tests/sparse}/test_libsparse.py | 0 .../tests => tests/sparse}/test_list.py | 0 .../tests => tests/sparse}/test_pivot.py | 0 .../tests => tests/sparse}/test_series.py | 0 .../{tools/tests => tests/tools}/__init__.py | 0 .../tools}/data/allow_exact_matches.csv | 0 
.../allow_exact_matches_and_tolerance.csv | 0 .../tests => tests/tools}/data/asof.csv | 0 .../tests => tests/tools}/data/asof2.csv | 0 .../tests => tests/tools}/data/cut_data.csv | 0 .../tests => tests/tools}/data/quotes.csv | 0 .../tests => tests/tools}/data/quotes2.csv | 0 .../tests => tests/tools}/data/tolerance.csv | 0 .../tests => tests/tools}/data/trades.csv | 0 .../tests => tests/tools}/data/trades2.csv | 0 .../tests => tests/tools}/test_concat.py | 0 .../tests => tests/tools}/test_hashing.py | 0 .../{tools/tests => tests/tools}/test_join.py | 2 +- .../tests => tests/tools}/test_merge.py | 0 .../tests => tests/tools}/test_merge_asof.py | 0 .../tools}/test_merge_ordered.py | 0 .../tests => tests/tools}/test_pivot.py | 0 .../{tools/tests => tests/tools}/test_tile.py | 0 .../{tools/tests => tests/tools}/test_util.py | 0 setup.py | 64 +++++++++--------- 344 files changed, 38 insertions(+), 37 deletions(-) rename pandas/{api/tests => tests/api}/__init__.py (100%) rename pandas/{api/tests => tests/api}/test_api.py (99%) rename pandas/{computation/tests => tests/computation}/__init__.py (100%) rename pandas/{computation/tests => tests/computation}/test_compat.py (100%) rename pandas/{computation/tests => tests/computation}/test_eval.py (100%) rename pandas/{io/tests => tests/io}/__init__.py (100%) rename pandas/{io/tests => tests/io}/data/S4_EDUC1.dta (100%) rename pandas/{io/tests => tests/io}/data/banklist.csv (100%) rename pandas/{io/tests => tests/io}/data/banklist.html (100%) rename pandas/{io/tests => tests/io}/data/blank.xls (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank.xlsm (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank.xlsx (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xls (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xlsm (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/blank_with_header.xlsx (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/categorical_0_14_1.pickle (100%) rename pandas/{io/tests => tests/io}/data/categorical_0_15_2.pickle (100%) rename pandas/{io/tests => tests/io}/data/computer_sales_page.html (100%) rename pandas/{io/tests => tests/io}/data/gbq_fake_job.txt (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-16.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-32.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/chinese_utf-8.html (100%) rename pandas/{io/tests => tests/io}/data/html_encoding/letz_latin1.html (100%) rename pandas/{io/tests => tests/io}/data/iris.csv (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/datetimetz_object.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy_0.10.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy_table.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/legacy_table_0.11.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/pytables_native.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_hdf/pytables_native2.h5 (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack (100%) rename pandas/{io/tests => 
tests/io}/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle (100%) rename 
pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle (100%) rename pandas/{io/tests => 
tests/io}/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle (100%) rename pandas/{io/tests => tests/io}/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle (100%) rename pandas/{io/tests => tests/io}/data/macau.html (100%) rename pandas/{io/tests => tests/io}/data/nyse_wsj.html (100%) rename pandas/{io/tests => tests/io}/data/spam.html (100%) rename pandas/{io/tests => tests/io}/data/stata10_115.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata10_117.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata11_115.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata11_117.dta (100%) mode change 100755 => 100644 rename pandas/{io/tests => tests/io}/data/stata12_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata14_118.dta (100%) rename pandas/{io/tests => tests/io}/data/stata15.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata1_encoding.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata2_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3.csv (100%) rename pandas/{io/tests => tests/io}/data/stata3_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata3_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata4_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5.csv (100%) rename pandas/{io/tests => tests/io}/data/stata5_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata5_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6.csv (100%) rename pandas/{io/tests => tests/io}/data/stata6_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_114.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata6_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_111.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata7_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_113.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata8_117.dta (100%) rename pandas/{io/tests => tests/io}/data/stata9_115.dta (100%) rename pandas/{io/tests => tests/io}/data/stata9_117.dta (100%) rename pandas/{io/tests => tests/io}/data/test1.csv (100%) rename pandas/{io/tests => tests/io}/data/test1.xls (100%) rename pandas/{io/tests => tests/io}/data/test1.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test1.xlsx (100%) rename 
pandas/{io/tests => tests/io}/data/test2.xls (100%) rename pandas/{io/tests => tests/io}/data/test2.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test2.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test3.xls (100%) rename pandas/{io/tests => tests/io}/data/test3.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test3.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test4.xls (100%) rename pandas/{io/tests => tests/io}/data/test4.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test4.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test5.xls (100%) rename pandas/{io/tests => tests/io}/data/test5.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test5.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xls (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_converters.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xls (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_index_name_pre17.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_mmap.csv (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xls (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_multisheet.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xls (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_squeeze.xlsx (100%) rename pandas/{io/tests => tests/io}/data/test_types.xls (100%) rename pandas/{io/tests => tests/io}/data/test_types.xlsm (100%) rename pandas/{io/tests => tests/io}/data/test_types.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xls (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testdateoverflow.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xls (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testdtype.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xls (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testmultiindex.xlsx (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xls (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xlsm (100%) rename pandas/{io/tests => tests/io}/data/testskiprows.xlsx (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xls (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xlsm (100%) rename pandas/{io/tests => tests/io}/data/times_1900.xlsx (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xls (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xlsm (100%) rename pandas/{io/tests => tests/io}/data/times_1904.xlsx (100%) rename pandas/{io/tests => tests/io}/data/tips.csv (100%) rename pandas/{io/tests => tests/io}/data/valid_markup.html (100%) rename pandas/{io/tests => tests/io}/data/wikipedia_states.html (100%) rename pandas/{io/tests => tests/io}/generate_legacy_storage_files.py (100%) rename pandas/{io/tests => tests/io}/json/__init__.py (100%) rename pandas/{io/tests => tests/io}/json/data/tsframe_iso_v012.json (100%) rename pandas/{io/tests => tests/io}/json/data/tsframe_v012.json (100%) rename pandas/{io/tests => tests/io}/json/test_normalize.py 
(100%) rename pandas/{io/tests => tests/io}/json/test_pandas.py (100%) rename pandas/{io/tests => tests/io}/json/test_ujson.py (100%) rename pandas/{io/tests => tests/io}/parser/__init__.py (100%) rename pandas/{io/tests => tests/io}/parser/c_parser_only.py (100%) rename pandas/{io/tests => tests/io}/parser/comment.py (100%) rename pandas/{io/tests => tests/io}/parser/common.py (100%) rename pandas/{io/tests => tests/io}/parser/compression.py (100%) rename pandas/{io/tests => tests/io}/parser/converters.py (100%) rename pandas/{io/tests => tests/io}/parser/data/iris.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.bz2 (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.gz (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.xz (100%) rename pandas/{io/tests => tests/io}/parser/data/salaries.csv.zip (100%) rename pandas/{io/tests => tests/io}/parser/data/sauron.SHIFT_JIS.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv.bz2 (100%) rename pandas/{io/tests => tests/io}/parser/data/test1.csv.gz (100%) rename pandas/{io/tests => tests/io}/parser/data/test2.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/test_mmap.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/tips.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/unicode_series.csv (100%) rename pandas/{io/tests => tests/io}/parser/data/utf16_ex.txt (100%) rename pandas/{io/tests => tests/io}/parser/dialect.py (100%) rename pandas/{io/tests => tests/io}/parser/dtypes.py (100%) rename pandas/{io/tests => tests/io}/parser/header.py (100%) rename pandas/{io/tests => tests/io}/parser/index_col.py (100%) rename pandas/{io/tests => tests/io}/parser/multithread.py (100%) rename pandas/{io/tests => tests/io}/parser/na_values.py (100%) rename pandas/{io/tests => tests/io}/parser/parse_dates.py (100%) rename pandas/{io/tests => tests/io}/parser/python_parser_only.py (100%) rename pandas/{io/tests => tests/io}/parser/quoting.py (100%) rename pandas/{io/tests => tests/io}/parser/skiprows.py (100%) rename pandas/{io/tests => tests/io}/parser/test_network.py (100%) rename pandas/{io/tests => tests/io}/parser/test_parsers.py (100%) rename pandas/{io/tests => tests/io}/parser/test_read_fwf.py (100%) rename pandas/{io/tests => tests/io}/parser/test_textreader.py (100%) rename pandas/{io/tests => tests/io}/parser/test_unsupported.py (100%) rename pandas/{io/tests => tests/io}/parser/usecols.py (100%) rename pandas/{io/tests => tests/io}/sas/data/DEMO_G.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/DEMO_G.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/DRXFCD_G.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/DRXFCD_G.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/SSHSV1_A.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/SSHSV1_A.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/airline.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/airline.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/paxraw_d_short.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/paxraw_d_short.xpt (100%) rename pandas/{io/tests => tests/io}/sas/data/productsales.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/productsales.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test1.sas7bdat (100%) rename pandas/{io/tests => 
tests/io}/sas/data/test10.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test11.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test12.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test13.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test14.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test15.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test16.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test2.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test3.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test4.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test5.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test6.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test7.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test8.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test9.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test_12659.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/test_12659.sas7bdat (100%) rename pandas/{io/tests => tests/io}/sas/data/test_sas7bdat_1.csv (100%) rename pandas/{io/tests => tests/io}/sas/data/test_sas7bdat_2.csv (100%) rename pandas/{io/tests => tests/io}/sas/test_sas.py (100%) rename pandas/{io/tests => tests/io}/sas/test_sas7bdat.py (100%) rename pandas/{io/tests => tests/io}/sas/test_xport.py (100%) rename pandas/{io/tests => tests/io}/test_clipboard.py (100%) rename pandas/{io/tests => tests/io}/test_common.py (100%) rename pandas/{io/tests => tests/io}/test_date_converters.py (100%) rename pandas/{io/tests => tests/io}/test_excel.py (100%) rename pandas/{io/tests => tests/io}/test_feather.py (100%) rename pandas/{io/tests => tests/io}/test_gbq.py (100%) rename pandas/{io/tests => tests/io}/test_html.py (100%) rename pandas/{io/tests => tests/io}/test_packers.py (99%) rename pandas/{io/tests => tests/io}/test_pickle.py (99%) rename pandas/{io/tests => tests/io}/test_pytables.py (100%) rename pandas/{io/tests => tests/io}/test_s3.py (100%) rename pandas/{io/tests => tests/io}/test_sql.py (100%) rename pandas/{io/tests => tests/io}/test_stata.py (100%) rename pandas/{sparse/tests => tests/msgpack}/__init__.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_buffer.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_case.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_except.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_extension.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_format.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_limits.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_newspec.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_obj.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_pack.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_read_size.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_seq.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_sequnpack.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_subtype.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_unpack.py (100%) rename pandas/tests/{test_msgpack => msgpack}/test_unpack_raw.py (100%) rename pandas/tests/{test_msgpack => sparse}/__init__.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_arithmetics.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_array.py (100%) rename 
pandas/{sparse/tests => tests/sparse}/test_combine_concat.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_format.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_frame.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_groupby.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_indexing.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_libsparse.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_list.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_pivot.py (100%) rename pandas/{sparse/tests => tests/sparse}/test_series.py (100%) rename pandas/{tools/tests => tests/tools}/__init__.py (100%) rename pandas/{tools/tests => tests/tools}/data/allow_exact_matches.csv (100%) rename pandas/{tools/tests => tests/tools}/data/allow_exact_matches_and_tolerance.csv (100%) rename pandas/{tools/tests => tests/tools}/data/asof.csv (100%) rename pandas/{tools/tests => tests/tools}/data/asof2.csv (100%) rename pandas/{tools/tests => tests/tools}/data/cut_data.csv (100%) rename pandas/{tools/tests => tests/tools}/data/quotes.csv (100%) rename pandas/{tools/tests => tests/tools}/data/quotes2.csv (100%) rename pandas/{tools/tests => tests/tools}/data/tolerance.csv (100%) rename pandas/{tools/tests => tests/tools}/data/trades.csv (100%) rename pandas/{tools/tests => tests/tools}/data/trades2.csv (100%) rename pandas/{tools/tests => tests/tools}/test_concat.py (100%) rename pandas/{tools/tests => tests/tools}/test_hashing.py (100%) rename pandas/{tools/tests => tests/tools}/test_join.py (99%) rename pandas/{tools/tests => tests/tools}/test_merge.py (100%) rename pandas/{tools/tests => tests/tools}/test_merge_asof.py (100%) rename pandas/{tools/tests => tests/tools}/test_merge_ordered.py (100%) rename pandas/{tools/tests => tests/tools}/test_pivot.py (100%) rename pandas/{tools/tests => tests/tools}/test_tile.py (100%) rename pandas/{tools/tests => tests/tools}/test_util.py (100%) diff --git a/pandas/api/tests/__init__.py b/pandas/tests/api/__init__.py similarity index 100% rename from pandas/api/tests/__init__.py rename to pandas/tests/api/__init__.py diff --git a/pandas/api/tests/test_api.py b/pandas/tests/api/test_api.py similarity index 99% rename from pandas/api/tests/test_api.py rename to pandas/tests/api/test_api.py index 05cf5dc4b7e7b..90a0c1d5c9347 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/tests/api/test_api.py @@ -133,7 +133,7 @@ def test_api(self): class TestApi(Base, tm.TestCase): - allowed = ['tests', 'types'] + allowed = ['types'] def test_api(self): diff --git a/pandas/computation/tests/__init__.py b/pandas/tests/computation/__init__.py similarity index 100% rename from pandas/computation/tests/__init__.py rename to pandas/tests/computation/__init__.py diff --git a/pandas/computation/tests/test_compat.py b/pandas/tests/computation/test_compat.py similarity index 100% rename from pandas/computation/tests/test_compat.py rename to pandas/tests/computation/test_compat.py diff --git a/pandas/computation/tests/test_eval.py b/pandas/tests/computation/test_eval.py similarity index 100% rename from pandas/computation/tests/test_eval.py rename to pandas/tests/computation/test_eval.py diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 63bf07ec041d3..9a968a42c4247 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1245,7 +1245,8 @@ def test_shift(self): self.assertEqual(shifted[0], self.rng[0]) 
self.assertEqual(shifted.offset, self.rng.offset) - with tm.assert_produces_warning(PerformanceWarning): + # PerformanceWarning + with warnings.catch_warnings(record=True): rng = date_range(START, END, freq=BMonthEnd()) shifted = rng.shift(1, freq=CDay()) self.assertEqual(shifted[0], rng[0] + CDay()) diff --git a/pandas/io/tests/__init__.py b/pandas/tests/io/__init__.py similarity index 100% rename from pandas/io/tests/__init__.py rename to pandas/tests/io/__init__.py diff --git a/pandas/io/tests/data/S4_EDUC1.dta b/pandas/tests/io/data/S4_EDUC1.dta similarity index 100% rename from pandas/io/tests/data/S4_EDUC1.dta rename to pandas/tests/io/data/S4_EDUC1.dta diff --git a/pandas/io/tests/data/banklist.csv b/pandas/tests/io/data/banklist.csv similarity index 100% rename from pandas/io/tests/data/banklist.csv rename to pandas/tests/io/data/banklist.csv diff --git a/pandas/io/tests/data/banklist.html b/pandas/tests/io/data/banklist.html similarity index 100% rename from pandas/io/tests/data/banklist.html rename to pandas/tests/io/data/banklist.html diff --git a/pandas/io/tests/data/blank.xls b/pandas/tests/io/data/blank.xls old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank.xls rename to pandas/tests/io/data/blank.xls diff --git a/pandas/io/tests/data/blank.xlsm b/pandas/tests/io/data/blank.xlsm old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank.xlsm rename to pandas/tests/io/data/blank.xlsm diff --git a/pandas/io/tests/data/blank.xlsx b/pandas/tests/io/data/blank.xlsx old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank.xlsx rename to pandas/tests/io/data/blank.xlsx diff --git a/pandas/io/tests/data/blank_with_header.xls b/pandas/tests/io/data/blank_with_header.xls old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank_with_header.xls rename to pandas/tests/io/data/blank_with_header.xls diff --git a/pandas/io/tests/data/blank_with_header.xlsm b/pandas/tests/io/data/blank_with_header.xlsm old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank_with_header.xlsm rename to pandas/tests/io/data/blank_with_header.xlsm diff --git a/pandas/io/tests/data/blank_with_header.xlsx b/pandas/tests/io/data/blank_with_header.xlsx old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/blank_with_header.xlsx rename to pandas/tests/io/data/blank_with_header.xlsx diff --git a/pandas/io/tests/data/categorical_0_14_1.pickle b/pandas/tests/io/data/categorical_0_14_1.pickle similarity index 100% rename from pandas/io/tests/data/categorical_0_14_1.pickle rename to pandas/tests/io/data/categorical_0_14_1.pickle diff --git a/pandas/io/tests/data/categorical_0_15_2.pickle b/pandas/tests/io/data/categorical_0_15_2.pickle similarity index 100% rename from pandas/io/tests/data/categorical_0_15_2.pickle rename to pandas/tests/io/data/categorical_0_15_2.pickle diff --git a/pandas/io/tests/data/computer_sales_page.html b/pandas/tests/io/data/computer_sales_page.html similarity index 100% rename from pandas/io/tests/data/computer_sales_page.html rename to pandas/tests/io/data/computer_sales_page.html diff --git a/pandas/io/tests/data/gbq_fake_job.txt b/pandas/tests/io/data/gbq_fake_job.txt similarity index 100% rename from pandas/io/tests/data/gbq_fake_job.txt rename to pandas/tests/io/data/gbq_fake_job.txt diff --git a/pandas/io/tests/data/html_encoding/chinese_utf-16.html 
b/pandas/tests/io/data/html_encoding/chinese_utf-16.html similarity index 100% rename from pandas/io/tests/data/html_encoding/chinese_utf-16.html rename to pandas/tests/io/data/html_encoding/chinese_utf-16.html diff --git a/pandas/io/tests/data/html_encoding/chinese_utf-32.html b/pandas/tests/io/data/html_encoding/chinese_utf-32.html similarity index 100% rename from pandas/io/tests/data/html_encoding/chinese_utf-32.html rename to pandas/tests/io/data/html_encoding/chinese_utf-32.html diff --git a/pandas/io/tests/data/html_encoding/chinese_utf-8.html b/pandas/tests/io/data/html_encoding/chinese_utf-8.html similarity index 100% rename from pandas/io/tests/data/html_encoding/chinese_utf-8.html rename to pandas/tests/io/data/html_encoding/chinese_utf-8.html diff --git a/pandas/io/tests/data/html_encoding/letz_latin1.html b/pandas/tests/io/data/html_encoding/letz_latin1.html similarity index 100% rename from pandas/io/tests/data/html_encoding/letz_latin1.html rename to pandas/tests/io/data/html_encoding/letz_latin1.html diff --git a/pandas/io/tests/data/iris.csv b/pandas/tests/io/data/iris.csv similarity index 100% rename from pandas/io/tests/data/iris.csv rename to pandas/tests/io/data/iris.csv diff --git a/pandas/io/tests/data/legacy_hdf/datetimetz_object.h5 b/pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/datetimetz_object.h5 rename to pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy.h5 b/pandas/tests/io/data/legacy_hdf/legacy.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy.h5 rename to pandas/tests/io/data/legacy_hdf/legacy.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 b/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy_0.10.h5 rename to pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy_table.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy_table.h5 rename to pandas/tests/io/data/legacy_hdf/legacy_table.h5 diff --git a/pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/legacy_table_0.11.h5 rename to pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 diff --git a/pandas/io/tests/data/legacy_hdf/pytables_native.h5 b/pandas/tests/io/data/legacy_hdf/pytables_native.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/pytables_native.h5 rename to pandas/tests/io/data/legacy_hdf/pytables_native.h5 diff --git a/pandas/io/tests/data/legacy_hdf/pytables_native2.h5 b/pandas/tests/io/data/legacy_hdf/pytables_native2.h5 similarity index 100% rename from pandas/io/tests/data/legacy_hdf/pytables_native2.h5 rename to pandas/tests/io/data/legacy_hdf/pytables_native2.h5 diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.0/0.16.0_x86_64_darwin_2.7.9.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack 
similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.9.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_3.4.3.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_2.7.10.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack b/pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.16.2/0.16.2_x86_64_linux_3.4.3.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_AMD64_windows_3.4.4.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_2.7.11.msgpack diff 
--git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_darwin_3.4.4.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.0_x86_64_linux_3.4.4.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.0/0.17.1_AMD64_windows_3.5.1.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_AMD64_windows_3.5.1.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_darwin_3.5.1.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack similarity index 100% rename from 
pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack b/pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.17.1/0.17.1_x86_64_linux_3.4.4.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_AMD64_windows_3.5.1.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_2.7.11.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.18.0/0.18.0_x86_64_darwin_3.5.1.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack b/pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack similarity index 100% rename from pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack rename to pandas/tests/io/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack diff --git a/pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.10.1/AMD64_windows_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.10.1/x86_64_linux_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle 
b/pandas/tests/io/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle rename to pandas/tests/io/data/legacy_pickle/0.11.0/0.11.0_x86_64_linux_3.3.0.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle b/pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle rename to pandas/tests/io/data/legacy_pickle/0.11.0/x86_64_linux_3.3.0.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_AMD64_windows_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.12.0/0.12.0_x86_64_linux_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_AMD64_windows_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.6.5.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_i686_linux_3.2.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.5.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle 
b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_darwin_2.7.6.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_2.7.8.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle b/pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle rename to pandas/tests/io/data/legacy_pickle/0.13.0/0.13.0_x86_64_linux_3.3.0.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle b/pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle rename to pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_darwin_2.7.6.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle rename to pandas/tests/io/data/legacy_pickle/0.14.0/0.14.0_x86_64_linux_2.7.8.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle b/pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle rename to pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_darwin_2.7.12.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle rename to pandas/tests/io/data/legacy_pickle/0.14.1/0.14.1_x86_64_linux_2.7.8.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle b/pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle rename to pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_darwin_2.7.12.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle b/pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle rename to pandas/tests/io/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle diff --git 
a/pandas/io/tests/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle b/pandas/tests/io/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle rename to pandas/tests/io/data/legacy_pickle/0.15.2/0.15.2_x86_64_darwin_2.7.9.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle b/pandas/tests/io/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.0/0.16.0_x86_64_darwin_2.7.9.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.9.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_3.4.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_2.7.10.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle b/pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle rename to pandas/tests/io/data/legacy_pickle/0.16.2/0.16.2_x86_64_linux_3.4.3.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle rename to 
pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_AMD64_windows_3.4.4.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.4.4.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_linux_3.4.4.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.0/0.17.1_AMD64_windows_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_AMD64_windows_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle rename to pandas/tests/io/data/legacy_pickle/0.17.1/0.17.1_x86_64_darwin_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle similarity index 100% 
rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_AMD64_windows_3.5.1.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_2.7.11.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle b/pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle rename to pandas/tests/io/data/legacy_pickle/0.18.0/0.18.0_x86_64_darwin_3.5.1.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle b/pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle rename to pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle diff --git a/pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle b/pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle similarity index 100% rename from pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle rename to pandas/tests/io/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle diff --git a/pandas/io/tests/data/macau.html b/pandas/tests/io/data/macau.html similarity index 100% rename from pandas/io/tests/data/macau.html rename to pandas/tests/io/data/macau.html diff --git a/pandas/io/tests/data/nyse_wsj.html b/pandas/tests/io/data/nyse_wsj.html similarity index 100% rename from pandas/io/tests/data/nyse_wsj.html rename to pandas/tests/io/data/nyse_wsj.html diff --git a/pandas/io/tests/data/spam.html b/pandas/tests/io/data/spam.html similarity index 100% rename from pandas/io/tests/data/spam.html rename to pandas/tests/io/data/spam.html diff --git a/pandas/io/tests/data/stata10_115.dta b/pandas/tests/io/data/stata10_115.dta old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/stata10_115.dta rename to pandas/tests/io/data/stata10_115.dta diff --git a/pandas/io/tests/data/stata10_117.dta b/pandas/tests/io/data/stata10_117.dta old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/stata10_117.dta rename to pandas/tests/io/data/stata10_117.dta diff --git a/pandas/io/tests/data/stata11_115.dta b/pandas/tests/io/data/stata11_115.dta old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/stata11_115.dta rename to pandas/tests/io/data/stata11_115.dta diff --git a/pandas/io/tests/data/stata11_117.dta b/pandas/tests/io/data/stata11_117.dta old mode 100755 new mode 100644 similarity index 100% rename from pandas/io/tests/data/stata11_117.dta rename to pandas/tests/io/data/stata11_117.dta diff --git a/pandas/io/tests/data/stata12_117.dta b/pandas/tests/io/data/stata12_117.dta similarity index 100% rename from pandas/io/tests/data/stata12_117.dta rename to pandas/tests/io/data/stata12_117.dta diff --git a/pandas/io/tests/data/stata14_118.dta b/pandas/tests/io/data/stata14_118.dta similarity index 100% rename from pandas/io/tests/data/stata14_118.dta rename to 
pandas/tests/io/data/stata14_118.dta diff --git a/pandas/io/tests/data/stata15.dta b/pandas/tests/io/data/stata15.dta similarity index 100% rename from pandas/io/tests/data/stata15.dta rename to pandas/tests/io/data/stata15.dta diff --git a/pandas/io/tests/data/stata1_114.dta b/pandas/tests/io/data/stata1_114.dta similarity index 100% rename from pandas/io/tests/data/stata1_114.dta rename to pandas/tests/io/data/stata1_114.dta diff --git a/pandas/io/tests/data/stata1_117.dta b/pandas/tests/io/data/stata1_117.dta similarity index 100% rename from pandas/io/tests/data/stata1_117.dta rename to pandas/tests/io/data/stata1_117.dta diff --git a/pandas/io/tests/data/stata1_encoding.dta b/pandas/tests/io/data/stata1_encoding.dta similarity index 100% rename from pandas/io/tests/data/stata1_encoding.dta rename to pandas/tests/io/data/stata1_encoding.dta diff --git a/pandas/io/tests/data/stata2_113.dta b/pandas/tests/io/data/stata2_113.dta similarity index 100% rename from pandas/io/tests/data/stata2_113.dta rename to pandas/tests/io/data/stata2_113.dta diff --git a/pandas/io/tests/data/stata2_114.dta b/pandas/tests/io/data/stata2_114.dta similarity index 100% rename from pandas/io/tests/data/stata2_114.dta rename to pandas/tests/io/data/stata2_114.dta diff --git a/pandas/io/tests/data/stata2_115.dta b/pandas/tests/io/data/stata2_115.dta similarity index 100% rename from pandas/io/tests/data/stata2_115.dta rename to pandas/tests/io/data/stata2_115.dta diff --git a/pandas/io/tests/data/stata2_117.dta b/pandas/tests/io/data/stata2_117.dta similarity index 100% rename from pandas/io/tests/data/stata2_117.dta rename to pandas/tests/io/data/stata2_117.dta diff --git a/pandas/io/tests/data/stata3.csv b/pandas/tests/io/data/stata3.csv similarity index 100% rename from pandas/io/tests/data/stata3.csv rename to pandas/tests/io/data/stata3.csv diff --git a/pandas/io/tests/data/stata3_113.dta b/pandas/tests/io/data/stata3_113.dta similarity index 100% rename from pandas/io/tests/data/stata3_113.dta rename to pandas/tests/io/data/stata3_113.dta diff --git a/pandas/io/tests/data/stata3_114.dta b/pandas/tests/io/data/stata3_114.dta similarity index 100% rename from pandas/io/tests/data/stata3_114.dta rename to pandas/tests/io/data/stata3_114.dta diff --git a/pandas/io/tests/data/stata3_115.dta b/pandas/tests/io/data/stata3_115.dta similarity index 100% rename from pandas/io/tests/data/stata3_115.dta rename to pandas/tests/io/data/stata3_115.dta diff --git a/pandas/io/tests/data/stata3_117.dta b/pandas/tests/io/data/stata3_117.dta similarity index 100% rename from pandas/io/tests/data/stata3_117.dta rename to pandas/tests/io/data/stata3_117.dta diff --git a/pandas/io/tests/data/stata4_113.dta b/pandas/tests/io/data/stata4_113.dta similarity index 100% rename from pandas/io/tests/data/stata4_113.dta rename to pandas/tests/io/data/stata4_113.dta diff --git a/pandas/io/tests/data/stata4_114.dta b/pandas/tests/io/data/stata4_114.dta similarity index 100% rename from pandas/io/tests/data/stata4_114.dta rename to pandas/tests/io/data/stata4_114.dta diff --git a/pandas/io/tests/data/stata4_115.dta b/pandas/tests/io/data/stata4_115.dta similarity index 100% rename from pandas/io/tests/data/stata4_115.dta rename to pandas/tests/io/data/stata4_115.dta diff --git a/pandas/io/tests/data/stata4_117.dta b/pandas/tests/io/data/stata4_117.dta similarity index 100% rename from pandas/io/tests/data/stata4_117.dta rename to pandas/tests/io/data/stata4_117.dta diff --git a/pandas/io/tests/data/stata5.csv 
b/pandas/tests/io/data/stata5.csv similarity index 100% rename from pandas/io/tests/data/stata5.csv rename to pandas/tests/io/data/stata5.csv diff --git a/pandas/io/tests/data/stata5_113.dta b/pandas/tests/io/data/stata5_113.dta similarity index 100% rename from pandas/io/tests/data/stata5_113.dta rename to pandas/tests/io/data/stata5_113.dta diff --git a/pandas/io/tests/data/stata5_114.dta b/pandas/tests/io/data/stata5_114.dta similarity index 100% rename from pandas/io/tests/data/stata5_114.dta rename to pandas/tests/io/data/stata5_114.dta diff --git a/pandas/io/tests/data/stata5_115.dta b/pandas/tests/io/data/stata5_115.dta similarity index 100% rename from pandas/io/tests/data/stata5_115.dta rename to pandas/tests/io/data/stata5_115.dta diff --git a/pandas/io/tests/data/stata5_117.dta b/pandas/tests/io/data/stata5_117.dta similarity index 100% rename from pandas/io/tests/data/stata5_117.dta rename to pandas/tests/io/data/stata5_117.dta diff --git a/pandas/io/tests/data/stata6.csv b/pandas/tests/io/data/stata6.csv similarity index 100% rename from pandas/io/tests/data/stata6.csv rename to pandas/tests/io/data/stata6.csv diff --git a/pandas/io/tests/data/stata6_113.dta b/pandas/tests/io/data/stata6_113.dta similarity index 100% rename from pandas/io/tests/data/stata6_113.dta rename to pandas/tests/io/data/stata6_113.dta diff --git a/pandas/io/tests/data/stata6_114.dta b/pandas/tests/io/data/stata6_114.dta similarity index 100% rename from pandas/io/tests/data/stata6_114.dta rename to pandas/tests/io/data/stata6_114.dta diff --git a/pandas/io/tests/data/stata6_115.dta b/pandas/tests/io/data/stata6_115.dta similarity index 100% rename from pandas/io/tests/data/stata6_115.dta rename to pandas/tests/io/data/stata6_115.dta diff --git a/pandas/io/tests/data/stata6_117.dta b/pandas/tests/io/data/stata6_117.dta similarity index 100% rename from pandas/io/tests/data/stata6_117.dta rename to pandas/tests/io/data/stata6_117.dta diff --git a/pandas/io/tests/data/stata7_111.dta b/pandas/tests/io/data/stata7_111.dta similarity index 100% rename from pandas/io/tests/data/stata7_111.dta rename to pandas/tests/io/data/stata7_111.dta diff --git a/pandas/io/tests/data/stata7_115.dta b/pandas/tests/io/data/stata7_115.dta similarity index 100% rename from pandas/io/tests/data/stata7_115.dta rename to pandas/tests/io/data/stata7_115.dta diff --git a/pandas/io/tests/data/stata7_117.dta b/pandas/tests/io/data/stata7_117.dta similarity index 100% rename from pandas/io/tests/data/stata7_117.dta rename to pandas/tests/io/data/stata7_117.dta diff --git a/pandas/io/tests/data/stata8_113.dta b/pandas/tests/io/data/stata8_113.dta similarity index 100% rename from pandas/io/tests/data/stata8_113.dta rename to pandas/tests/io/data/stata8_113.dta diff --git a/pandas/io/tests/data/stata8_115.dta b/pandas/tests/io/data/stata8_115.dta similarity index 100% rename from pandas/io/tests/data/stata8_115.dta rename to pandas/tests/io/data/stata8_115.dta diff --git a/pandas/io/tests/data/stata8_117.dta b/pandas/tests/io/data/stata8_117.dta similarity index 100% rename from pandas/io/tests/data/stata8_117.dta rename to pandas/tests/io/data/stata8_117.dta diff --git a/pandas/io/tests/data/stata9_115.dta b/pandas/tests/io/data/stata9_115.dta similarity index 100% rename from pandas/io/tests/data/stata9_115.dta rename to pandas/tests/io/data/stata9_115.dta diff --git a/pandas/io/tests/data/stata9_117.dta b/pandas/tests/io/data/stata9_117.dta similarity index 100% rename from pandas/io/tests/data/stata9_117.dta rename to 
pandas/tests/io/data/stata9_117.dta diff --git a/pandas/io/tests/data/test1.csv b/pandas/tests/io/data/test1.csv similarity index 100% rename from pandas/io/tests/data/test1.csv rename to pandas/tests/io/data/test1.csv diff --git a/pandas/io/tests/data/test1.xls b/pandas/tests/io/data/test1.xls similarity index 100% rename from pandas/io/tests/data/test1.xls rename to pandas/tests/io/data/test1.xls diff --git a/pandas/io/tests/data/test1.xlsm b/pandas/tests/io/data/test1.xlsm similarity index 100% rename from pandas/io/tests/data/test1.xlsm rename to pandas/tests/io/data/test1.xlsm diff --git a/pandas/io/tests/data/test1.xlsx b/pandas/tests/io/data/test1.xlsx similarity index 100% rename from pandas/io/tests/data/test1.xlsx rename to pandas/tests/io/data/test1.xlsx diff --git a/pandas/io/tests/data/test2.xls b/pandas/tests/io/data/test2.xls similarity index 100% rename from pandas/io/tests/data/test2.xls rename to pandas/tests/io/data/test2.xls diff --git a/pandas/io/tests/data/test2.xlsm b/pandas/tests/io/data/test2.xlsm similarity index 100% rename from pandas/io/tests/data/test2.xlsm rename to pandas/tests/io/data/test2.xlsm diff --git a/pandas/io/tests/data/test2.xlsx b/pandas/tests/io/data/test2.xlsx similarity index 100% rename from pandas/io/tests/data/test2.xlsx rename to pandas/tests/io/data/test2.xlsx diff --git a/pandas/io/tests/data/test3.xls b/pandas/tests/io/data/test3.xls similarity index 100% rename from pandas/io/tests/data/test3.xls rename to pandas/tests/io/data/test3.xls diff --git a/pandas/io/tests/data/test3.xlsm b/pandas/tests/io/data/test3.xlsm similarity index 100% rename from pandas/io/tests/data/test3.xlsm rename to pandas/tests/io/data/test3.xlsm diff --git a/pandas/io/tests/data/test3.xlsx b/pandas/tests/io/data/test3.xlsx similarity index 100% rename from pandas/io/tests/data/test3.xlsx rename to pandas/tests/io/data/test3.xlsx diff --git a/pandas/io/tests/data/test4.xls b/pandas/tests/io/data/test4.xls similarity index 100% rename from pandas/io/tests/data/test4.xls rename to pandas/tests/io/data/test4.xls diff --git a/pandas/io/tests/data/test4.xlsm b/pandas/tests/io/data/test4.xlsm similarity index 100% rename from pandas/io/tests/data/test4.xlsm rename to pandas/tests/io/data/test4.xlsm diff --git a/pandas/io/tests/data/test4.xlsx b/pandas/tests/io/data/test4.xlsx similarity index 100% rename from pandas/io/tests/data/test4.xlsx rename to pandas/tests/io/data/test4.xlsx diff --git a/pandas/io/tests/data/test5.xls b/pandas/tests/io/data/test5.xls similarity index 100% rename from pandas/io/tests/data/test5.xls rename to pandas/tests/io/data/test5.xls diff --git a/pandas/io/tests/data/test5.xlsm b/pandas/tests/io/data/test5.xlsm similarity index 100% rename from pandas/io/tests/data/test5.xlsm rename to pandas/tests/io/data/test5.xlsm diff --git a/pandas/io/tests/data/test5.xlsx b/pandas/tests/io/data/test5.xlsx similarity index 100% rename from pandas/io/tests/data/test5.xlsx rename to pandas/tests/io/data/test5.xlsx diff --git a/pandas/io/tests/data/test_converters.xls b/pandas/tests/io/data/test_converters.xls similarity index 100% rename from pandas/io/tests/data/test_converters.xls rename to pandas/tests/io/data/test_converters.xls diff --git a/pandas/io/tests/data/test_converters.xlsm b/pandas/tests/io/data/test_converters.xlsm similarity index 100% rename from pandas/io/tests/data/test_converters.xlsm rename to pandas/tests/io/data/test_converters.xlsm diff --git a/pandas/io/tests/data/test_converters.xlsx b/pandas/tests/io/data/test_converters.xlsx 
similarity index 100% rename from pandas/io/tests/data/test_converters.xlsx rename to pandas/tests/io/data/test_converters.xlsx diff --git a/pandas/io/tests/data/test_index_name_pre17.xls b/pandas/tests/io/data/test_index_name_pre17.xls similarity index 100% rename from pandas/io/tests/data/test_index_name_pre17.xls rename to pandas/tests/io/data/test_index_name_pre17.xls diff --git a/pandas/io/tests/data/test_index_name_pre17.xlsm b/pandas/tests/io/data/test_index_name_pre17.xlsm similarity index 100% rename from pandas/io/tests/data/test_index_name_pre17.xlsm rename to pandas/tests/io/data/test_index_name_pre17.xlsm diff --git a/pandas/io/tests/data/test_index_name_pre17.xlsx b/pandas/tests/io/data/test_index_name_pre17.xlsx similarity index 100% rename from pandas/io/tests/data/test_index_name_pre17.xlsx rename to pandas/tests/io/data/test_index_name_pre17.xlsx diff --git a/pandas/io/tests/data/test_mmap.csv b/pandas/tests/io/data/test_mmap.csv similarity index 100% rename from pandas/io/tests/data/test_mmap.csv rename to pandas/tests/io/data/test_mmap.csv diff --git a/pandas/io/tests/data/test_multisheet.xls b/pandas/tests/io/data/test_multisheet.xls similarity index 100% rename from pandas/io/tests/data/test_multisheet.xls rename to pandas/tests/io/data/test_multisheet.xls diff --git a/pandas/io/tests/data/test_multisheet.xlsm b/pandas/tests/io/data/test_multisheet.xlsm similarity index 100% rename from pandas/io/tests/data/test_multisheet.xlsm rename to pandas/tests/io/data/test_multisheet.xlsm diff --git a/pandas/io/tests/data/test_multisheet.xlsx b/pandas/tests/io/data/test_multisheet.xlsx similarity index 100% rename from pandas/io/tests/data/test_multisheet.xlsx rename to pandas/tests/io/data/test_multisheet.xlsx diff --git a/pandas/io/tests/data/test_squeeze.xls b/pandas/tests/io/data/test_squeeze.xls similarity index 100% rename from pandas/io/tests/data/test_squeeze.xls rename to pandas/tests/io/data/test_squeeze.xls diff --git a/pandas/io/tests/data/test_squeeze.xlsm b/pandas/tests/io/data/test_squeeze.xlsm similarity index 100% rename from pandas/io/tests/data/test_squeeze.xlsm rename to pandas/tests/io/data/test_squeeze.xlsm diff --git a/pandas/io/tests/data/test_squeeze.xlsx b/pandas/tests/io/data/test_squeeze.xlsx similarity index 100% rename from pandas/io/tests/data/test_squeeze.xlsx rename to pandas/tests/io/data/test_squeeze.xlsx diff --git a/pandas/io/tests/data/test_types.xls b/pandas/tests/io/data/test_types.xls similarity index 100% rename from pandas/io/tests/data/test_types.xls rename to pandas/tests/io/data/test_types.xls diff --git a/pandas/io/tests/data/test_types.xlsm b/pandas/tests/io/data/test_types.xlsm similarity index 100% rename from pandas/io/tests/data/test_types.xlsm rename to pandas/tests/io/data/test_types.xlsm diff --git a/pandas/io/tests/data/test_types.xlsx b/pandas/tests/io/data/test_types.xlsx similarity index 100% rename from pandas/io/tests/data/test_types.xlsx rename to pandas/tests/io/data/test_types.xlsx diff --git a/pandas/io/tests/data/testdateoverflow.xls b/pandas/tests/io/data/testdateoverflow.xls similarity index 100% rename from pandas/io/tests/data/testdateoverflow.xls rename to pandas/tests/io/data/testdateoverflow.xls diff --git a/pandas/io/tests/data/testdateoverflow.xlsm b/pandas/tests/io/data/testdateoverflow.xlsm similarity index 100% rename from pandas/io/tests/data/testdateoverflow.xlsm rename to pandas/tests/io/data/testdateoverflow.xlsm diff --git a/pandas/io/tests/data/testdateoverflow.xlsx 
b/pandas/tests/io/data/testdateoverflow.xlsx similarity index 100% rename from pandas/io/tests/data/testdateoverflow.xlsx rename to pandas/tests/io/data/testdateoverflow.xlsx diff --git a/pandas/io/tests/data/testdtype.xls b/pandas/tests/io/data/testdtype.xls similarity index 100% rename from pandas/io/tests/data/testdtype.xls rename to pandas/tests/io/data/testdtype.xls diff --git a/pandas/io/tests/data/testdtype.xlsm b/pandas/tests/io/data/testdtype.xlsm similarity index 100% rename from pandas/io/tests/data/testdtype.xlsm rename to pandas/tests/io/data/testdtype.xlsm diff --git a/pandas/io/tests/data/testdtype.xlsx b/pandas/tests/io/data/testdtype.xlsx similarity index 100% rename from pandas/io/tests/data/testdtype.xlsx rename to pandas/tests/io/data/testdtype.xlsx diff --git a/pandas/io/tests/data/testmultiindex.xls b/pandas/tests/io/data/testmultiindex.xls similarity index 100% rename from pandas/io/tests/data/testmultiindex.xls rename to pandas/tests/io/data/testmultiindex.xls diff --git a/pandas/io/tests/data/testmultiindex.xlsm b/pandas/tests/io/data/testmultiindex.xlsm similarity index 100% rename from pandas/io/tests/data/testmultiindex.xlsm rename to pandas/tests/io/data/testmultiindex.xlsm diff --git a/pandas/io/tests/data/testmultiindex.xlsx b/pandas/tests/io/data/testmultiindex.xlsx similarity index 100% rename from pandas/io/tests/data/testmultiindex.xlsx rename to pandas/tests/io/data/testmultiindex.xlsx diff --git a/pandas/io/tests/data/testskiprows.xls b/pandas/tests/io/data/testskiprows.xls similarity index 100% rename from pandas/io/tests/data/testskiprows.xls rename to pandas/tests/io/data/testskiprows.xls diff --git a/pandas/io/tests/data/testskiprows.xlsm b/pandas/tests/io/data/testskiprows.xlsm similarity index 100% rename from pandas/io/tests/data/testskiprows.xlsm rename to pandas/tests/io/data/testskiprows.xlsm diff --git a/pandas/io/tests/data/testskiprows.xlsx b/pandas/tests/io/data/testskiprows.xlsx similarity index 100% rename from pandas/io/tests/data/testskiprows.xlsx rename to pandas/tests/io/data/testskiprows.xlsx diff --git a/pandas/io/tests/data/times_1900.xls b/pandas/tests/io/data/times_1900.xls similarity index 100% rename from pandas/io/tests/data/times_1900.xls rename to pandas/tests/io/data/times_1900.xls diff --git a/pandas/io/tests/data/times_1900.xlsm b/pandas/tests/io/data/times_1900.xlsm similarity index 100% rename from pandas/io/tests/data/times_1900.xlsm rename to pandas/tests/io/data/times_1900.xlsm diff --git a/pandas/io/tests/data/times_1900.xlsx b/pandas/tests/io/data/times_1900.xlsx similarity index 100% rename from pandas/io/tests/data/times_1900.xlsx rename to pandas/tests/io/data/times_1900.xlsx diff --git a/pandas/io/tests/data/times_1904.xls b/pandas/tests/io/data/times_1904.xls similarity index 100% rename from pandas/io/tests/data/times_1904.xls rename to pandas/tests/io/data/times_1904.xls diff --git a/pandas/io/tests/data/times_1904.xlsm b/pandas/tests/io/data/times_1904.xlsm similarity index 100% rename from pandas/io/tests/data/times_1904.xlsm rename to pandas/tests/io/data/times_1904.xlsm diff --git a/pandas/io/tests/data/times_1904.xlsx b/pandas/tests/io/data/times_1904.xlsx similarity index 100% rename from pandas/io/tests/data/times_1904.xlsx rename to pandas/tests/io/data/times_1904.xlsx diff --git a/pandas/io/tests/data/tips.csv b/pandas/tests/io/data/tips.csv similarity index 100% rename from pandas/io/tests/data/tips.csv rename to pandas/tests/io/data/tips.csv diff --git a/pandas/io/tests/data/valid_markup.html 
b/pandas/tests/io/data/valid_markup.html similarity index 100% rename from pandas/io/tests/data/valid_markup.html rename to pandas/tests/io/data/valid_markup.html diff --git a/pandas/io/tests/data/wikipedia_states.html b/pandas/tests/io/data/wikipedia_states.html similarity index 100% rename from pandas/io/tests/data/wikipedia_states.html rename to pandas/tests/io/data/wikipedia_states.html diff --git a/pandas/io/tests/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py similarity index 100% rename from pandas/io/tests/generate_legacy_storage_files.py rename to pandas/tests/io/generate_legacy_storage_files.py diff --git a/pandas/io/tests/json/__init__.py b/pandas/tests/io/json/__init__.py similarity index 100% rename from pandas/io/tests/json/__init__.py rename to pandas/tests/io/json/__init__.py diff --git a/pandas/io/tests/json/data/tsframe_iso_v012.json b/pandas/tests/io/json/data/tsframe_iso_v012.json similarity index 100% rename from pandas/io/tests/json/data/tsframe_iso_v012.json rename to pandas/tests/io/json/data/tsframe_iso_v012.json diff --git a/pandas/io/tests/json/data/tsframe_v012.json b/pandas/tests/io/json/data/tsframe_v012.json similarity index 100% rename from pandas/io/tests/json/data/tsframe_v012.json rename to pandas/tests/io/json/data/tsframe_v012.json diff --git a/pandas/io/tests/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py similarity index 100% rename from pandas/io/tests/json/test_normalize.py rename to pandas/tests/io/json/test_normalize.py diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py similarity index 100% rename from pandas/io/tests/json/test_pandas.py rename to pandas/tests/io/json/test_pandas.py diff --git a/pandas/io/tests/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py similarity index 100% rename from pandas/io/tests/json/test_ujson.py rename to pandas/tests/io/json/test_ujson.py diff --git a/pandas/io/tests/parser/__init__.py b/pandas/tests/io/parser/__init__.py similarity index 100% rename from pandas/io/tests/parser/__init__.py rename to pandas/tests/io/parser/__init__.py diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py similarity index 100% rename from pandas/io/tests/parser/c_parser_only.py rename to pandas/tests/io/parser/c_parser_only.py diff --git a/pandas/io/tests/parser/comment.py b/pandas/tests/io/parser/comment.py similarity index 100% rename from pandas/io/tests/parser/comment.py rename to pandas/tests/io/parser/comment.py diff --git a/pandas/io/tests/parser/common.py b/pandas/tests/io/parser/common.py similarity index 100% rename from pandas/io/tests/parser/common.py rename to pandas/tests/io/parser/common.py diff --git a/pandas/io/tests/parser/compression.py b/pandas/tests/io/parser/compression.py similarity index 100% rename from pandas/io/tests/parser/compression.py rename to pandas/tests/io/parser/compression.py diff --git a/pandas/io/tests/parser/converters.py b/pandas/tests/io/parser/converters.py similarity index 100% rename from pandas/io/tests/parser/converters.py rename to pandas/tests/io/parser/converters.py diff --git a/pandas/io/tests/parser/data/iris.csv b/pandas/tests/io/parser/data/iris.csv similarity index 100% rename from pandas/io/tests/parser/data/iris.csv rename to pandas/tests/io/parser/data/iris.csv diff --git a/pandas/io/tests/parser/data/salaries.csv b/pandas/tests/io/parser/data/salaries.csv similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv rename to 
pandas/tests/io/parser/data/salaries.csv diff --git a/pandas/io/tests/parser/data/salaries.csv.bz2 b/pandas/tests/io/parser/data/salaries.csv.bz2 similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.bz2 rename to pandas/tests/io/parser/data/salaries.csv.bz2 diff --git a/pandas/io/tests/parser/data/salaries.csv.gz b/pandas/tests/io/parser/data/salaries.csv.gz similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.gz rename to pandas/tests/io/parser/data/salaries.csv.gz diff --git a/pandas/io/tests/parser/data/salaries.csv.xz b/pandas/tests/io/parser/data/salaries.csv.xz similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.xz rename to pandas/tests/io/parser/data/salaries.csv.xz diff --git a/pandas/io/tests/parser/data/salaries.csv.zip b/pandas/tests/io/parser/data/salaries.csv.zip similarity index 100% rename from pandas/io/tests/parser/data/salaries.csv.zip rename to pandas/tests/io/parser/data/salaries.csv.zip diff --git a/pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv b/pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv similarity index 100% rename from pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv rename to pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv diff --git a/pandas/io/tests/parser/data/test1.csv b/pandas/tests/io/parser/data/test1.csv similarity index 100% rename from pandas/io/tests/parser/data/test1.csv rename to pandas/tests/io/parser/data/test1.csv diff --git a/pandas/io/tests/parser/data/test1.csv.bz2 b/pandas/tests/io/parser/data/test1.csv.bz2 similarity index 100% rename from pandas/io/tests/parser/data/test1.csv.bz2 rename to pandas/tests/io/parser/data/test1.csv.bz2 diff --git a/pandas/io/tests/parser/data/test1.csv.gz b/pandas/tests/io/parser/data/test1.csv.gz similarity index 100% rename from pandas/io/tests/parser/data/test1.csv.gz rename to pandas/tests/io/parser/data/test1.csv.gz diff --git a/pandas/io/tests/parser/data/test2.csv b/pandas/tests/io/parser/data/test2.csv similarity index 100% rename from pandas/io/tests/parser/data/test2.csv rename to pandas/tests/io/parser/data/test2.csv diff --git a/pandas/io/tests/parser/data/test_mmap.csv b/pandas/tests/io/parser/data/test_mmap.csv similarity index 100% rename from pandas/io/tests/parser/data/test_mmap.csv rename to pandas/tests/io/parser/data/test_mmap.csv diff --git a/pandas/io/tests/parser/data/tips.csv b/pandas/tests/io/parser/data/tips.csv similarity index 100% rename from pandas/io/tests/parser/data/tips.csv rename to pandas/tests/io/parser/data/tips.csv diff --git a/pandas/io/tests/parser/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv similarity index 100% rename from pandas/io/tests/parser/data/unicode_series.csv rename to pandas/tests/io/parser/data/unicode_series.csv diff --git a/pandas/io/tests/parser/data/utf16_ex.txt b/pandas/tests/io/parser/data/utf16_ex.txt similarity index 100% rename from pandas/io/tests/parser/data/utf16_ex.txt rename to pandas/tests/io/parser/data/utf16_ex.txt diff --git a/pandas/io/tests/parser/dialect.py b/pandas/tests/io/parser/dialect.py similarity index 100% rename from pandas/io/tests/parser/dialect.py rename to pandas/tests/io/parser/dialect.py diff --git a/pandas/io/tests/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py similarity index 100% rename from pandas/io/tests/parser/dtypes.py rename to pandas/tests/io/parser/dtypes.py diff --git a/pandas/io/tests/parser/header.py b/pandas/tests/io/parser/header.py similarity index 100% rename from pandas/io/tests/parser/header.py 
rename to pandas/tests/io/parser/header.py diff --git a/pandas/io/tests/parser/index_col.py b/pandas/tests/io/parser/index_col.py similarity index 100% rename from pandas/io/tests/parser/index_col.py rename to pandas/tests/io/parser/index_col.py diff --git a/pandas/io/tests/parser/multithread.py b/pandas/tests/io/parser/multithread.py similarity index 100% rename from pandas/io/tests/parser/multithread.py rename to pandas/tests/io/parser/multithread.py diff --git a/pandas/io/tests/parser/na_values.py b/pandas/tests/io/parser/na_values.py similarity index 100% rename from pandas/io/tests/parser/na_values.py rename to pandas/tests/io/parser/na_values.py diff --git a/pandas/io/tests/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py similarity index 100% rename from pandas/io/tests/parser/parse_dates.py rename to pandas/tests/io/parser/parse_dates.py diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py similarity index 100% rename from pandas/io/tests/parser/python_parser_only.py rename to pandas/tests/io/parser/python_parser_only.py diff --git a/pandas/io/tests/parser/quoting.py b/pandas/tests/io/parser/quoting.py similarity index 100% rename from pandas/io/tests/parser/quoting.py rename to pandas/tests/io/parser/quoting.py diff --git a/pandas/io/tests/parser/skiprows.py b/pandas/tests/io/parser/skiprows.py similarity index 100% rename from pandas/io/tests/parser/skiprows.py rename to pandas/tests/io/parser/skiprows.py diff --git a/pandas/io/tests/parser/test_network.py b/pandas/tests/io/parser/test_network.py similarity index 100% rename from pandas/io/tests/parser/test_network.py rename to pandas/tests/io/parser/test_network.py diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py similarity index 100% rename from pandas/io/tests/parser/test_parsers.py rename to pandas/tests/io/parser/test_parsers.py diff --git a/pandas/io/tests/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py similarity index 100% rename from pandas/io/tests/parser/test_read_fwf.py rename to pandas/tests/io/parser/test_read_fwf.py diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py similarity index 100% rename from pandas/io/tests/parser/test_textreader.py rename to pandas/tests/io/parser/test_textreader.py diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py similarity index 100% rename from pandas/io/tests/parser/test_unsupported.py rename to pandas/tests/io/parser/test_unsupported.py diff --git a/pandas/io/tests/parser/usecols.py b/pandas/tests/io/parser/usecols.py similarity index 100% rename from pandas/io/tests/parser/usecols.py rename to pandas/tests/io/parser/usecols.py diff --git a/pandas/io/tests/sas/data/DEMO_G.csv b/pandas/tests/io/sas/data/DEMO_G.csv similarity index 100% rename from pandas/io/tests/sas/data/DEMO_G.csv rename to pandas/tests/io/sas/data/DEMO_G.csv diff --git a/pandas/io/tests/sas/data/DEMO_G.xpt b/pandas/tests/io/sas/data/DEMO_G.xpt similarity index 100% rename from pandas/io/tests/sas/data/DEMO_G.xpt rename to pandas/tests/io/sas/data/DEMO_G.xpt diff --git a/pandas/io/tests/sas/data/DRXFCD_G.csv b/pandas/tests/io/sas/data/DRXFCD_G.csv similarity index 100% rename from pandas/io/tests/sas/data/DRXFCD_G.csv rename to pandas/tests/io/sas/data/DRXFCD_G.csv diff --git a/pandas/io/tests/sas/data/DRXFCD_G.xpt b/pandas/tests/io/sas/data/DRXFCD_G.xpt similarity index 100% rename from 
pandas/io/tests/sas/data/DRXFCD_G.xpt rename to pandas/tests/io/sas/data/DRXFCD_G.xpt diff --git a/pandas/io/tests/sas/data/SSHSV1_A.csv b/pandas/tests/io/sas/data/SSHSV1_A.csv similarity index 100% rename from pandas/io/tests/sas/data/SSHSV1_A.csv rename to pandas/tests/io/sas/data/SSHSV1_A.csv diff --git a/pandas/io/tests/sas/data/SSHSV1_A.xpt b/pandas/tests/io/sas/data/SSHSV1_A.xpt similarity index 100% rename from pandas/io/tests/sas/data/SSHSV1_A.xpt rename to pandas/tests/io/sas/data/SSHSV1_A.xpt diff --git a/pandas/io/tests/sas/data/airline.csv b/pandas/tests/io/sas/data/airline.csv similarity index 100% rename from pandas/io/tests/sas/data/airline.csv rename to pandas/tests/io/sas/data/airline.csv diff --git a/pandas/io/tests/sas/data/airline.sas7bdat b/pandas/tests/io/sas/data/airline.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/airline.sas7bdat rename to pandas/tests/io/sas/data/airline.sas7bdat diff --git a/pandas/io/tests/sas/data/paxraw_d_short.csv b/pandas/tests/io/sas/data/paxraw_d_short.csv similarity index 100% rename from pandas/io/tests/sas/data/paxraw_d_short.csv rename to pandas/tests/io/sas/data/paxraw_d_short.csv diff --git a/pandas/io/tests/sas/data/paxraw_d_short.xpt b/pandas/tests/io/sas/data/paxraw_d_short.xpt similarity index 100% rename from pandas/io/tests/sas/data/paxraw_d_short.xpt rename to pandas/tests/io/sas/data/paxraw_d_short.xpt diff --git a/pandas/io/tests/sas/data/productsales.csv b/pandas/tests/io/sas/data/productsales.csv similarity index 100% rename from pandas/io/tests/sas/data/productsales.csv rename to pandas/tests/io/sas/data/productsales.csv diff --git a/pandas/io/tests/sas/data/productsales.sas7bdat b/pandas/tests/io/sas/data/productsales.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/productsales.sas7bdat rename to pandas/tests/io/sas/data/productsales.sas7bdat diff --git a/pandas/io/tests/sas/data/test1.sas7bdat b/pandas/tests/io/sas/data/test1.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test1.sas7bdat rename to pandas/tests/io/sas/data/test1.sas7bdat diff --git a/pandas/io/tests/sas/data/test10.sas7bdat b/pandas/tests/io/sas/data/test10.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test10.sas7bdat rename to pandas/tests/io/sas/data/test10.sas7bdat diff --git a/pandas/io/tests/sas/data/test11.sas7bdat b/pandas/tests/io/sas/data/test11.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test11.sas7bdat rename to pandas/tests/io/sas/data/test11.sas7bdat diff --git a/pandas/io/tests/sas/data/test12.sas7bdat b/pandas/tests/io/sas/data/test12.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test12.sas7bdat rename to pandas/tests/io/sas/data/test12.sas7bdat diff --git a/pandas/io/tests/sas/data/test13.sas7bdat b/pandas/tests/io/sas/data/test13.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test13.sas7bdat rename to pandas/tests/io/sas/data/test13.sas7bdat diff --git a/pandas/io/tests/sas/data/test14.sas7bdat b/pandas/tests/io/sas/data/test14.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test14.sas7bdat rename to pandas/tests/io/sas/data/test14.sas7bdat diff --git a/pandas/io/tests/sas/data/test15.sas7bdat b/pandas/tests/io/sas/data/test15.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test15.sas7bdat rename to pandas/tests/io/sas/data/test15.sas7bdat diff --git a/pandas/io/tests/sas/data/test16.sas7bdat b/pandas/tests/io/sas/data/test16.sas7bdat similarity 
index 100% rename from pandas/io/tests/sas/data/test16.sas7bdat rename to pandas/tests/io/sas/data/test16.sas7bdat diff --git a/pandas/io/tests/sas/data/test2.sas7bdat b/pandas/tests/io/sas/data/test2.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test2.sas7bdat rename to pandas/tests/io/sas/data/test2.sas7bdat diff --git a/pandas/io/tests/sas/data/test3.sas7bdat b/pandas/tests/io/sas/data/test3.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test3.sas7bdat rename to pandas/tests/io/sas/data/test3.sas7bdat diff --git a/pandas/io/tests/sas/data/test4.sas7bdat b/pandas/tests/io/sas/data/test4.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test4.sas7bdat rename to pandas/tests/io/sas/data/test4.sas7bdat diff --git a/pandas/io/tests/sas/data/test5.sas7bdat b/pandas/tests/io/sas/data/test5.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test5.sas7bdat rename to pandas/tests/io/sas/data/test5.sas7bdat diff --git a/pandas/io/tests/sas/data/test6.sas7bdat b/pandas/tests/io/sas/data/test6.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test6.sas7bdat rename to pandas/tests/io/sas/data/test6.sas7bdat diff --git a/pandas/io/tests/sas/data/test7.sas7bdat b/pandas/tests/io/sas/data/test7.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test7.sas7bdat rename to pandas/tests/io/sas/data/test7.sas7bdat diff --git a/pandas/io/tests/sas/data/test8.sas7bdat b/pandas/tests/io/sas/data/test8.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test8.sas7bdat rename to pandas/tests/io/sas/data/test8.sas7bdat diff --git a/pandas/io/tests/sas/data/test9.sas7bdat b/pandas/tests/io/sas/data/test9.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test9.sas7bdat rename to pandas/tests/io/sas/data/test9.sas7bdat diff --git a/pandas/io/tests/sas/data/test_12659.csv b/pandas/tests/io/sas/data/test_12659.csv similarity index 100% rename from pandas/io/tests/sas/data/test_12659.csv rename to pandas/tests/io/sas/data/test_12659.csv diff --git a/pandas/io/tests/sas/data/test_12659.sas7bdat b/pandas/tests/io/sas/data/test_12659.sas7bdat similarity index 100% rename from pandas/io/tests/sas/data/test_12659.sas7bdat rename to pandas/tests/io/sas/data/test_12659.sas7bdat diff --git a/pandas/io/tests/sas/data/test_sas7bdat_1.csv b/pandas/tests/io/sas/data/test_sas7bdat_1.csv similarity index 100% rename from pandas/io/tests/sas/data/test_sas7bdat_1.csv rename to pandas/tests/io/sas/data/test_sas7bdat_1.csv diff --git a/pandas/io/tests/sas/data/test_sas7bdat_2.csv b/pandas/tests/io/sas/data/test_sas7bdat_2.csv similarity index 100% rename from pandas/io/tests/sas/data/test_sas7bdat_2.csv rename to pandas/tests/io/sas/data/test_sas7bdat_2.csv diff --git a/pandas/io/tests/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py similarity index 100% rename from pandas/io/tests/sas/test_sas.py rename to pandas/tests/io/sas/test_sas.py diff --git a/pandas/io/tests/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py similarity index 100% rename from pandas/io/tests/sas/test_sas7bdat.py rename to pandas/tests/io/sas/test_sas7bdat.py diff --git a/pandas/io/tests/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py similarity index 100% rename from pandas/io/tests/sas/test_xport.py rename to pandas/tests/io/sas/test_xport.py diff --git a/pandas/io/tests/test_clipboard.py b/pandas/tests/io/test_clipboard.py similarity index 100% rename from pandas/io/tests/test_clipboard.py rename to 
pandas/tests/io/test_clipboard.py diff --git a/pandas/io/tests/test_common.py b/pandas/tests/io/test_common.py similarity index 100% rename from pandas/io/tests/test_common.py rename to pandas/tests/io/test_common.py diff --git a/pandas/io/tests/test_date_converters.py b/pandas/tests/io/test_date_converters.py similarity index 100% rename from pandas/io/tests/test_date_converters.py rename to pandas/tests/io/test_date_converters.py diff --git a/pandas/io/tests/test_excel.py b/pandas/tests/io/test_excel.py similarity index 100% rename from pandas/io/tests/test_excel.py rename to pandas/tests/io/test_excel.py diff --git a/pandas/io/tests/test_feather.py b/pandas/tests/io/test_feather.py similarity index 100% rename from pandas/io/tests/test_feather.py rename to pandas/tests/io/test_feather.py diff --git a/pandas/io/tests/test_gbq.py b/pandas/tests/io/test_gbq.py similarity index 100% rename from pandas/io/tests/test_gbq.py rename to pandas/tests/io/test_gbq.py diff --git a/pandas/io/tests/test_html.py b/pandas/tests/io/test_html.py similarity index 100% rename from pandas/io/tests/test_html.py rename to pandas/tests/io/test_html.py diff --git a/pandas/io/tests/test_packers.py b/pandas/tests/io/test_packers.py similarity index 99% rename from pandas/io/tests/test_packers.py rename to pandas/tests/io/test_packers.py index 4bb6f4a69bab3..911cd8164571d 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -795,7 +795,7 @@ class TestMsgpack(): @classmethod def setup_class(cls): - from pandas.io.tests.generate_legacy_storage_files import ( + from pandas.tests.io.generate_legacy_storage_files import ( create_msgpack_data, create_data) cls.data = create_msgpack_data() cls.all_data = create_data() diff --git a/pandas/io/tests/test_pickle.py b/pandas/tests/io/test_pickle.py similarity index 99% rename from pandas/io/tests/test_pickle.py rename to pandas/tests/io/test_pickle.py index 588b2d5f04888..5445c506b050c 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -33,7 +33,7 @@ class TestPickle(): @classmethod def setup_class(cls): - from pandas.io.tests.generate_legacy_storage_files import ( + from pandas.tests.io.generate_legacy_storage_files import ( create_pickle_data) cls.data = create_pickle_data() cls.path = u('__%s__.pickle' % tm.rands(10)) diff --git a/pandas/io/tests/test_pytables.py b/pandas/tests/io/test_pytables.py similarity index 100% rename from pandas/io/tests/test_pytables.py rename to pandas/tests/io/test_pytables.py diff --git a/pandas/io/tests/test_s3.py b/pandas/tests/io/test_s3.py similarity index 100% rename from pandas/io/tests/test_s3.py rename to pandas/tests/io/test_s3.py diff --git a/pandas/io/tests/test_sql.py b/pandas/tests/io/test_sql.py similarity index 100% rename from pandas/io/tests/test_sql.py rename to pandas/tests/io/test_sql.py diff --git a/pandas/io/tests/test_stata.py b/pandas/tests/io/test_stata.py similarity index 100% rename from pandas/io/tests/test_stata.py rename to pandas/tests/io/test_stata.py diff --git a/pandas/sparse/tests/__init__.py b/pandas/tests/msgpack/__init__.py similarity index 100% rename from pandas/sparse/tests/__init__.py rename to pandas/tests/msgpack/__init__.py diff --git a/pandas/tests/test_msgpack/test_buffer.py b/pandas/tests/msgpack/test_buffer.py similarity index 100% rename from pandas/tests/test_msgpack/test_buffer.py rename to pandas/tests/msgpack/test_buffer.py diff --git a/pandas/tests/test_msgpack/test_case.py b/pandas/tests/msgpack/test_case.py similarity index 
100% rename from pandas/tests/test_msgpack/test_case.py rename to pandas/tests/msgpack/test_case.py diff --git a/pandas/tests/test_msgpack/test_except.py b/pandas/tests/msgpack/test_except.py similarity index 100% rename from pandas/tests/test_msgpack/test_except.py rename to pandas/tests/msgpack/test_except.py diff --git a/pandas/tests/test_msgpack/test_extension.py b/pandas/tests/msgpack/test_extension.py similarity index 100% rename from pandas/tests/test_msgpack/test_extension.py rename to pandas/tests/msgpack/test_extension.py diff --git a/pandas/tests/test_msgpack/test_format.py b/pandas/tests/msgpack/test_format.py similarity index 100% rename from pandas/tests/test_msgpack/test_format.py rename to pandas/tests/msgpack/test_format.py diff --git a/pandas/tests/test_msgpack/test_limits.py b/pandas/tests/msgpack/test_limits.py similarity index 100% rename from pandas/tests/test_msgpack/test_limits.py rename to pandas/tests/msgpack/test_limits.py diff --git a/pandas/tests/test_msgpack/test_newspec.py b/pandas/tests/msgpack/test_newspec.py similarity index 100% rename from pandas/tests/test_msgpack/test_newspec.py rename to pandas/tests/msgpack/test_newspec.py diff --git a/pandas/tests/test_msgpack/test_obj.py b/pandas/tests/msgpack/test_obj.py similarity index 100% rename from pandas/tests/test_msgpack/test_obj.py rename to pandas/tests/msgpack/test_obj.py diff --git a/pandas/tests/test_msgpack/test_pack.py b/pandas/tests/msgpack/test_pack.py similarity index 100% rename from pandas/tests/test_msgpack/test_pack.py rename to pandas/tests/msgpack/test_pack.py diff --git a/pandas/tests/test_msgpack/test_read_size.py b/pandas/tests/msgpack/test_read_size.py similarity index 100% rename from pandas/tests/test_msgpack/test_read_size.py rename to pandas/tests/msgpack/test_read_size.py diff --git a/pandas/tests/test_msgpack/test_seq.py b/pandas/tests/msgpack/test_seq.py similarity index 100% rename from pandas/tests/test_msgpack/test_seq.py rename to pandas/tests/msgpack/test_seq.py diff --git a/pandas/tests/test_msgpack/test_sequnpack.py b/pandas/tests/msgpack/test_sequnpack.py similarity index 100% rename from pandas/tests/test_msgpack/test_sequnpack.py rename to pandas/tests/msgpack/test_sequnpack.py diff --git a/pandas/tests/test_msgpack/test_subtype.py b/pandas/tests/msgpack/test_subtype.py similarity index 100% rename from pandas/tests/test_msgpack/test_subtype.py rename to pandas/tests/msgpack/test_subtype.py diff --git a/pandas/tests/test_msgpack/test_unpack.py b/pandas/tests/msgpack/test_unpack.py similarity index 100% rename from pandas/tests/test_msgpack/test_unpack.py rename to pandas/tests/msgpack/test_unpack.py diff --git a/pandas/tests/test_msgpack/test_unpack_raw.py b/pandas/tests/msgpack/test_unpack_raw.py similarity index 100% rename from pandas/tests/test_msgpack/test_unpack_raw.py rename to pandas/tests/msgpack/test_unpack_raw.py diff --git a/pandas/tests/test_msgpack/__init__.py b/pandas/tests/sparse/__init__.py similarity index 100% rename from pandas/tests/test_msgpack/__init__.py rename to pandas/tests/sparse/__init__.py diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/tests/sparse/test_arithmetics.py similarity index 100% rename from pandas/sparse/tests/test_arithmetics.py rename to pandas/tests/sparse/test_arithmetics.py diff --git a/pandas/sparse/tests/test_array.py b/pandas/tests/sparse/test_array.py similarity index 100% rename from pandas/sparse/tests/test_array.py rename to pandas/tests/sparse/test_array.py diff --git 
a/pandas/sparse/tests/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py similarity index 100% rename from pandas/sparse/tests/test_combine_concat.py rename to pandas/tests/sparse/test_combine_concat.py diff --git a/pandas/sparse/tests/test_format.py b/pandas/tests/sparse/test_format.py similarity index 100% rename from pandas/sparse/tests/test_format.py rename to pandas/tests/sparse/test_format.py diff --git a/pandas/sparse/tests/test_frame.py b/pandas/tests/sparse/test_frame.py similarity index 100% rename from pandas/sparse/tests/test_frame.py rename to pandas/tests/sparse/test_frame.py diff --git a/pandas/sparse/tests/test_groupby.py b/pandas/tests/sparse/test_groupby.py similarity index 100% rename from pandas/sparse/tests/test_groupby.py rename to pandas/tests/sparse/test_groupby.py diff --git a/pandas/sparse/tests/test_indexing.py b/pandas/tests/sparse/test_indexing.py similarity index 100% rename from pandas/sparse/tests/test_indexing.py rename to pandas/tests/sparse/test_indexing.py diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py similarity index 100% rename from pandas/sparse/tests/test_libsparse.py rename to pandas/tests/sparse/test_libsparse.py diff --git a/pandas/sparse/tests/test_list.py b/pandas/tests/sparse/test_list.py similarity index 100% rename from pandas/sparse/tests/test_list.py rename to pandas/tests/sparse/test_list.py diff --git a/pandas/sparse/tests/test_pivot.py b/pandas/tests/sparse/test_pivot.py similarity index 100% rename from pandas/sparse/tests/test_pivot.py rename to pandas/tests/sparse/test_pivot.py diff --git a/pandas/sparse/tests/test_series.py b/pandas/tests/sparse/test_series.py similarity index 100% rename from pandas/sparse/tests/test_series.py rename to pandas/tests/sparse/test_series.py diff --git a/pandas/tools/tests/__init__.py b/pandas/tests/tools/__init__.py similarity index 100% rename from pandas/tools/tests/__init__.py rename to pandas/tests/tools/__init__.py diff --git a/pandas/tools/tests/data/allow_exact_matches.csv b/pandas/tests/tools/data/allow_exact_matches.csv similarity index 100% rename from pandas/tools/tests/data/allow_exact_matches.csv rename to pandas/tests/tools/data/allow_exact_matches.csv diff --git a/pandas/tools/tests/data/allow_exact_matches_and_tolerance.csv b/pandas/tests/tools/data/allow_exact_matches_and_tolerance.csv similarity index 100% rename from pandas/tools/tests/data/allow_exact_matches_and_tolerance.csv rename to pandas/tests/tools/data/allow_exact_matches_and_tolerance.csv diff --git a/pandas/tools/tests/data/asof.csv b/pandas/tests/tools/data/asof.csv similarity index 100% rename from pandas/tools/tests/data/asof.csv rename to pandas/tests/tools/data/asof.csv diff --git a/pandas/tools/tests/data/asof2.csv b/pandas/tests/tools/data/asof2.csv similarity index 100% rename from pandas/tools/tests/data/asof2.csv rename to pandas/tests/tools/data/asof2.csv diff --git a/pandas/tools/tests/data/cut_data.csv b/pandas/tests/tools/data/cut_data.csv similarity index 100% rename from pandas/tools/tests/data/cut_data.csv rename to pandas/tests/tools/data/cut_data.csv diff --git a/pandas/tools/tests/data/quotes.csv b/pandas/tests/tools/data/quotes.csv similarity index 100% rename from pandas/tools/tests/data/quotes.csv rename to pandas/tests/tools/data/quotes.csv diff --git a/pandas/tools/tests/data/quotes2.csv b/pandas/tests/tools/data/quotes2.csv similarity index 100% rename from pandas/tools/tests/data/quotes2.csv rename to 
pandas/tests/tools/data/quotes2.csv diff --git a/pandas/tools/tests/data/tolerance.csv b/pandas/tests/tools/data/tolerance.csv similarity index 100% rename from pandas/tools/tests/data/tolerance.csv rename to pandas/tests/tools/data/tolerance.csv diff --git a/pandas/tools/tests/data/trades.csv b/pandas/tests/tools/data/trades.csv similarity index 100% rename from pandas/tools/tests/data/trades.csv rename to pandas/tests/tools/data/trades.csv diff --git a/pandas/tools/tests/data/trades2.csv b/pandas/tests/tools/data/trades2.csv similarity index 100% rename from pandas/tools/tests/data/trades2.csv rename to pandas/tests/tools/data/trades2.csv diff --git a/pandas/tools/tests/test_concat.py b/pandas/tests/tools/test_concat.py similarity index 100% rename from pandas/tools/tests/test_concat.py rename to pandas/tests/tools/test_concat.py diff --git a/pandas/tools/tests/test_hashing.py b/pandas/tests/tools/test_hashing.py similarity index 100% rename from pandas/tools/tests/test_hashing.py rename to pandas/tests/tools/test_hashing.py diff --git a/pandas/tools/tests/test_join.py b/pandas/tests/tools/test_join.py similarity index 99% rename from pandas/tools/tests/test_join.py rename to pandas/tests/tools/test_join.py index fe5821a637205..ab42b1212301b 100644 --- a/pandas/tools/tests/test_join.py +++ b/pandas/tests/tools/test_join.py @@ -11,7 +11,7 @@ import pandas._join as _join import pandas.util.testing as tm -from pandas.tools.tests.test_merge import get_test_data, N, NGROUPS +from pandas.tests.tools.test_merge import get_test_data, N, NGROUPS a_ = np.array diff --git a/pandas/tools/tests/test_merge.py b/pandas/tests/tools/test_merge.py similarity index 100% rename from pandas/tools/tests/test_merge.py rename to pandas/tests/tools/test_merge.py diff --git a/pandas/tools/tests/test_merge_asof.py b/pandas/tests/tools/test_merge_asof.py similarity index 100% rename from pandas/tools/tests/test_merge_asof.py rename to pandas/tests/tools/test_merge_asof.py diff --git a/pandas/tools/tests/test_merge_ordered.py b/pandas/tests/tools/test_merge_ordered.py similarity index 100% rename from pandas/tools/tests/test_merge_ordered.py rename to pandas/tests/tools/test_merge_ordered.py diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tests/tools/test_pivot.py similarity index 100% rename from pandas/tools/tests/test_pivot.py rename to pandas/tests/tools/test_pivot.py diff --git a/pandas/tools/tests/test_tile.py b/pandas/tests/tools/test_tile.py similarity index 100% rename from pandas/tools/tests/test_tile.py rename to pandas/tests/tools/test_tile.py diff --git a/pandas/tools/tests/test_util.py b/pandas/tests/tools/test_util.py similarity index 100% rename from pandas/tools/tests/test_util.py rename to pandas/tests/tools/test_util.py diff --git a/setup.py b/setup.py index edec53e9cefb0..cbcadce459c67 100755 --- a/setup.py +++ b/setup.py @@ -622,12 +622,10 @@ def pxd(name): version=versioneer.get_version(), packages=['pandas', 'pandas.api', - 'pandas.api.tests', 'pandas.api.types', 'pandas.compat', 'pandas.compat.numpy', 'pandas.computation', - 'pandas.computation.tests', 'pandas.core', 'pandas.indexes', 'pandas.io', @@ -635,59 +633,61 @@ def pxd(name): 'pandas.io.sas', 'pandas.formats', 'pandas.sparse', - 'pandas.sparse.tests', 'pandas.stats', 'pandas.util', 'pandas.tests', + 'pandas.tests.api', + 'pandas.tests.computation', 'pandas.tests.frame', 'pandas.tests.indexes', 'pandas.tests.indexes.datetimes', 'pandas.tests.indexes.timedeltas', 'pandas.tests.indexes.period', + 'pandas.tests.io', + 
'pandas.tests.io.json', + 'pandas.tests.io.parser', + 'pandas.tests.io.sas', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', + 'pandas.tests.msgpack', 'pandas.tests.scalar', + 'pandas.tests.sparse', 'pandas.tests.tseries', + 'pandas.tests.tools', 'pandas.tests.types', - 'pandas.tests.test_msgpack', 'pandas.tests.plotting', 'pandas.tools', - 'pandas.tools.tests', 'pandas.tseries', 'pandas.types', - 'pandas.io.tests', - 'pandas.io.tests.json', - 'pandas.io.tests.parser', - 'pandas.io.tests.sas', 'pandas.msgpack', 'pandas.util.clipboard' ], - package_data={'pandas.io': ['tests/data/legacy_hdf/*.h5', - 'tests/data/legacy_pickle/*/*.pickle', - 'tests/data/legacy_msgpack/*/*.msgpack', - 'tests/data/*.csv*', - 'tests/data/*.dta', - 'tests/data/*.pickle', - 'tests/data/*.txt', - 'tests/data/*.xls', - 'tests/data/*.xlsx', - 'tests/data/*.xlsm', - 'tests/data/*.table', - 'tests/parser/data/*.csv', - 'tests/parser/data/*.gz', - 'tests/parser/data/*.bz2', - 'tests/parser/data/*.txt', - 'tests/sas/data/*.csv', - 'tests/sas/data/*.xpt', - 'tests/sas/data/*.sas7bdat', - 'tests/data/*.html', - 'tests/data/html_encoding/*.html', - 'tests/json/data/*.json'], - 'pandas.tools': ['tests/data/*.csv'], - 'pandas.tests': ['data/*.csv'], + package_data={'pandas.tests': ['data/*.csv'], 'pandas.tests.formats': ['data/*.csv'], 'pandas.tests.indexes': ['data/*.pickle'], + 'pandas.tests.io': ['data/legacy_hdf/*.h5', + 'data/legacy_pickle/*/*.pickle', + 'data/legacy_msgpack/*/*.msgpack', + 'data/*.csv*', + 'data/*.dta', + 'data/*.pickle', + 'data/*.txt', + 'data/*.xls', + 'data/*.xlsx', + 'data/*.xlsm', + 'data/*.table', + 'parser/data/*.csv', + 'parser/data/*.gz', + 'parser/data/*.bz2', + 'parser/data/*.txt', + 'sas/data/*.csv', + 'sas/data/*.xpt', + 'sas/data/*.sas7bdat', + 'data/*.html', + 'data/html_encoding/*.html', + 'json/data/*.json'], + 'pandas.tests.tools': ['data/*.csv'], 'pandas.tests.tseries': ['data/*.pickle'] }, ext_modules=extensions, From 1bcc10da51c61886362d9d4d4eeafe604ab288ea Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 10:09:27 -0500 Subject: [PATCH 048/353] TST: fix locations for github based url tests --- pandas/tests/io/parser/common.py | 2 +- pandas/tests/io/test_excel.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 0671901fc170a..b667eed346355 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -617,7 +617,7 @@ def test_read_csv_parse_simple_list(self): def test_url(self): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/parser/data/salaries.csv') + 'pandas/tests/io/parser/data/salaries.csv') url_table = self.read_table(url) dirpath = tm.get_data_path() localtable = os.path.join(dirpath, 'salaries.csv') diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index a22c89184f20d..0c2b443cffe52 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -581,7 +581,7 @@ def test_read_xlrd_Book(self): @tm.network def test_read_from_http_url(self): url = ('https://raw.github.com/pandas-dev/pandas/master/' - 'pandas/io/tests/data/test1' + self.ext) + 'pandas/tests/io/data/test1' + self.ext) url_table = read_excel(url) local_table = self.get_exceldf('test1') tm.assert_frame_equal(url_table, local_table) From f87db63d821f9b7bc347c3ed8e0f452859843081 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 10:31:47 -0500 Subject: [PATCH 
049/353] DOC: fix path in whatsnew --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9f86c777c665d..aa620bce0df59 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -91,7 +91,7 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). url = 'https://github.com/{repo}/raw/{branch}/{path}'.format( repo = 'pandas-dev/pandas', branch = 'master', - path = 'pandas/io/tests/parser/data/salaries.csv.bz2', + path = 'pandas/tests/io/parser/data/salaries.csv.bz2', ) df = pd.read_table(url, compression='infer') # default, infer compression df = pd.read_table(url, compression='bz2') # explicitly specify compression From 1190ac6e19a431a596980c766ec1a3405a7d554a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 11 Feb 2017 16:17:27 -0500 Subject: [PATCH 050/353] TST: use xdist for multiple cpu testing closes #15369 --- .travis.yml | 3 +- ci/script_multi.sh | 32 +++ ci/{script.sh => script_single.sh} | 10 +- pandas/tests/indexes/datetimes/test_ops.py | 244 +++++++++++---------- pandas/tests/io/test_clipboard.py | 1 + pandas/tests/io/test_pytables.py | 7 +- pandas/tests/io/test_sql.py | 19 +- pandas/tests/test_window.py | 83 ++++--- setup.cfg | 2 + test_fast.sh | 3 +- 10 files changed, 223 insertions(+), 181 deletions(-) create mode 100755 ci/script_multi.sh rename ci/{script.sh => script_single.sh} (63%) diff --git a/.travis.yml b/.travis.yml index 2ff5d508d0371..6b90e49b336b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -320,7 +320,8 @@ before_script: script: - echo "script start" - ci/run_build_docs.sh - - ci/script.sh + - ci/script_single.sh + - ci/script_multi.sh - ci/lint.sh - echo "script done" diff --git a/ci/script_multi.sh b/ci/script_multi.sh new file mode 100755 index 0000000000000..83f8427cc57ad --- /dev/null +++ b/ci/script_multi.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +echo "[script multi]" + +source activate pandas + +# don't run the tests for the doc build +if [ x"$DOC_BUILD" != x"" ]; then + exit 0 +fi + +if [ -n "$LOCALE_OVERRIDE" ]; then + export LC_ALL="$LOCALE_OVERRIDE"; + echo "Setting LC_ALL to $LOCALE_OVERRIDE" + + pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' + python -c "$pycmd" +fi + +if [ "$BUILD_TEST" ]; then + echo "We are not running pytest as this is simply a build test." +elif [ "$COVERAGE" ]; then + echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas +else + echo pytest -n 2 -m "not single" $TEST_ARGS pandas + pytest -n 2 -m "not single" $TEST_ARGS pandas # TODO: doctest +fi + +RET="$?" + +exit "$RET" diff --git a/ci/script.sh b/ci/script_single.sh similarity index 63% rename from ci/script.sh rename to ci/script_single.sh index c52fa0fdb33a3..38021fcac5721 100755 --- a/ci/script.sh +++ b/ci/script_single.sh @@ -1,6 +1,6 @@ #!/bin/bash -echo "inside $0" +echo "[script_single]" source activate pandas @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." 
elif [ "$COVERAGE" ]; then - echo pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas - pytest -s --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas else - echo pytest $TEST_ARGS pandas - pytest $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" $TEST_ARGS pandas + pytest -m "single" $TEST_ARGS pandas # TODO: doctest fi RET="$?" diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 9a968a42c4247..8eb9128d8d1c8 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,7 +1,9 @@ +import pytest import warnings import numpy as np from datetime import timedelta +from itertools import product import pandas as pd import pandas.tslib as tslib import pandas.util.testing as tm @@ -958,134 +960,134 @@ def test_second(self): tm.assert_index_equal(r1, r2) -class TestDatetimeIndex(tm.TestCase): - - # GH 10699 - def test_datetime64_with_DateOffset(self): - for klass, assert_func in zip([Series, DatetimeIndex], - [self.assert_series_equal, - tm.assert_index_equal]): - s = klass(date_range('2000-01-01', '2000-01-31'), name='a') - result = s + pd.DateOffset(years=1) - result2 = pd.DateOffset(years=1) + s - exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') +# GH 10699 +@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex], + [tm.assert_series_equal, + tm.assert_index_equal])) +def test_datetime64_with_DateOffset(klass, assert_func): + s = klass(date_range('2000-01-01', '2000-01-31'), name='a') + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = klass(date_range('2001-01-01', '2001-01-31'), name='a') + assert_func(result, exp) + assert_func(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') + assert_func(result, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), + Timestamp('2000-02-16', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + # array of offsets - valid for Series only + if klass is Series: + with tm.assert_produces_warning(PerformanceWarning): + s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') + ]) assert_func(result, exp) - assert_func(result2, exp) - result = s - pd.DateOffset(years=1) - exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') + # same offset + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) assert_func(result, exp) - s = klass([Timestamp('2000-01-15 00:15:00', 
tz='US/Central'), - pd.Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + pd.offsets.Day() - result2 = pd.offsets.Day() + s - exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), - Timestamp('2000-02-16', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - pd.Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + pd.offsets.MonthEnd() - result2 = pd.offsets.MonthEnd() + s - exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), - Timestamp('2000-02-29', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - # array of offsets - valid for Series only - if klass is Series: - with tm.assert_produces_warning(PerformanceWarning): - s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.MonthEnd()]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29') - ]) - assert_func(result, exp) - - # same offset - result = s + Series([pd.offsets.DateOffset(years=1), - pd.offsets.DateOffset(years=1)]) - exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) - assert_func(result, exp) - - s = klass([Timestamp('2000-01-05 00:15:00'), + s = klass([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + + # DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), ('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + op = pd.DateOffset(**dict([kwd])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + + # assert these are equal on a piecewise basis + offsets = ['YearBegin', ('YearBegin', {'month': 5}), + 'YearEnd', ('YearEnd', {'month': 5}), + 'MonthBegin', 'MonthEnd', + 'SemiMonthEnd', 'SemiMonthBegin', + 'Week', ('Week', {'weekday': 3}), + 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', + 'CustomBusinessDay', 'CDay', 'CBMonthEnd', + 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', + 'BusinessHour', 'BYearBegin', 'BYearEnd', + 'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}), + ('FY5253Quarter', {'qtr_with_extra_week': 1, + 'startingMonth': 1, + 'weekday': 2, + 'variation': 'nearest'}), + ('FY5253', {'weekday': 0, + 'startingMonth': 2, + 'variation': + 'nearest'}), + ('WeekOfMonth', {'weekday': 2, + 'week': 2}), + 'Easter', ('DateOffset', {'day': 4}), + ('DateOffset', {'month': 5})] + + with warnings.catch_warnings(record=True): + for normalize in (True, False): + for do in offsets: + if isinstance(do, tuple): + do, kwargs = do + else: + do = do + kwargs = {} + + for n in [0, 5]: + if (do in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253'] and n == 0): + continue + op = getattr(pd.offsets, do)(n, + normalize=normalize, + **kwargs) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + assert_func(klass([op + x for x in s]), op + s) + + +@pytest.mark.parametrize('years,months', product([-1, 0, 1], [-2, 0, 2])) +def test_shift_months(years, months): + s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), 
Timestamp('2000-01-31 00:23:00'), Timestamp('2000-01-01'), - Timestamp('2000-03-31'), Timestamp('2000-02-29'), - Timestamp('2000-12-31'), - Timestamp('2000-05-15'), - Timestamp('2001-06-15')]) - - # DateOffset relativedelta fastpath - relative_kwargs = [('years', 2), ('months', 5), ('days', 3), - ('hours', 5), ('minutes', 10), ('seconds', 2), - ('microseconds', 5)] - for i, kwd in enumerate(relative_kwargs): - op = pd.DateOffset(**dict([kwd])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - - # assert these are equal on a piecewise basis - offsets = ['YearBegin', ('YearBegin', {'month': 5}), 'YearEnd', - ('YearEnd', {'month': 5}), 'MonthBegin', 'MonthEnd', - 'SemiMonthEnd', 'SemiMonthBegin', - 'Week', ('Week', { - 'weekday': 3 - }), 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', - 'CustomBusinessDay', 'CDay', 'CBMonthEnd', - 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', - 'BusinessHour', 'BYearBegin', 'BYearEnd', - 'BQuarterBegin', ('LastWeekOfMonth', { - 'weekday': 2 - }), ('FY5253Quarter', {'qtr_with_extra_week': 1, - 'startingMonth': 1, - 'weekday': 2, - 'variation': 'nearest'}), - ('FY5253', {'weekday': 0, - 'startingMonth': 2, - 'variation': - 'nearest'}), ('WeekOfMonth', {'weekday': 2, - 'week': 2}), - 'Easter', ('DateOffset', {'day': 4}), - ('DateOffset', {'month': 5})] - - with warnings.catch_warnings(record=True): - for normalize in (True, False): - for do in offsets: - if isinstance(do, tuple): - do, kwargs = do - else: - do = do - kwargs = {} - - for n in [0, 5]: - if (do in ['WeekOfMonth', 'LastWeekOfMonth', - 'FY5253Quarter', 'FY5253'] and n == 0): - continue - op = getattr(pd.offsets, do)(n, - normalize=normalize, - **kwargs) - assert_func(klass([x + op for x in s]), s + op) - assert_func(klass([x - op for x in s]), s - op) - assert_func(klass([op + x for x in s]), op + s) - - def test_shift_months(self): - s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( - '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( - '2000-02-29'), Timestamp('2000-12-31')]) - for years in [-1, 0, 1]: - for months in [-2, 0, 2]: - actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + - months)) - expected = DatetimeIndex([x + offsets.DateOffset( - years=years, months=months) for x in s]) - tm.assert_index_equal(actual, expected) + Timestamp('2000-12-31')]) + actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + + months)) + expected = DatetimeIndex([x + offsets.DateOffset( + years=years, months=months) for x in s]) + tm.assert_index_equal(actual, expected) class TestBusinessDatetimeIndex(tm.TestCase): diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3abd1093362f4..2e701143357e3 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -20,6 +20,7 @@ _DEPS_INSTALLED = 0 +@pytest.mark.single @pytest.mark.skipif(not _DEPS_INSTALLED, reason="clipboard primitives not installed") class TestClipboard(tm.TestCase): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 3fa0eb2ef52dc..a840ff46aa845 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -36,12 +36,6 @@ from pandas import concat, Timestamp from pandas import compat from pandas.compat import range, lrange, u - -try: - import tables -except ImportError: - 
pytest.skip('no pytables') - from distutils.version import LooseVersion _default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2' @@ -165,6 +159,7 @@ def tearDown(self): pass +@pytest.mark.single class TestHDFStore(Base, tm.TestCase): def test_factory_fun(self): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a6f4d96001021..78560611da7aa 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -18,13 +18,13 @@ """ from __future__ import print_function +import pytest import unittest import sqlite3 import csv import os import sys -import pytest import warnings import numpy as np import pandas as pd @@ -839,6 +839,7 @@ def test_unicode_column_name(self): df.to_sql('test_unicode', self.conn, index=False) +@pytest.mark.single class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi, unittest.TestCase): """ Test the public API as it would be used directly @@ -1024,10 +1025,12 @@ def tearDown(self): super(_EngineToConnMixin, self).tearDown() +@pytest.mark.single class TestSQLApiConn(_EngineToConnMixin, TestSQLApi, unittest.TestCase): pass +@pytest.mark.single class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi, unittest.TestCase): """ Test the public sqlite connection fallback API @@ -1875,30 +1878,36 @@ def test_schema_support(self): tm.assert_frame_equal(res1, res2) +@pytest.mark.single class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy, unittest.TestCase): pass +@pytest.mark.single class TestMySQLAlchemyConn(_TestMySQLAlchemy, _TestSQLAlchemyConn, unittest.TestCase): pass +@pytest.mark.single class TestPostgreSQLAlchemy(_TestPostgreSQLAlchemy, _TestSQLAlchemy, unittest.TestCase): pass +@pytest.mark.single class TestPostgreSQLAlchemyConn(_TestPostgreSQLAlchemy, _TestSQLAlchemyConn, unittest.TestCase): pass +@pytest.mark.single class TestSQLiteAlchemy(_TestSQLiteAlchemy, _TestSQLAlchemy, unittest.TestCase): pass +@pytest.mark.single class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, unittest.TestCase): pass @@ -1907,6 +1916,7 @@ class TestSQLiteAlchemyConn(_TestSQLiteAlchemy, _TestSQLAlchemyConn, # ----------------------------------------------------------------------------- # -- Test Sqlite / MySQL fallback +@pytest.mark.single class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest, unittest.TestCase): """ Test the fallback mode against an in-memory sqlite database. 
@@ -2133,6 +2143,7 @@ def _skip_if_no_pymysql(): pytest.skip('pymysql not installed, skipping') +@pytest.mark.single class TestXSQLite(SQLiteMixIn, tm.TestCase): def setUp(self): @@ -2343,6 +2354,7 @@ def clean_up(test_table_to_drop): clean_up(table_name) +@pytest.mark.single class TestSQLFlavorDeprecation(tm.TestCase): """ gh-13611: test that the 'flavor' parameter @@ -2367,8 +2379,9 @@ def test_deprecated_flavor(self): getattr(sql, func)(self.con, flavor='sqlite') -@unittest.skip("gh-13611: there is no support for MySQL " - "if SQLAlchemy is not installed") +@pytest.mark.single +@pytest.mark.skip(reason="gh-13611: there is no support for MySQL " + "if SQLAlchemy is not installed") class TestXMySQL(MySQLMixIn, tm.TestCase): @classmethod diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3add568c1ea99..1bb1f91423a9d 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2,6 +2,7 @@ import pytest import sys import warnings +from warnings import catch_warnings from datetime import datetime from numpy.random import randn @@ -291,8 +292,7 @@ def test_how_compat(self): for op in ['mean', 'sum', 'std', 'var', 'kurt', 'skew']: for t in ['rolling', 'expanding']: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): dfunc = getattr(pd, "{0}_{1}".format(t, op)) if dfunc is None: @@ -526,7 +526,7 @@ def setUp(self): def test_deprecations(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): mom.rolling_mean(np.ones(10), 3, center=True, axis=0) mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) @@ -791,7 +791,7 @@ def test_cmov_mean(self): xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_mean(vals, 5, center=True) tm.assert_almost_equal(xp, rs) @@ -808,7 +808,7 @@ def test_cmov_window(self): xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, 12.952, np.nan, np.nan]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) tm.assert_almost_equal(xp, rs) @@ -823,19 +823,19 @@ def test_cmov_window_corner(self): # all nan vals = np.empty(10, dtype=float) vals.fill(np.nan) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) self.assertTrue(np.isnan(rs).all()) # empty vals = np.array([]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) self.assertEqual(len(rs), 0) # shorter than window vals = np.random.randn(5) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): rs = mom.rolling_window(vals, 10, 'boxcar') self.assertTrue(np.isnan(rs).all()) self.assertEqual(len(rs), 5) @@ -1014,16 +1014,16 @@ def test_cmov_window_special_linear_range(self): tm.assert_series_equal(xp, rs) def test_rolling_median(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_moment_func(mom.rolling_median, np.median, name='median') def test_rolling_min(self): - with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_moment_func(mom.rolling_min, np.min, name='min') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): a = np.array([1, 2, 3, 4, 5]) b = mom.rolling_min(a, window=100, min_periods=1) tm.assert_almost_equal(b, np.ones(len(a))) @@ -1033,10 +1033,10 @@ def test_rolling_min(self): def test_rolling_max(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_moment_func(mom.rolling_max, np.max, name='max') - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): a = np.array([1, 2, 3, 4, 5], dtype=np.float64) b = mom.rolling_max(a, window=100, min_periods=1) tm.assert_almost_equal(a, b) @@ -1102,11 +1102,11 @@ def test_rolling_apply_out_of_bounds(self): arr = np.arange(4) # it works! - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_apply(arr, 10, np.sum) self.assertTrue(isnull(result).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) tm.assert_almost_equal(result, result) @@ -1117,19 +1117,19 @@ def test_rolling_std(self): name='std', ddof=0) def test_rolling_std_1obs(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), 1, min_periods=1) expected = np.array([np.nan] * 5) tm.assert_almost_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), 1, min_periods=1, ddof=0) expected = np.zeros(5) tm.assert_almost_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), 3, min_periods=2) self.assertTrue(np.isnan(result[2])) @@ -1142,11 +1142,11 @@ def test_rolling_std_neg_sqrt(self): a = np.array([0.0011448196318903589, 0.00028718669878572767, 0.00028718669878572767, 0.00028718669878572767, 0.00028718669878572767]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): b = mom.rolling_std(a, window=3) self.assertTrue(np.isfinite(b[2:]).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): b = mom.ewmstd(a, span=3) self.assertTrue(np.isfinite(b[2:]).all()) @@ -1184,25 +1184,25 @@ def test_fperr_robustness(self): if sys.byteorder != "little": arr = arr.byteswap().newbyteorder() - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_sum(arr, 2) self.assertTrue((result[1:] >= 0).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_mean(arr, 2) self.assertTrue((result[1:] >= 0).all()) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_var(arr, 2) self.assertTrue((result[1:] >= 0).all()) # #2527, ugh arr = np.array([0.00012456, 0.0003, 0]) - with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_mean(arr, 1) self.assertTrue(result[-1] >= 0) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.rolling_mean(-arr, 1) self.assertTrue(result[-1] <= 0) @@ -1327,15 +1327,13 @@ def get_result(obj, window, min_periods=None, freq=None, center=False): # catch a freq deprecation warning if freq is provided and not # None - w = FutureWarning if freq is not None else None - with tm.assert_produces_warning(w, check_stacklevel=False): + with catch_warnings(record=True): r = obj.rolling(window=window, min_periods=min_periods, freq=freq, center=center) return getattr(r, name)(**kwargs) # check via the moments API - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): return f(obj, window=window, min_periods=min_periods, freq=freq, center=center, **kwargs) @@ -1419,7 +1417,7 @@ def test_ewma(self): arr = np.zeros(1000) arr[5] = 1 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = mom.ewma(arr, span=100, adjust=False).sum() self.assertTrue(np.abs(result - 1) < 1e-2) @@ -1506,7 +1504,7 @@ def test_ewmvol(self): self._check_ew(mom.ewmvol, name='vol') def test_ewma_span_com_args(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): A = mom.ewma(self.arr, com=9.5) B = mom.ewma(self.arr, span=20) tm.assert_almost_equal(A, B) @@ -1515,7 +1513,7 @@ def test_ewma_span_com_args(self): self.assertRaises(ValueError, mom.ewma, self.arr) def test_ewma_halflife_arg(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): A = mom.ewma(self.arr, com=13.932726172912965) B = mom.ewma(self.arr, halflife=10.0) tm.assert_almost_equal(A, B) @@ -1530,7 +1528,7 @@ def test_ewma_halflife_arg(self): def test_ewma_alpha_old_api(self): # GH 10789 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): a = mom.ewma(self.arr, alpha=0.61722699889169674) b = mom.ewma(self.arr, com=0.62014947789973052) c = mom.ewma(self.arr, span=2.240298955799461) @@ -1541,7 +1539,7 @@ def test_ewma_alpha_old_api(self): def test_ewma_alpha_arg_old_api(self): # GH 10789 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assertRaises(ValueError, mom.ewma, self.arr) self.assertRaises(ValueError, mom.ewma, self.arr, com=10.0, alpha=0.5) @@ -1598,13 +1596,12 @@ def test_ew_empty_arrays(self): funcs = [mom.ewma, mom.ewmvol, mom.ewmvar] for f in funcs: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = f(arr, 3) tm.assert_almost_equal(result, arr) def _check_ew(self, func, name=None): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._check_ew_ndarray(func, name=name) self._check_ew_structures(func, name=name) @@ -2870,7 +2867,7 @@ def test_rolling_max_gh6297(self): expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max() tm.assert_series_equal(expected, x) @@ -2889,14 +2886,14 @@ 
def test_rolling_max_how_resample(self): # Default how should be max expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max() tm.assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max(how='median') tm.assert_series_equal(expected, x) @@ -2904,7 +2901,7 @@ def test_rolling_max_how_resample(self): v = (4.0 + 10.0 + 20.0) / 3.0 expected = Series([0.0, 1.0, 2.0, 3.0, v], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').max(how='mean') tm.assert_series_equal(expected, x) @@ -2923,7 +2920,7 @@ def test_rolling_min_how_resample(self): # Default how should be min expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): r = series.rolling(window=1, freq='D') tm.assert_series_equal(expected, r.min()) @@ -2942,7 +2939,7 @@ def test_rolling_median_how_resample(self): # Default how should be median expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): x = series.rolling(window=1, freq='D').median() tm.assert_series_equal(expected, x) diff --git a/setup.cfg b/setup.cfg index 45d98dd733f1f..b9de7a3532209 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,3 +25,5 @@ split_penalty_logical_operator = 30 # Silencing the warning until then addopts = --disable-pytest-warnings testpaths = pandas +markers = + single: mark a test as single cpu only diff --git a/test_fast.sh b/test_fast.sh index 0b394cffa3d74..43eb376f879cd 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -1,2 +1 @@ -# nosetests -A "not slow and not network" pandas --with-id $* -pytest pandas --skip-slow +pytest pandas --skip-slow --skip-network -m "not single" -n 4 From 0915857cc9209548d9c26122e822eaef841c6b24 Mon Sep 17 00:00:00 2001 From: Andrew Kittredge Date: Sun, 12 Feb 2017 12:37:13 -0500 Subject: [PATCH 051/353] Typo (#15377) --- doc/source/advanced.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 8833d73cb0a84..b6f015c15606d 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -59,7 +59,7 @@ Creating a MultiIndex (hierarchical index) object The ``MultiIndex`` object is the hierarchical analogue of the standard ``Index`` object which typically stores the axis labels in pandas objects. You -can think of ``MultiIndex`` an array of tuples where each tuple is unique. A +can think of ``MultiIndex`` as an array of tuples where each tuple is unique. 
A ``MultiIndex`` can be created from a list of arrays (using ``MultiIndex.from_arrays``), an array of tuples (using ``MultiIndex.from_tuples``), or a crossed set of iterables (using From a0f7fc061ca37ab992e320bd3d1b7b130e500469 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 11:46:48 -0500 Subject: [PATCH 052/353] TST: control skipping of numexpr tests if its installed / used --- pandas/tests/test_expressions.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 0318757f76a11..3032a288032a2 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -20,9 +20,6 @@ import pandas.util.testing as tm -if not expr._USE_NUMEXPR: - numexpr = pytest.importorskip('numexpr') - _frame = DataFrame(randn(10000, 4), columns=list('ABCD'), dtype='float64') _frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64') _mixed = DataFrame({'A': _frame['A'].copy(), @@ -50,6 +47,7 @@ _mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) +@pytest.mark.skipif(not expr._USE_NUMEXPR, reason='not using numexpr') class TestExpressions(tm.TestCase): def setUp(self): From dda3c4292b28d4dbead8bb6ae9927373aea9fe23 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 12:51:11 -0500 Subject: [PATCH 053/353] TST: make test_gbq single cpu --- pandas/tests/io/test_gbq.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 0868edd2147b5..0317ebc49ad2c 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -253,6 +253,7 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) +@pytest.mark.single class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -298,6 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -329,6 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -360,6 +363,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) +@pytest.mark.single class GBQUnitTests(tm.TestCase): def setUp(self): @@ -446,6 +450,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) +@pytest.mark.single class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -499,6 +504,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) +@pytest.mark.single class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -901,6 +907,7 @@ def test_configuration_without_query(self): configuration=config) +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. 
@@ -1215,6 +1222,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') +@pytest.mark.single class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1272,6 +1280,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. From 010393c4cb650b78e3e51af417e7037737e8d3b6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 21:43:50 -0500 Subject: [PATCH 054/353] ENH: expose Int64VectorData in hashtable.pxd --- pandas/hashtable.pxd | 14 ++++++++++++++ pandas/src/hashtable_class_helper.pxi.in | 12 +++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pandas/hashtable.pxd b/pandas/hashtable.pxd index cd06b938310a8..cabfa43a76f26 100644 --- a/pandas/hashtable.pxd +++ b/pandas/hashtable.pxd @@ -1,5 +1,6 @@ from khash cimport (kh_int64_t, kh_uint64_t, kh_float64_t, kh_pymap_t, kh_str_t, uint64_t, int64_t, float64_t) +from numpy cimport ndarray # prototypes for sharing @@ -35,3 +36,16 @@ cdef class StringHashTable(HashTable): cpdef get_item(self, object val) cpdef set_item(self, object key, Py_ssize_t val) + +cdef struct Int64VectorData: + int64_t *data + size_t n, m + +cdef class Int64Vector: + cdef Int64VectorData *data + cdef ndarray ao + + cdef resize(self) + cpdef to_array(self) + cdef inline void append(self, int64_t x) + cdef extend(self, int64_t[:] x) diff --git a/pandas/src/hashtable_class_helper.pxi.in b/pandas/src/hashtable_class_helper.pxi.in index 74c38dfdb393e..ef385ba7dca1c 100644 --- a/pandas/src/hashtable_class_helper.pxi.in +++ b/pandas/src/hashtable_class_helper.pxi.in @@ -24,10 +24,14 @@ dtypes = [('Float64', 'float64', 'float64_t'), {{for name, dtype, arg in dtypes}} +{{if dtype != 'int64'}} + ctypedef struct {{name}}VectorData: {{arg}} *data size_t n, m +{{endif}} + @cython.wraparound(False) @cython.boundscheck(False) @@ -65,9 +69,11 @@ dtypes = [('Float64', 'float64', 'float64_t', 'np.float64'), cdef class {{name}}Vector: + {{if dtype != 'int64'}} cdef: {{name}}VectorData *data ndarray ao + {{endif}} def __cinit__(self): self.data = <{{name}}VectorData *>PyMem_Malloc( @@ -92,7 +98,7 @@ cdef class {{name}}Vector: def __len__(self): return self.data.n - def to_array(self): + cpdef to_array(self): self.ao.resize(self.data.n) self.data.m = self.data.n return self.ao @@ -104,6 +110,10 @@ cdef class {{name}}Vector: append_data_{{dtype}}(self.data, x) + cdef extend(self, {{arg}}[:] x): + for i in range(len(x)): + self.append(x[i]) + {{endfor}} cdef class StringVector: From d9e75c7e724e5f7449c8c57624ce9395c9ffe11a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Feb 2017 21:54:11 -0500 Subject: [PATCH 055/353] TST: xfail most test_gbq tests for now --- pandas/tests/io/test_gbq.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 0317ebc49ad2c..316afaf306011 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) -@pytest.mark.single +@pytest.mark.xfail(run=False, 
reason="flaky tests") class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -363,7 +363,6 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.single class GBQUnitTests(tm.TestCase): def setUp(self): @@ -450,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -504,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -907,7 +906,7 @@ def test_configuration_without_query(self): configuration=config) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1022,8 +1021,6 @@ def test_upload_data_if_table_exists_append(self): def test_upload_data_if_table_exists_replace(self): - pytest.skip("buggy test") - destination_table = DESTINATION_TABLE + "4" test_size = 10 @@ -1222,7 +1219,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1280,7 +1277,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="flaky tests") class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. From 86ca84d8ec79eba5fe31bf0d4cbb24ec78fc333a Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 14 Feb 2017 08:29:18 -0500 Subject: [PATCH 056/353] TST: Fix gbq integration tests. gbq._Dataset.dataset() would not return full results This PR resolves an issue where `gbq._Dataset.datasets()` would not return all datasets under a Google BigQuery project. If `'nextPageToken'` is populated, then another `datasets().list()` request should be sent with `'pageToken'` set to collect more results. 
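A minimal sketch of that token-following loop (assuming the ``google-api-python-client`` ``service`` object the gbq connector builds, with ``project_id`` as a placeholder):

def list_all_datasets(service, project_id):
    # keep requesting pages until BigQuery stops returning a nextPageToken
    dataset_ids = []
    page_token = None
    while True:
        response = service.datasets().list(
            projectId=project_id, pageToken=page_token).execute()
        for entry in response.get('datasets', []):
            dataset_ids.append(entry['datasetReference']['datasetId'])
        page_token = response.get('nextPageToken')
        if not page_token:
            return dataset_ids

The same pattern applies to ``tables().list()``, which this patch updates in the same way.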
In the past few days, additional datasets were added under the Google BigQuery project id used by pandas as part of the following github project : https://github.com/pydata/pandas-gbq . The addition of datasets caused many gbq unit tests to fail because in function `clean_gbq_environment()`, we check to see if the dataset exists using the incomplete results from `gbq._Dataset.datasets()` before we attempt to delete it. Author: Anthonios Partheniou Closes #15381 from parthea/fix-broken-gbq-unit-tests and squashes the following commits: 61bc1e7 [Anthonios Partheniou] TST: Fix gbq tests. gbq.dataset()/gbq.tables would not return full results. --- pandas/io/gbq.py | 67 ++++++++++++++++++++++++------------- pandas/tests/io/test_gbq.py | 16 ++++----- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 169a2b1df9b4c..0ffb6b4bf8c05 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1056,21 +1056,32 @@ def datasets(self): List of datasets under the specific project """ - try: - list_dataset_response = self.service.datasets().list( - projectId=self.project_id).execute().get('datasets', None) + dataset_list = [] + next_page_token = None + first_query = True - if not list_dataset_response: - return [] + while first_query or next_page_token: + first_query = False - dataset_list = list() + try: + list_dataset_response = self.service.datasets().list( + projectId=self.project_id, + pageToken=next_page_token).execute() - for row_num, raw_row in enumerate(list_dataset_response): - dataset_list.append(raw_row['datasetReference']['datasetId']) + dataset_response = list_dataset_response.get('datasets') + next_page_token = list_dataset_response.get('nextPageToken') - return dataset_list - except self.http_error as ex: - self.process_http_error(ex) + if not dataset_response: + return dataset_list + + for row_num, raw_row in enumerate(dataset_response): + dataset_list.append( + raw_row['datasetReference']['datasetId']) + + except self.http_error as ex: + self.process_http_error(ex) + + return dataset_list def create(self, dataset_id): """ Create a dataset in Google BigQuery @@ -1140,19 +1151,29 @@ def tables(self, dataset_id): List of tables under the specific dataset """ - try: - list_table_response = self.service.tables().list( - projectId=self.project_id, - datasetId=dataset_id).execute().get('tables', None) + table_list = [] + next_page_token = None + first_query = True - if not list_table_response: - return [] + while first_query or next_page_token: + first_query = False - table_list = list() + try: + list_table_response = self.service.tables().list( + projectId=self.project_id, + datasetId=dataset_id, + pageToken=next_page_token).execute() - for row_num, raw_row in enumerate(list_table_response): - table_list.append(raw_row['tableReference']['tableId']) + table_response = list_table_response.get('tables') + next_page_token = list_table_response.get('nextPageToken') - return table_list - except self.http_error as ex: - self.process_http_error(ex) + if not table_response: + return table_list + + for row_num, raw_row in enumerate(table_response): + table_list.append(raw_row['tableReference']['tableId']) + + except self.http_error as ex: + self.process_http_error(ex) + + return table_list diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 316afaf306011..dfbf3ca69b111 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated(): 
gbq.generate_bq_schema(df) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -449,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -503,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -906,7 +906,7 @@ def test_configuration_without_query(self): configuration=config) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1219,7 +1219,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1277,7 +1277,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) -@pytest.mark.xfail(run=False, reason="flaky tests") +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. 
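The incomplete listing broke so many tests because the cleanup step depends on it: ``clean_gbq_environment()`` only deletes datasets that ``datasets()`` actually reports, so anything missing from a truncated listing survives and collides with later runs. A rough sketch of that dependency, assuming a ``_Dataset``-like helper exposing ``datasets()`` and ``delete()`` and a hypothetical ``prefix`` naming convention for test datasets:

def clean_test_datasets(dataset, prefix):
    # only datasets visible in the (now complete) listing can be removed;
    # with a truncated listing, leftovers accumulate and subsequent runs
    # trip over them
    removed = []
    for dataset_id in dataset.datasets():
        if dataset_id.startswith(prefix):
            dataset.delete(dataset_id)
            removed.append(dataset_id)
    return removed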
From ff0deecbc8f8e9ae3d274e5e7cd7c0056de1a6c2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 14 Feb 2017 08:33:34 -0500 Subject: [PATCH 057/353] Bug: Raise ValueError with interpolate & fillna limit = 0 (#9217) closes #9217 Author: Matt Roeschke Closes #14994 from mroeschke/fix_9217 and squashes the following commits: c1790ee [Matt Roeschke] Unify ValueError message and correct cython limits 6f041e6 [Matt Roeschke] Bug: Raise ValueError with interpolate limit = 0 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 6 ++--- pandas/core/internals.py | 4 +++ pandas/core/missing.py | 8 ++++-- pandas/src/algos_common_helper.pxi.in | 36 ++++++++++++++++++--------- pandas/tests/series/test_missing.py | 18 ++++++++++++++ 6 files changed, 56 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index aa620bce0df59..d76e33caffbf1 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -421,6 +421,7 @@ Other API Changes - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) - ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) +- ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`) - ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`) - ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than than one byte (:issue:`11592`) - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 228dd2acd2124..20e6e027dbf09 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3262,7 +3262,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, a gap with more than this number of consecutive NaNs, it will only be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be - filled. + filled. Must be greater than 0 if not None. downcast : dict, default is None a dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate @@ -3281,6 +3281,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): inplace = validate_bool_kwarg(inplace, 'inplace') + if isinstance(value, (list, tuple)): raise TypeError('"value" parameter must be a scalar or dict, but ' 'you passed a "{0}"'.format(type(value).__name__)) @@ -3292,7 +3293,6 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, axis = 0 axis = self._get_axis_number(axis) method = missing.clean_fill_method(method) - from pandas import DataFrame if value is None: if method is None: @@ -3687,7 +3687,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, * 0: fill column-by-column * 1: fill row-by-row limit : int, default None. - Maximum number of consecutive NaNs to fill. + Maximum number of consecutive NaNs to fill. Must be greater than 0. 
limit_direction : {'forward', 'backward', 'both'}, default 'forward' If limit is specified, consecutive NaNs will be filled in this direction. diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f0b1516d786c6..6cd5eceed5f2a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -372,6 +372,10 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, original_value = value mask = isnull(self.values) if limit is not None: + if not is_integer(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') if self.ndim > 2: raise NotImplementedError("number of dimensions for 'fillna' " "is currently limited to 2") diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e83a0518d97f6..ffd0423572f5e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -12,7 +12,7 @@ is_float_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, _ensure_float64, is_scalar, - needs_i8_conversion) + needs_i8_conversion, is_integer) from pandas.types.missing import isnull @@ -169,7 +169,11 @@ def _interp_limit(invalid, fw_limit, bw_limit): # the beginning (see issues #9218 and #10420) violate_limit = sorted(start_nans) - if limit: + if limit is not None: + if not is_integer(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') if limit_direction == 'forward': violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in index 5e87528943005..42089f9520ab6 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/src/algos_common_helper.pxi.in @@ -83,8 +83,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, if limit is None: lim = nright else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: @@ -146,8 +148,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit val = values[0] @@ -180,8 +184,10 @@ def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit for j in range(K): @@ -240,8 +246,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, if limit is None: lim = nright else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: @@ -304,8 +312,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit 
must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit val = values[N - 1] @@ -338,8 +348,10 @@ def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values, if limit is None: lim = N else: - if limit < 0: - raise ValueError('Limit must be non-negative') + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') lim = limit for j in range(K): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 405d6c98a5d37..23eb6a40f5f1d 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -295,6 +295,13 @@ def test_fillna_raise(self): self.assertRaises(TypeError, s.fillna, [1, 2]) self.assertRaises(TypeError, s.fillna, (1, 2)) + # related GH 9217, make sure limit is an int and greater than 0 + s = Series([1, 2, 3, None]) + for limit in [-1, 0, 1., 2.]: + for method in ['backfill', 'bfill', 'pad', 'ffill', None]: + with tm.assertRaises(ValueError): + s.fillna(1, limit=limit, method=method) + def test_fillna_nat(self): series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') @@ -865,6 +872,17 @@ def test_interp_limit(self): result = s.interpolate(method='linear', limit=2) assert_series_equal(result, expected) + # GH 9217, make sure limit is an int and greater than 0 + methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero', + 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', + 'polynomial', 'spline', 'piecewise_polynomial', None, + 'from_derivatives', 'pchip', 'akima'] + s = pd.Series([1, 2, np.nan, np.nan, 5]) + for limit in [-1, 0, 1., 2.]: + for method in methods: + with tm.assertRaises(ValueError): + s.interpolate(limit=limit, method=method) + def test_interp_limit_forward(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 5959fe1fffe4b5749de63d6a26ac64349bc791ac Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 17:35:05 -0500 Subject: [PATCH 058/353] CLN: create core/sorting.py just a small reorg to put sorting / grouping utilities into a separate area Author: Jeff Reback Closes #15402 from jreback/sorting and squashes the following commits: fdcf9a1 [Jeff Reback] change a couple of sorting.py functions to be non-private (public to pandas internals) 90ff22d [Jeff Reback] split up some value_counts groupby tests a bit 18ea902 [Jeff Reback] CLN: create core/sorting.py 92dcb07 [Jeff Reback] CLN: remove numpy_groupby as not used --- pandas/core/frame.py | 26 +- pandas/core/groupby.py | 376 +--------------------- pandas/core/reshape.py | 13 +- pandas/core/series.py | 10 +- pandas/core/sorting.py | 357 ++++++++++++++++++++ pandas/indexes/multi.py | 12 +- pandas/tests/groupby/test_filters.py | 21 -- pandas/tests/groupby/test_groupby.py | 169 ---------- pandas/tests/groupby/test_misc.py | 101 ------ pandas/tests/groupby/test_value_counts.py | 60 ++++ pandas/tests/test_sorting.py | 339 +++++++++++++++++++ pandas/tests/tools/test_merge.py | 135 +------- pandas/tools/merge.py | 4 +- 13 files changed, 802 insertions(+), 821 deletions(-) create mode 100644 pandas/core/sorting.py delete mode 100644 pandas/tests/groupby/test_misc.py create mode 100644 pandas/tests/groupby/test_value_counts.py create mode 100644 pandas/tests/test_sorting.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aa03bfb9a54b9..16f8d4658dc20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3141,7 +3141,7 @@ def duplicated(self, subset=None, 
keep='first'): ------- duplicated : Series """ - from pandas.core.groupby import get_group_index + from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT def f(vals): @@ -3179,7 +3179,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, raise ValueError('Length of ascending (%d) != length of by (%d)' % (len(ascending), len(by))) if len(by) > 1: - from pandas.core.groupby import _lexsort_indexer + from pandas.core.sorting import lexsort_indexer def trans(v): if needs_i8_conversion(v): @@ -3193,11 +3193,11 @@ def trans(v): raise ValueError('Cannot sort by duplicate column %s' % str(x)) keys.append(trans(k)) - indexer = _lexsort_indexer(keys, orders=ascending, - na_position=na_position) + indexer = lexsort_indexer(keys, orders=ascending, + na_position=na_position) indexer = _ensure_platform_int(indexer) else: - from pandas.core.groupby import _nargsort + from pandas.core.sorting import nargsort by = by[0] k = self.xs(by, axis=other_axis).values @@ -3214,8 +3214,8 @@ def trans(v): if isinstance(ascending, (tuple, list)): ascending = ascending[0] - indexer = _nargsort(k, kind=kind, ascending=ascending, - na_position=na_position) + indexer = nargsort(k, kind=kind, ascending=ascending, + na_position=na_position) new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), @@ -3300,17 +3300,17 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, sort_remaining=sort_remaining) elif isinstance(labels, MultiIndex): - from pandas.core.groupby import _lexsort_indexer + from pandas.core.sorting import lexsort_indexer # make sure that the axis is lexsorted to start # if not we need to reconstruct to get the correct indexer if not labels.is_lexsorted(): labels = MultiIndex.from_tuples(labels.values) - indexer = _lexsort_indexer(labels.labels, orders=ascending, - na_position=na_position) + indexer = lexsort_indexer(labels.labels, orders=ascending, + na_position=na_position) else: - from pandas.core.groupby import _nargsort + from pandas.core.sorting import nargsort # GH11080 - Check monotonic-ness before sort an index # if monotonic (already sorted), return None or copy() according @@ -3322,8 +3322,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, else: return self.copy() - indexer = _nargsort(labels, kind=kind, ascending=ascending, - na_position=na_position) + indexer = nargsort(labels, kind=kind, ascending=ascending, + na_position=na_position) new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a228861270aea..23c835318b0e6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -7,7 +7,7 @@ import copy from pandas.compat import ( - zip, range, long, lzip, + zip, range, lzip, callable, map ) from pandas import compat @@ -47,6 +47,9 @@ from pandas.core.internals import BlockManager, make_block from pandas.core.series import Series from pandas.core.panel import Panel +from pandas.core.sorting import (get_group_index_sorter, get_group_index, + compress_group_index, get_flattened_iterator, + decons_obs_group_ids, get_indexer_dict) from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature, deprecate_kwarg) from pandas.formats.printing import pprint_thing @@ -59,7 +62,6 @@ from pandas.lib import Timestamp import pandas.tslib as tslib import pandas.algos as _algos -import pandas.hashtable as _hash _doc_template = """ @@ -729,7 +731,7 @@ def 
_cumcount_array(self, ascending=True): (though the default is sort=True) for groupby in general """ ids, _, ngroups = self.grouper.group_info - sorter = _get_group_index_sorter(ids, ngroups) + sorter = get_group_index_sorter(ids, ngroups) ids, count = ids[sorter], len(ids) if count == 0: @@ -1616,9 +1618,12 @@ def _get_group_keys(self): return self.levels[0] else: comp_ids, _, ngroups = self.group_info + # provide "flattened" iterator for multi-group setting - mapper = _KeyMapper(comp_ids, ngroups, self.labels, self.levels) - return [mapper.get_key(i) for i in range(ngroups)] + return get_flattened_iterator(comp_ids, + ngroups, + self.levels, + self.labels) def apply(self, f, data, axis=0): mutated = self.mutated @@ -1662,7 +1667,7 @@ def indices(self): label_list = [ping.labels for ping in self.groupings] keys = [_values_from_object(ping.group_index) for ping in self.groupings] - return _get_indices_dict(label_list, keys) + return get_indexer_dict(label_list, keys) @property def labels(self): @@ -1726,7 +1731,7 @@ def _get_compressed_labels(self): if len(all_labels) > 1: group_index = get_group_index(all_labels, self.shape, sort=True, xnull=True) - return _compress_group_index(group_index, sort=self.sort) + return compress_group_index(group_index, sort=self.sort) ping = self.groupings[0] return ping.labels, np.arange(len(ping.group_index)) @@ -2027,7 +2032,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)).to_dense() - indexer = _get_group_index_sorter(group_index, ngroups) + indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer, convert=False) group_index = algos.take_nd(group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, @@ -2424,7 +2429,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, a BaseGrouper. """ - group_axis = obj._get_axis(axis) # validate that the passed level is compatible with the passed @@ -4206,7 +4210,7 @@ def slabels(self): @cache_readonly def sort_idx(self): # Counting sort indexer - return _get_group_index_sorter(self.labels, self.ngroups) + return get_group_index_sorter(self.labels, self.ngroups) def __iter__(self): sdata = self._get_sorted_data() @@ -4302,355 +4306,3 @@ def get_splitter(data, *args, **kwargs): klass = NDFrameSplitter return klass(data, *args, **kwargs) - - -# ---------------------------------------------------------------------- -# Misc utilities - - -def get_group_index(labels, shape, sort, xnull): - """ - For the particular label_list, gets the offsets into the hypothetical list - representing the totally ordered cartesian product of all possible label - combinations, *as long as* this space fits within int64 bounds; - otherwise, though group indices identify unique combinations of - labels, they cannot be deconstructed. - - If `sort`, rank of returned ids preserve lexical ranks of labels. - i.e. returned id's can be used to do lexical sort on labels; - - If `xnull` nulls (-1 labels) are passed through. - - Parameters - ---------- - labels: sequence of arrays - Integers identifying levels at each location - shape: sequence of ints same length as labels - Number of unique levels at each location - sort: boolean - If the ranks of returned ids should match lexical ranks of labels - xnull: boolean - If true nulls are excluded. i.e. 
-1 values in the labels are - passed through - Returns - ------- - An array of type int64 where two elements are equal if their corresponding - labels are equal at all location. - """ - def _int64_cut_off(shape): - acc = long(1) - for i, mul in enumerate(shape): - acc *= long(mul) - if not acc < _INT64_MAX: - return i - return len(shape) - - def loop(labels, shape): - # how many levels can be done without overflow: - nlev = _int64_cut_off(shape) - - # compute flat ids for the first `nlev` levels - stride = np.prod(shape[1:nlev], dtype='i8') - out = stride * labels[0].astype('i8', subok=False, copy=False) - - for i in range(1, nlev): - if shape[i] == 0: - stride = 0 - else: - stride //= shape[i] - out += labels[i] * stride - - if xnull: # exclude nulls - mask = labels[0] == -1 - for lab in labels[1:nlev]: - mask |= lab == -1 - out[mask] = -1 - - if nlev == len(shape): # all levels done! - return out - - # compress what has been done so far in order to avoid overflow - # to retain lexical ranks, obs_ids should be sorted - comp_ids, obs_ids = _compress_group_index(out, sort=sort) - - labels = [comp_ids] + labels[nlev:] - shape = [len(obs_ids)] + shape[nlev:] - - return loop(labels, shape) - - def maybe_lift(lab, size): # pormote nan values - return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) - - labels = map(_ensure_int64, labels) - if not xnull: - labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) - - return loop(list(labels), list(shape)) - - -_INT64_MAX = np.iinfo(np.int64).max - - -def _int64_overflow_possible(shape): - the_prod = long(1) - for x in shape: - the_prod *= long(x) - - return the_prod >= _INT64_MAX - - -def decons_group_index(comp_labels, shape): - # reconstruct labels - if _int64_overflow_possible(shape): - # at some point group indices are factorized, - # and may not be deconstructed here! wrong path! - raise ValueError('cannot deconstruct factorized group indices!') - - label_list = [] - factor = 1 - y = 0 - x = comp_labels - for i in reversed(range(len(shape))): - labels = (x - y) % (factor * shape[i]) // factor - np.putmask(labels, comp_labels < 0, -1) - label_list.append(labels) - y = labels * factor - factor *= shape[i] - return label_list[::-1] - - -def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): - """ - reconstruct labels from observed group ids - - Parameters - ---------- - xnull: boolean, - if nulls are excluded; i.e. -1 labels are passed through - """ - from pandas.hashtable import unique_label_indices - - if not xnull: - lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8') - shape = np.asarray(shape, dtype='i8') + lift - - if not _int64_overflow_possible(shape): - # obs ids are deconstructable! take the fast route! 
- out = decons_group_index(obs_ids, shape) - return out if xnull or not lift.any() \ - else [x - y for x, y in zip(out, lift)] - - i = unique_label_indices(comp_ids) - i8copy = lambda a: a.astype('i8', subok=False, copy=True) - return [i8copy(lab[i]) for lab in labels] - - -def _indexer_from_factorized(labels, shape, compress=True): - ids = get_group_index(labels, shape, sort=True, xnull=False) - - if not compress: - ngroups = (ids.size and ids.max()) + 1 - else: - ids, obs = _compress_group_index(ids, sort=True) - ngroups = len(obs) - - return _get_group_index_sorter(ids, ngroups) - - -def _lexsort_indexer(keys, orders=None, na_position='last'): - labels = [] - shape = [] - if isinstance(orders, bool): - orders = [orders] * len(keys) - elif orders is None: - orders = [True] * len(keys) - - for key, order in zip(keys, orders): - - # we are already a Categorical - if is_categorical_dtype(key): - c = key - - # create the Categorical - else: - c = Categorical(key, ordered=True) - - if na_position not in ['last', 'first']: - raise ValueError('invalid na_position: {!r}'.format(na_position)) - - n = len(c.categories) - codes = c.codes.copy() - - mask = (c.codes == -1) - if order: # ascending - if na_position == 'last': - codes = np.where(mask, n, codes) - elif na_position == 'first': - codes += 1 - else: # not order means descending - if na_position == 'last': - codes = np.where(mask, n, n - codes - 1) - elif na_position == 'first': - codes = np.where(mask, 0, n - codes) - if mask.any(): - n += 1 - - shape.append(n) - labels.append(codes) - - return _indexer_from_factorized(labels, shape) - - -def _nargsort(items, kind='quicksort', ascending=True, na_position='last'): - """ - This is intended to be a drop-in replacement for np.argsort which - handles NaNs. It adds ascending and na_position parameters. - GH #6399, #5231 - """ - - # specially handle Categorical - if is_categorical_dtype(items): - return items.argsort(ascending=ascending) - - items = np.asanyarray(items) - idx = np.arange(len(items)) - mask = isnull(items) - non_nans = items[~mask] - non_nan_idx = idx[~mask] - nan_idx = np.nonzero(mask)[0] - if not ascending: - non_nans = non_nans[::-1] - non_nan_idx = non_nan_idx[::-1] - indexer = non_nan_idx[non_nans.argsort(kind=kind)] - if not ascending: - indexer = indexer[::-1] - # Finally, place the NaNs at the end or the beginning according to - # na_position - if na_position == 'last': - indexer = np.concatenate([indexer, nan_idx]) - elif na_position == 'first': - indexer = np.concatenate([nan_idx, indexer]) - else: - raise ValueError('invalid na_position: {!r}'.format(na_position)) - return indexer - - -class _KeyMapper(object): - - """ - Ease my suffering. 
Map compressed group id -> key tuple - """ - - def __init__(self, comp_ids, ngroups, labels, levels): - self.levels = levels - self.labels = labels - self.comp_ids = comp_ids.astype(np.int64) - - self.k = len(labels) - self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)] - - self._populate_tables() - - def _populate_tables(self): - for labs, table in zip(self.labels, self.tables): - table.map(self.comp_ids, labs.astype(np.int64)) - - def get_key(self, comp_id): - return tuple(level[table.get_item(comp_id)] - for table, level in zip(self.tables, self.levels)) - - -def _get_indices_dict(label_list, keys): - shape = list(map(len, keys)) - - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - ngroups = ((group_index.size and group_index.max()) + 1) \ - if _int64_overflow_possible(shape) \ - else np.prod(shape, dtype='i8') - - sorter = _get_group_index_sorter(group_index, ngroups) - - sorted_labels = [lab.take(sorter) for lab in label_list] - group_index = group_index.take(sorter) - - return lib.indices_fast(sorter, group_index, keys, sorted_labels) - - -# ---------------------------------------------------------------------- -# sorting levels...cleverly? - -def _get_group_index_sorter(group_index, ngroups): - """ - _algos.groupsort_indexer implements `counting sort` and it is at least - O(ngroups), where - ngroups = prod(shape) - shape = map(len, keys) - that is, linear in the number of combinations (cartesian product) of unique - values of groupby keys. This can be huge when doing multi-key groupby. - np.argsort(kind='mergesort') is O(count x log(count)) where count is the - length of the data-frame; - Both algorithms are `stable` sort and that is necessary for correctness of - groupby operations. e.g. consider: - df.groupby(key)[col].transform('first') - """ - count = len(group_index) - alpha = 0.0 # taking complexities literally; there may be - beta = 1.0 # some room for fine-tuning these parameters - do_groupsort = (count > 0 and ((alpha + beta * ngroups) < - (count * np.log(count)))) - if do_groupsort: - sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), - ngroups) - return _ensure_platform_int(sorter) - else: - return group_index.argsort(kind='mergesort') - - -def _compress_group_index(group_index, sort=True): - """ - Group_index is offsets into cartesian product of all possible labels. This - space can be huge, so this function compresses it, by computing offsets - (comp_ids) into the list of unique labels (obs_group_ids). 
- """ - - size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) - table = _hash.Int64HashTable(size_hint) - - group_index = _ensure_int64(group_index) - - # note, group labels come out ascending (ie, 1,2,3 etc) - comp_ids, obs_group_ids = table.get_labels_groupby(group_index) - - if sort and len(obs_group_ids) > 0: - obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) - - return comp_ids, obs_group_ids - - -def _reorder_by_uniques(uniques, labels): - # sorter is index where elements ought to go - sorter = uniques.argsort() - - # reverse_indexer is where elements came from - reverse_indexer = np.empty(len(sorter), dtype=np.int64) - reverse_indexer.put(sorter, np.arange(len(sorter))) - - mask = labels < 0 - - # move labels to right locations (ie, unsort ascending labels) - labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) - np.putmask(labels, mask, -1) - - # sort observed ids - uniques = algos.take_nd(uniques, sorter, allow_fill=False) - - return uniques, labels - - -def numpy_groupby(data, labels, axis=0): - s = np.argsort(labels) - keys, inv = np.unique(labels, return_inverse=True) - i = inv.take(s) - groups_at = np.where(i != np.concatenate(([-1], i[:-1])))[0] - ordered_data = data.take(s, axis=axis) - group_sums = np.add.reduceat(ordered_data, groups_at, axis=axis) - - return group_sums diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index cebaf4e3fd89b..5fc0d590a6885 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -20,7 +20,8 @@ from pandas._sparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable -from pandas.core.groupby import get_group_index, _compress_group_index +from pandas.core.sorting import (get_group_index, compress_group_index, + decons_obs_group_ids) import pandas.core.algorithms as algos import pandas.algos as _algos @@ -156,7 +157,7 @@ def get_result(self): # filter out missing levels if values.shape[1] > 0: - col_inds, obs_ids = _compress_group_index(self.sorted_labels[-1]) + col_inds, obs_ids = compress_group_index(self.sorted_labels[-1]) # rare case, level values not observed if len(obs_ids) < self.full_shape[1]: inds = (value_mask.sum(0) > 0).nonzero()[0] @@ -245,8 +246,6 @@ def get_new_index(self): def _unstack_multiple(data, clocs): - from pandas.core.groupby import decons_obs_group_ids - if len(clocs) == 0: return data @@ -268,7 +267,7 @@ def _unstack_multiple(data, clocs): shape = [len(x) for x in clevels] group_index = get_group_index(clabels, shape, sort=False, xnull=False) - comp_ids, obs_ids = _compress_group_index(group_index, sort=False) + comp_ids, obs_ids = compress_group_index(group_index, sort=False) recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, xnull=False) @@ -459,10 +458,8 @@ def _unstack_frame(obj, level, fill_value=None): def get_compressed_ids(labels, sizes): - from pandas.core.groupby import get_group_index - ids = get_group_index(labels, sizes, sort=True, xnull=False) - return _compress_group_index(ids, sort=True) + return compress_group_index(ids, sort=True) def stack(frame, level=-1, dropna=True): diff --git a/pandas/core/series.py b/pandas/core/series.py index e1eac8f66017e..da47ab5dfb003 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1786,12 +1786,12 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, new_index, indexer = index.sortlevel(level, ascending=ascending, sort_remaining=sort_remaining) elif isinstance(index, MultiIndex): - from pandas.core.groupby import 
_lexsort_indexer - indexer = _lexsort_indexer(index.labels, orders=ascending) + from pandas.core.sorting import lexsort_indexer + indexer = lexsort_indexer(index.labels, orders=ascending) else: - from pandas.core.groupby import _nargsort - indexer = _nargsort(index, kind=kind, ascending=ascending, - na_position=na_position) + from pandas.core.sorting import nargsort + indexer = nargsort(index, kind=kind, ascending=ascending, + na_position=na_position) indexer = _ensure_platform_int(indexer) new_index = index.take(indexer) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py new file mode 100644 index 0000000000000..71314da7745c0 --- /dev/null +++ b/pandas/core/sorting.py @@ -0,0 +1,357 @@ +""" miscellaneous sorting / groupby utilities """ + +import numpy as np +from pandas.compat import long +from pandas.core.categorical import Categorical +from pandas.types.common import (_ensure_platform_int, + _ensure_int64, + is_categorical_dtype) +from pandas.types.missing import isnull +import pandas.core.algorithms as algos +import pandas.algos as _algos +import pandas.hashtable as _hash +from pandas import lib + + +_INT64_MAX = np.iinfo(np.int64).max + + +def get_group_index(labels, shape, sort, xnull): + """ + For the particular label_list, gets the offsets into the hypothetical list + representing the totally ordered cartesian product of all possible label + combinations, *as long as* this space fits within int64 bounds; + otherwise, though group indices identify unique combinations of + labels, they cannot be deconstructed. + - If `sort`, rank of returned ids preserve lexical ranks of labels. + i.e. returned id's can be used to do lexical sort on labels; + - If `xnull` nulls (-1 labels) are passed through. + + Parameters + ---------- + labels: sequence of arrays + Integers identifying levels at each location + shape: sequence of ints same length as labels + Number of unique levels at each location + sort: boolean + If the ranks of returned ids should match lexical ranks of labels + xnull: boolean + If true nulls are excluded. i.e. -1 values in the labels are + passed through + Returns + ------- + An array of type int64 where two elements are equal if their corresponding + labels are equal at all location. + """ + def _int64_cut_off(shape): + acc = long(1) + for i, mul in enumerate(shape): + acc *= long(mul) + if not acc < _INT64_MAX: + return i + return len(shape) + + def loop(labels, shape): + # how many levels can be done without overflow: + nlev = _int64_cut_off(shape) + + # compute flat ids for the first `nlev` levels + stride = np.prod(shape[1:nlev], dtype='i8') + out = stride * labels[0].astype('i8', subok=False, copy=False) + + for i in range(1, nlev): + if shape[i] == 0: + stride = 0 + else: + stride //= shape[i] + out += labels[i] * stride + + if xnull: # exclude nulls + mask = labels[0] == -1 + for lab in labels[1:nlev]: + mask |= lab == -1 + out[mask] = -1 + + if nlev == len(shape): # all levels done! 
+ return out + + # compress what has been done so far in order to avoid overflow + # to retain lexical ranks, obs_ids should be sorted + comp_ids, obs_ids = compress_group_index(out, sort=sort) + + labels = [comp_ids] + labels[nlev:] + shape = [len(obs_ids)] + shape[nlev:] + + return loop(labels, shape) + + def maybe_lift(lab, size): # pormote nan values + return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) + + labels = map(_ensure_int64, labels) + if not xnull: + labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) + + return loop(list(labels), list(shape)) + + +def is_int64_overflow_possible(shape): + the_prod = long(1) + for x in shape: + the_prod *= long(x) + + return the_prod >= _INT64_MAX + + +def decons_group_index(comp_labels, shape): + # reconstruct labels + if is_int64_overflow_possible(shape): + # at some point group indices are factorized, + # and may not be deconstructed here! wrong path! + raise ValueError('cannot deconstruct factorized group indices!') + + label_list = [] + factor = 1 + y = 0 + x = comp_labels + for i in reversed(range(len(shape))): + labels = (x - y) % (factor * shape[i]) // factor + np.putmask(labels, comp_labels < 0, -1) + label_list.append(labels) + y = labels * factor + factor *= shape[i] + return label_list[::-1] + + +def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): + """ + reconstruct labels from observed group ids + + Parameters + ---------- + xnull: boolean, + if nulls are excluded; i.e. -1 labels are passed through + """ + from pandas.hashtable import unique_label_indices + + if not xnull: + lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8') + shape = np.asarray(shape, dtype='i8') + lift + + if not is_int64_overflow_possible(shape): + # obs ids are deconstructable! take the fast route! 
+ out = decons_group_index(obs_ids, shape) + return out if xnull or not lift.any() \ + else [x - y for x, y in zip(out, lift)] + + i = unique_label_indices(comp_ids) + i8copy = lambda a: a.astype('i8', subok=False, copy=True) + return [i8copy(lab[i]) for lab in labels] + + +def indexer_from_factorized(labels, shape, compress=True): + ids = get_group_index(labels, shape, sort=True, xnull=False) + + if not compress: + ngroups = (ids.size and ids.max()) + 1 + else: + ids, obs = compress_group_index(ids, sort=True) + ngroups = len(obs) + + return get_group_index_sorter(ids, ngroups) + + +def lexsort_indexer(keys, orders=None, na_position='last'): + labels = [] + shape = [] + if isinstance(orders, bool): + orders = [orders] * len(keys) + elif orders is None: + orders = [True] * len(keys) + + for key, order in zip(keys, orders): + + # we are already a Categorical + if is_categorical_dtype(key): + c = key + + # create the Categorical + else: + c = Categorical(key, ordered=True) + + if na_position not in ['last', 'first']: + raise ValueError('invalid na_position: {!r}'.format(na_position)) + + n = len(c.categories) + codes = c.codes.copy() + + mask = (c.codes == -1) + if order: # ascending + if na_position == 'last': + codes = np.where(mask, n, codes) + elif na_position == 'first': + codes += 1 + else: # not order means descending + if na_position == 'last': + codes = np.where(mask, n, n - codes - 1) + elif na_position == 'first': + codes = np.where(mask, 0, n - codes) + if mask.any(): + n += 1 + + shape.append(n) + labels.append(codes) + + return indexer_from_factorized(labels, shape) + + +def nargsort(items, kind='quicksort', ascending=True, na_position='last'): + """ + This is intended to be a drop-in replacement for np.argsort which + handles NaNs. It adds ascending and na_position parameters. + GH #6399, #5231 + """ + + # specially handle Categorical + if is_categorical_dtype(items): + return items.argsort(ascending=ascending) + + items = np.asanyarray(items) + idx = np.arange(len(items)) + mask = isnull(items) + non_nans = items[~mask] + non_nan_idx = idx[~mask] + nan_idx = np.nonzero(mask)[0] + if not ascending: + non_nans = non_nans[::-1] + non_nan_idx = non_nan_idx[::-1] + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + if not ascending: + indexer = indexer[::-1] + # Finally, place the NaNs at the end or the beginning according to + # na_position + if na_position == 'last': + indexer = np.concatenate([indexer, nan_idx]) + elif na_position == 'first': + indexer = np.concatenate([nan_idx, indexer]) + else: + raise ValueError('invalid na_position: {!r}'.format(na_position)) + return indexer + + +class _KeyMapper(object): + + """ + Ease my suffering. 
Map compressed group id -> key tuple + """ + + def __init__(self, comp_ids, ngroups, levels, labels): + self.levels = levels + self.labels = labels + self.comp_ids = comp_ids.astype(np.int64) + + self.k = len(labels) + self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)] + + self._populate_tables() + + def _populate_tables(self): + for labs, table in zip(self.labels, self.tables): + table.map(self.comp_ids, labs.astype(np.int64)) + + def get_key(self, comp_id): + return tuple(level[table.get_item(comp_id)] + for table, level in zip(self.tables, self.levels)) + + +def get_flattened_iterator(comp_ids, ngroups, levels, labels): + # provide "flattened" iterator for multi-group setting + mapper = _KeyMapper(comp_ids, ngroups, levels, labels) + return [mapper.get_key(i) for i in range(ngroups)] + + +def get_indexer_dict(label_list, keys): + """ return a diction of {labels} -> {indexers} """ + shape = list(map(len, keys)) + + group_index = get_group_index(label_list, shape, sort=True, xnull=True) + ngroups = ((group_index.size and group_index.max()) + 1) \ + if is_int64_overflow_possible(shape) \ + else np.prod(shape, dtype='i8') + + sorter = get_group_index_sorter(group_index, ngroups) + + sorted_labels = [lab.take(sorter) for lab in label_list] + group_index = group_index.take(sorter) + + return lib.indices_fast(sorter, group_index, keys, sorted_labels) + + +# ---------------------------------------------------------------------- +# sorting levels...cleverly? + +def get_group_index_sorter(group_index, ngroups): + """ + _algos.groupsort_indexer implements `counting sort` and it is at least + O(ngroups), where + ngroups = prod(shape) + shape = map(len, keys) + that is, linear in the number of combinations (cartesian product) of unique + values of groupby keys. This can be huge when doing multi-key groupby. + np.argsort(kind='mergesort') is O(count x log(count)) where count is the + length of the data-frame; + Both algorithms are `stable` sort and that is necessary for correctness of + groupby operations. e.g. consider: + df.groupby(key)[col].transform('first') + """ + count = len(group_index) + alpha = 0.0 # taking complexities literally; there may be + beta = 1.0 # some room for fine-tuning these parameters + do_groupsort = (count > 0 and ((alpha + beta * ngroups) < + (count * np.log(count)))) + if do_groupsort: + sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), + ngroups) + return _ensure_platform_int(sorter) + else: + return group_index.argsort(kind='mergesort') + + +def compress_group_index(group_index, sort=True): + """ + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). 
+ """ + + size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) + table = _hash.Int64HashTable(size_hint) + + group_index = _ensure_int64(group_index) + + # note, group labels come out ascending (ie, 1,2,3 etc) + comp_ids, obs_group_ids = table.get_labels_groupby(group_index) + + if sort and len(obs_group_ids) > 0: + obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) + + return comp_ids, obs_group_ids + + +def _reorder_by_uniques(uniques, labels): + # sorter is index where elements ought to go + sorter = uniques.argsort() + + # reverse_indexer is where elements came from + reverse_indexer = np.empty(len(sorter), dtype=np.int64) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + mask = labels < 0 + + # move labels to right locations (ie, unsort ascending labels) + labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) + np.putmask(labels, mask, -1) + + # sort observed ids + uniques = algos.take_nd(uniques, sorter, allow_fill=False) + + return uniques, labels diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 9ab07d87fd13b..653ba1fee5691 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -663,7 +663,7 @@ def is_unique(self): False: 'first'}) @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) def duplicated(self, keep='first'): - from pandas.core.groupby import get_group_index + from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64 shape = map(len, self.levels) @@ -1405,7 +1405,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): Indices of output values in original index """ - from pandas.core.groupby import _indexer_from_factorized + from pandas.core.sorting import indexer_from_factorized if isinstance(level, (compat.string_types, int)): level = [level] @@ -1417,8 +1417,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): if not len(level) == len(ascending): raise ValueError("level must have same length as ascending") - from pandas.core.groupby import _lexsort_indexer - indexer = _lexsort_indexer(self.labels, orders=ascending) + from pandas.core.sorting import lexsort_indexer + indexer = lexsort_indexer(self.labels, orders=ascending) # level ordering else: @@ -1436,8 +1436,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): else: sortorder = level[0] - indexer = _indexer_from_factorized(primary, primshp, - compress=False) + indexer = indexer_from_factorized(primary, primshp, + compress=False) if not ascending: indexer = indexer[::-1] diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 1640858802047..46ddb5a5318fb 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -616,24 +616,3 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): assert (result[k] == v) - - -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index - - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) - - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) - - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) - - shape = (10000, 10000) - label_list = 
[np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d625fa07d932c..3a6a9eaaa8e72 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1510,59 +1510,6 @@ def check_nunique(df, keys, as_index=True): check_nunique(frame, ['jim'], as_index=False) check_nunique(frame, ['jim', 'joe'], as_index=False) - def test_series_groupby_value_counts(self): - from itertools import product - np.random.seed(1234) - - def rebuild_index(df): - arr = list(map(df.index.get_level_values, range(df.index.nlevels))) - df.index = MultiIndex.from_arrays(arr, names=df.index.names) - return df - - def check_value_counts(df, keys, bins): - for isort, normalize, sort, ascending, dropna \ - in product((False, True), repeat=5): - - kwargs = dict(normalize=normalize, sort=sort, - ascending=ascending, dropna=dropna, bins=bins) - - gr = df.groupby(keys, sort=isort) - left = gr['3rd'].value_counts(**kwargs) - - gr = df.groupby(keys, sort=isort) - right = gr['3rd'].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1] + ['3rd'] - - # have to sort on index because of unstable sort on values - left, right = map(rebuild_index, (left, right)) # xref GH9212 - assert_series_equal(left.sort_index(), right.sort_index()) - - def loop(df): - bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) - keys = '1st', '2nd', ('1st', '2nd') - for k, b in product(keys, bins): - check_value_counts(df, k, b) - - days = date_range('2015-08-24', periods=10) - - for n, m in product((100, 1000), (5, 20)): - frame = DataFrame({ - '1st': np.random.choice( - list('abcd'), n), - '2nd': np.random.choice(days, n), - '3rd': np.random.randint(1, m + 1, n) - }) - - loop(frame) - - frame.loc[1::11, '1st'] = nan - frame.loc[3::17, '2nd'] = nan - frame.loc[7::19, '3rd'] = nan - frame.loc[8::19, '3rd'] = nan - frame.loc[9::19, '3rd'] = nan - - loop(frame) - def test_multiindex_passthru(self): # GH 7997 @@ -3071,22 +3018,6 @@ def test_panel_groupby(self): agged = grouped.mean() self.assert_index_equal(agged.minor_axis, Index([0, 1])) - def test_numpy_groupby(self): - from pandas.core.groupby import numpy_groupby - - data = np.random.randn(100, 100) - labels = np.random.randint(0, 10, size=100) - - df = DataFrame(data) - - result = df.groupby(labels).sum().values - expected = numpy_groupby(data, labels) - assert_almost_equal(result, expected) - - result = df.groupby(labels, axis=1).sum().values - expected = numpy_groupby(data, labels, axis=1) - assert_almost_equal(result, expected) - def test_groupby_2d_malformed(self): d = DataFrame(index=lrange(2)) d['group'] = ['g1', 'g2'] @@ -3112,85 +3043,6 @@ def test_int32_overflow(self): right = df.groupby(['D', 'C', 'B', 'A']).sum() self.assertEqual(len(left), len(right)) - def test_int64_overflow(self): - from pandas.core.groupby import _int64_overflow_possible - - B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) - A = np.arange(2500) - df = DataFrame({'A': A, - 'B': B, - 'C': A, - 'D': B, - 'E': A, - 'F': B, - 'G': A, - 'H': B, - 'values': np.random.randn(2500)}) - - lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) - rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) - - left = lg.sum()['values'] - right = rg.sum()['values'] - - exp_index, _ = left.index.sortlevel() - self.assert_index_equal(left.index, exp_index) - - exp_index, _ = right.index.sortlevel(0) - 
self.assert_index_equal(right.index, exp_index) - - tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' - ]].values)) - tups = com._asarray_tuplesafe(tups) - - expected = df.groupby(tups).sum()['values'] - - for k, v in compat.iteritems(expected): - self.assertEqual(left[k], right[k[::-1]]) - self.assertEqual(left[k], v) - self.assertEqual(len(left), len(right)) - - # GH9096 - values = range(55109) - data = pd.DataFrame.from_dict({'a': values, - 'b': values, - 'c': values, - 'd': values}) - grouped = data.groupby(['a', 'b', 'c', 'd']) - self.assertEqual(len(grouped), len(values)) - - arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) - i = np.random.choice(len(arr), len(arr) * 4) - arr = np.vstack((arr, arr[i])) # add sume duplicate rows - - i = np.random.permutation(len(arr)) - arr = arr[i] # shuffle rows - - df = DataFrame(arr, columns=list('abcde')) - df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 - gr = df.groupby(list('abcde')) - - # verify this is testing what it is supposed to test! - self.assertTrue(_int64_overflow_possible(gr.grouper.shape)) - - # mannually compute groupings - jim, joe = defaultdict(list), defaultdict(list) - for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']): - jim[key].append(a) - joe[key].append(b) - - self.assertEqual(len(gr), len(jim)) - mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) - - def aggr(func): - f = lambda a: np.fromiter(map(func, a), dtype='f8') - arr = np.vstack((f(jim.values()), f(joe.values()))).T - res = DataFrame(arr, columns=['jim', 'joe'], index=mi) - return res.sort_index() - - assert_frame_equal(gr.mean(), aggr(np.mean)) - assert_frame_equal(gr.median(), aggr(np.median)) - def test_groupby_sort_multi(self): df = DataFrame({'a': ['foo', 'bar', 'baz'], 'b': [3, 2, 1], @@ -4451,24 +4303,3 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): assert (result[k] == v) - - -def test_decons(): - from pandas.core.groupby import decons_group_index, get_group_index - - def testit(label_list, shape): - group_index = get_group_index(label_list, shape, sort=True, xnull=True) - label_list2 = decons_group_index(group_index, shape) - - for a, b in zip(label_list, label_list2): - assert (np.array_equal(a, b)) - - shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( - [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( - [5, 1, 0, 2, 3, 0, 5, 4], 100)] - testit(label_list, shape) - - shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] - testit(label_list, shape) diff --git a/pandas/tests/groupby/test_misc.py b/pandas/tests/groupby/test_misc.py deleted file mode 100644 index 9395304385681..0000000000000 --- a/pandas/tests/groupby/test_misc.py +++ /dev/null @@ -1,101 +0,0 @@ -""" misc non-groupby routines, as they are defined in core/groupby.py """ - -import pytest -import numpy as np -from numpy import nan -from pandas.util import testing as tm -from pandas.core.groupby import _nargsort, _lexsort_indexer - - -class TestSorting(tm.TestCase): - - def test_lexsort_indexer(self): - keys = [[nan] * 5 + list(range(100)) + [nan] * 5] - # orders=True, na_position='last' - result = _lexsort_indexer(keys, orders=True, na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=True, na_position='first' - result = _lexsort_indexer(keys, orders=True, 
na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='last' - result = _lexsort_indexer(keys, orders=False, na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - # orders=False, na_position='first' - result = _lexsort_indexer(keys, orders=False, na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) - - def test_nargsort(self): - # np.argsort(items) places NaNs last - items = [nan] * 5 + list(range(100)) + [nan] * 5 - # np.argsort(items2) may not place NaNs first - items2 = np.array(items, dtype='O') - - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - pytest.skip('requested sort not available for type') - - # mergesort is the most difficult to get right because we want it to be - # stable. - - # According to numpy/core/tests/test_multiarray, """The number of - # sorted items must be greater than ~50 to check the actual algorithm - # because quick and merge sort fall over to insertion sort for small - # arrays.""" - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='last') - exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=True, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=True, - na_position='first') - exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='last' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='last') - exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - - # mergesort, ascending=False, na_position='first' - result = _nargsort(items2, kind='mergesort', ascending=False, - na_position='first') - exp = list(range(5)) + list(range(105, 
110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py new file mode 100644 index 0000000000000..801d0da070112 --- /dev/null +++ b/pandas/tests/groupby/test_value_counts.py @@ -0,0 +1,60 @@ +import pytest + +from itertools import product +import numpy as np + +from pandas.util import testing as tm +from pandas import MultiIndex, DataFrame, Series, date_range + + +@pytest.mark.parametrize("n,m", product((100, 1000), (5, 20))) +def test_series_groupby_value_counts(n, m): + np.random.seed(1234) + + def rebuild_index(df): + arr = list(map(df.index.get_level_values, range(df.index.nlevels))) + df.index = MultiIndex.from_arrays(arr, names=df.index.names) + return df + + def check_value_counts(df, keys, bins): + for isort, normalize, sort, ascending, dropna \ + in product((False, True), repeat=5): + + kwargs = dict(normalize=normalize, sort=sort, + ascending=ascending, dropna=dropna, bins=bins) + + gr = df.groupby(keys, sort=isort) + left = gr['3rd'].value_counts(**kwargs) + + gr = df.groupby(keys, sort=isort) + right = gr['3rd'].apply(Series.value_counts, **kwargs) + right.index.names = right.index.names[:-1] + ['3rd'] + + # have to sort on index because of unstable sort on values + left, right = map(rebuild_index, (left, right)) # xref GH9212 + tm.assert_series_equal(left.sort_index(), right.sort_index()) + + def loop(df): + bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) + keys = '1st', '2nd', ('1st', '2nd') + for k, b in product(keys, bins): + check_value_counts(df, k, b) + + days = date_range('2015-08-24', periods=10) + + frame = DataFrame({ + '1st': np.random.choice( + list('abcd'), n), + '2nd': np.random.choice(days, n), + '3rd': np.random.randint(1, m + 1, n) + }) + + loop(frame) + + frame.loc[1::11, '1st'] = np.nan + frame.loc[3::17, '2nd'] = np.nan + frame.loc[7::19, '3rd'] = np.nan + frame.loc[8::19, '3rd'] = np.nan + frame.loc[9::19, '3rd'] = np.nan + + loop(frame) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py new file mode 100644 index 0000000000000..99361695b2371 --- /dev/null +++ b/pandas/tests/test_sorting.py @@ -0,0 +1,339 @@ +import pytest +from itertools import product +from collections import defaultdict + +import numpy as np +from numpy import nan +import pandas as pd +from pandas.core import common as com +from pandas import DataFrame, MultiIndex, merge, concat, Series, compat +from pandas.util import testing as tm +from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.core.sorting import (is_int64_overflow_possible, + decons_group_index, + get_group_index, + nargsort, + lexsort_indexer) + + +class TestSorting(tm.TestCase): + + def test_int64_overflow(self): + + B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) + A = np.arange(2500) + df = DataFrame({'A': A, + 'B': B, + 'C': A, + 'D': B, + 'E': A, + 'F': B, + 'G': A, + 'H': B, + 'values': np.random.randn(2500)}) + + lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) + rg = df.groupby(['H', 'G', 'F', 'E', 'D', 'C', 'B', 'A']) + + left = lg.sum()['values'] + right = rg.sum()['values'] + + exp_index, _ = left.index.sortlevel() + self.assert_index_equal(left.index, exp_index) + + exp_index, _ = right.index.sortlevel(0) + self.assert_index_equal(right.index, exp_index) + + tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' + ]].values)) + tups = com._asarray_tuplesafe(tups) + 
+ expected = df.groupby(tups).sum()['values'] + + for k, v in compat.iteritems(expected): + self.assertEqual(left[k], right[k[::-1]]) + self.assertEqual(left[k], v) + self.assertEqual(len(left), len(right)) + + # GH9096 + values = range(55109) + data = pd.DataFrame.from_dict({'a': values, + 'b': values, + 'c': values, + 'd': values}) + grouped = data.groupby(['a', 'b', 'c', 'd']) + self.assertEqual(len(grouped), len(values)) + + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) + i = np.random.choice(len(arr), len(arr) * 4) + arr = np.vstack((arr, arr[i])) # add sume duplicate rows + + i = np.random.permutation(len(arr)) + arr = arr[i] # shuffle rows + + df = DataFrame(arr, columns=list('abcde')) + df['jim'], df['joe'] = np.random.randn(2, len(df)) * 10 + gr = df.groupby(list('abcde')) + + # verify this is testing what it is supposed to test! + self.assertTrue(is_int64_overflow_possible(gr.grouper.shape)) + + # mannually compute groupings + jim, joe = defaultdict(list), defaultdict(list) + for key, a, b in zip(map(tuple, arr), df['jim'], df['joe']): + jim[key].append(a) + joe[key].append(b) + + self.assertEqual(len(gr), len(jim)) + mi = MultiIndex.from_tuples(jim.keys(), names=list('abcde')) + + def aggr(func): + f = lambda a: np.fromiter(map(func, a), dtype='f8') + arr = np.vstack((f(jim.values()), f(joe.values()))).T + res = DataFrame(arr, columns=['jim', 'joe'], index=mi) + return res.sort_index() + + assert_frame_equal(gr.mean(), aggr(np.mean)) + assert_frame_equal(gr.median(), aggr(np.median)) + + def test_lexsort_indexer(self): + keys = [[nan] * 5 + list(range(100)) + [nan] * 5] + # orders=True, na_position='last' + result = lexsort_indexer(keys, orders=True, na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=True, na_position='first' + result = lexsort_indexer(keys, orders=True, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='last' + result = lexsort_indexer(keys, orders=False, na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='first' + result = lexsort_indexer(keys, orders=False, na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + def test_nargsort(self): + # np.argsort(items) places NaNs last + items = [nan] * 5 + list(range(100)) + [nan] * 5 + # np.argsort(items2) may not place NaNs first + items2 = np.array(items, dtype='O') + + try: + # GH 2785; due to a regression in NumPy1.6.2 + np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) + np.argsort(items2, kind='mergesort') + except TypeError: + pytest.skip('requested sort not available for type') + + # mergesort is the most difficult to get right because we want it to be + # stable. 
+ + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + # mergesort, ascending=True, na_position='last' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=True, + na_position='last') + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=True, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='last') + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items2, kind='mergesort', ascending=False, + na_position='first') + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + +class TestMerge(tm.TestCase): + + @pytest.mark.slow + def test_int64_overflow_issues(self): + + # #2690, combinatorial explosion + df1 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G1']) + df2 = DataFrame(np.random.randn(1000, 7), + columns=list('ABCDEF') + ['G2']) + + # it works! 
+ result = merge(df1, df2, how='outer') + self.assertTrue(len(result) == 2000) + + low, high, n = -1 << 10, 1 << 10, 1 << 20 + left = DataFrame(np.random.randint(low, high, (n, 7)), + columns=list('ABCDEFG')) + left['left'] = left.sum(axis=1) + + # one-2-one match + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + right.columns = right.columns[:-1].tolist() + ['right'] + right.index = np.arange(len(right)) + right['right'] *= -1 + + out = merge(left, right, how='outer') + self.assertEqual(len(out), len(left)) + assert_series_equal(out['left'], - out['right'], check_names=False) + result = out.iloc[:, :-2].sum(axis=1) + assert_series_equal(out['left'], result, check_names=False) + self.assertTrue(result.name is None) + + out.sort_values(out.columns.tolist(), inplace=True) + out.index = np.arange(len(out)) + for how in ['left', 'right', 'outer', 'inner']: + assert_frame_equal(out, merge(left, right, how=how, sort=True)) + + # check that left merge w/ sort=False maintains left frame order + out = merge(left, right, how='left', sort=False) + assert_frame_equal(left, out[left.columns.tolist()]) + + out = merge(right, left, how='left', sort=False) + assert_frame_equal(right, out[right.columns.tolist()]) + + # one-2-many/none match + n = 1 << 11 + left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), + columns=list('ABCDEFG')) + + # confirm that this is checking what it is supposed to check + shape = left.apply(Series.nunique).values + self.assertTrue(is_int64_overflow_possible(shape)) + + # add duplicates to left frame + left = concat([left, left], ignore_index=True) + + right = DataFrame(np.random.randint(low, high, (n // 2, 7)) + .astype('int64'), + columns=list('ABCDEFG')) + + # add duplicates & overlap with left to the right frame + i = np.random.choice(len(left), n) + right = concat([right, right, left.iloc[i]], ignore_index=True) + + left['left'] = np.random.randn(len(left)) + right['right'] = np.random.randn(len(right)) + + # shuffle left & right frames + i = np.random.permutation(len(left)) + left = left.iloc[i].copy() + left.index = np.arange(len(left)) + + i = np.random.permutation(len(right)) + right = right.iloc[i].copy() + right.index = np.arange(len(right)) + + # manually compute outer merge + ldict, rdict = defaultdict(list), defaultdict(list) + + for idx, row in left.set_index(list('ABCDEFG')).iterrows(): + ldict[idx].append(row['left']) + + for idx, row in right.set_index(list('ABCDEFG')).iterrows(): + rdict[idx].append(row['right']) + + vals = [] + for k, lval in ldict.items(): + rval = rdict.get(k, [np.nan]) + for lv, rv in product(lval, rval): + vals.append(k + tuple([lv, rv])) + + for k, rval in rdict.items(): + if k not in ldict: + for rv in rval: + vals.append(k + tuple([np.nan, rv])) + + def align(df): + df = df.sort_values(df.columns.tolist()) + df.index = np.arange(len(df)) + return df + + def verify_order(df): + kcols = list('ABCDEFG') + assert_frame_equal(df[kcols].copy(), + df[kcols].sort_values(kcols, kind='mergesort')) + + out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) + out = align(out) + + jmask = {'left': out['left'].notnull(), + 'right': out['right'].notnull(), + 'inner': out['left'].notnull() & out['right'].notnull(), + 'outer': np.ones(len(out), dtype='bool')} + + for how in 'left', 'right', 'outer', 'inner': + mask = jmask[how] + frame = align(out[mask].copy()) + self.assertTrue(mask.all() ^ mask.any() or how == 'outer') + + for sort in [False, True]: + res = merge(left, right, how=how, sort=sort) + if 
sort: + verify_order(res) + + # as in GH9092 dtypes break with outer/right join + assert_frame_equal(frame, align(res), + check_dtype=how not in ('right', 'outer')) + + +def test_decons(): + + def testit(label_list, shape): + group_index = get_group_index(label_list, shape, sort=True, xnull=True) + label_list2 = decons_group_index(group_index, shape) + + for a, b in zip(label_list, label_list2): + assert (np.array_equal(a, b)) + + shape = (4, 5, 6) + label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( + [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( + [5, 1, 0, 2, 3, 0, 5, 4], 100)] + testit(label_list, shape) + + shape = (10000, 10000) + label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] + testit(label_list, shape) diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index d66cd793ec0be..472d8674f9f8d 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -10,9 +10,7 @@ from pandas.compat import lrange, lzip from pandas.tools.concat import concat from pandas.tools.merge import merge, MergeError -from pandas.util.testing import (assert_frame_equal, - assert_series_equal, - slow) +from pandas.util.testing import assert_frame_equal, assert_series_equal from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm @@ -1092,137 +1090,6 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) - @slow - def test_int64_overflow_issues(self): - from itertools import product - from collections import defaultdict - from pandas.core.groupby import _int64_overflow_possible - - # #2690, combinatorial explosion - df1 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G1']) - df2 = DataFrame(np.random.randn(1000, 7), - columns=list('ABCDEF') + ['G2']) - - # it works! 
- result = merge(df1, df2, how='outer') - self.assertTrue(len(result) == 2000) - - low, high, n = -1 << 10, 1 << 10, 1 << 20 - left = DataFrame(np.random.randint(low, high, (n, 7)), - columns=list('ABCDEFG')) - left['left'] = left.sum(axis=1) - - # one-2-one match - i = np.random.permutation(len(left)) - right = left.iloc[i].copy() - right.columns = right.columns[:-1].tolist() + ['right'] - right.index = np.arange(len(right)) - right['right'] *= -1 - - out = merge(left, right, how='outer') - self.assertEqual(len(out), len(left)) - assert_series_equal(out['left'], - out['right'], check_names=False) - result = out.iloc[:, :-2].sum(axis=1) - assert_series_equal(out['left'], result, check_names=False) - self.assertTrue(result.name is None) - - out.sort_values(out.columns.tolist(), inplace=True) - out.index = np.arange(len(out)) - for how in ['left', 'right', 'outer', 'inner']: - assert_frame_equal(out, merge(left, right, how=how, sort=True)) - - # check that left merge w/ sort=False maintains left frame order - out = merge(left, right, how='left', sort=False) - assert_frame_equal(left, out[left.columns.tolist()]) - - out = merge(right, left, how='left', sort=False) - assert_frame_equal(right, out[right.columns.tolist()]) - - # one-2-many/none match - n = 1 << 11 - left = DataFrame(np.random.randint(low, high, (n, 7)).astype('int64'), - columns=list('ABCDEFG')) - - # confirm that this is checking what it is supposed to check - shape = left.apply(Series.nunique).values - self.assertTrue(_int64_overflow_possible(shape)) - - # add duplicates to left frame - left = concat([left, left], ignore_index=True) - - right = DataFrame(np.random.randint(low, high, (n // 2, 7)) - .astype('int64'), - columns=list('ABCDEFG')) - - # add duplicates & overlap with left to the right frame - i = np.random.choice(len(left), n) - right = concat([right, right, left.iloc[i]], ignore_index=True) - - left['left'] = np.random.randn(len(left)) - right['right'] = np.random.randn(len(right)) - - # shuffle left & right frames - i = np.random.permutation(len(left)) - left = left.iloc[i].copy() - left.index = np.arange(len(left)) - - i = np.random.permutation(len(right)) - right = right.iloc[i].copy() - right.index = np.arange(len(right)) - - # manually compute outer merge - ldict, rdict = defaultdict(list), defaultdict(list) - - for idx, row in left.set_index(list('ABCDEFG')).iterrows(): - ldict[idx].append(row['left']) - - for idx, row in right.set_index(list('ABCDEFG')).iterrows(): - rdict[idx].append(row['right']) - - vals = [] - for k, lval in ldict.items(): - rval = rdict.get(k, [np.nan]) - for lv, rv in product(lval, rval): - vals.append(k + tuple([lv, rv])) - - for k, rval in rdict.items(): - if k not in ldict: - for rv in rval: - vals.append(k + tuple([np.nan, rv])) - - def align(df): - df = df.sort_values(df.columns.tolist()) - df.index = np.arange(len(df)) - return df - - def verify_order(df): - kcols = list('ABCDEFG') - assert_frame_equal(df[kcols].copy(), - df[kcols].sort_values(kcols, kind='mergesort')) - - out = DataFrame(vals, columns=list('ABCDEFG') + ['left', 'right']) - out = align(out) - - jmask = {'left': out['left'].notnull(), - 'right': out['right'].notnull(), - 'inner': out['left'].notnull() & out['right'].notnull(), - 'outer': np.ones(len(out), dtype='bool')} - - for how in 'left', 'right', 'outer', 'inner': - mask = jmask[how] - frame = align(out[mask].copy()) - self.assertTrue(mask.all() ^ mask.any() or how == 'outer') - - for sort in [False, True]: - res = merge(left, right, how=how, sort=sort) - if 
sort: - verify_order(res) - - # as in GH9092 dtypes break with outer/right join - assert_frame_equal(frame, align(res), - check_dtype=how not in ('right', 'outer')) - def test_join_multi_levels(self): # GH 3662 diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d938c2eeacbef..e82e702cb6e55 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -34,6 +34,7 @@ concatenate_block_managers) from pandas.util.decorators import Appender, Substitution +from pandas.core.sorting import is_int64_overflow_possible import pandas.core.algorithms as algos import pandas.core.common as com @@ -1397,10 +1398,9 @@ def _sort_labels(uniques, left, right): def _get_join_keys(llab, rlab, shape, sort): - from pandas.core.groupby import _int64_overflow_possible # how many levels can be done without overflow - pred = lambda i: not _int64_overflow_possible(shape[:i]) + pred = lambda i: not is_int64_overflow_possible(shape[:i]) nlev = next(filter(pred, range(len(shape), 0, -1))) # get keys for the first `nlev` levels From 4b97db4caa94690691316df6303092f4954e7e6f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 19:57:51 -0500 Subject: [PATCH 059/353] TST: disable gbq tests again --- pandas/tests/io/test_gbq.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index dfbf3ca69b111..0a76267054ee6 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated(): gbq.generate_bq_schema(df) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): def setUp(self): @@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self): self.assertTrue(isinstance(credentials, GoogleCredentials)) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): def setUp(self): _setup_common() @@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self): self.assertTrue(pages is not None) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): def setUp(self): _setup_common() @@ -449,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self): private_key=re.sub('[a-z]', '9', _get_private_key_contents())) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestReadGBQIntegration(tm.TestCase): @classmethod @@ -503,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self): tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): @classmethod @@ -906,7 +906,7 @@ def test_configuration_without_query(self): configuration=config) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. 
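A minimal, hypothetical sketch of the pytest behaviour this commit relies on (the test name and body below are illustrative, not part of the patch): an xfail marker with run=False records the test as xfailed without ever executing it, which is why it replaces the plain markers on the intermittently failing gbq integration classes.

    import pytest

    @pytest.mark.xfail(run=False, reason="intermittent failures")
    def test_flaky_integration():
        # never executed: with run=False pytest reports the test as xfailed
        # without running the body, so intermittent external failures cannot
        # break the build
        raise RuntimeError("unreachable")
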
@@ -1219,7 +1219,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. @@ -1277,7 +1277,7 @@ def test_upload_data(self): self.assertEqual(result['num_rows'][0], test_size) -@pytest.mark.single +@pytest.mark.xfail(run=False, reason="intermittent failures") class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. From 25fb173dcaff5401f2b496e17beba28d14d54c66 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 20:15:20 -0500 Subject: [PATCH 060/353] TST: fix incorrect url in compressed url network tests in parser --- pandas/tests/io/parser/test_network.py | 53 ++++++++++---------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 4d75b59b09560..6e762368f82c5 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -7,7 +7,6 @@ import os import pytest -import functools from itertools import product import pandas.util.testing as tm @@ -15,42 +14,32 @@ from pandas.io.parsers import read_csv, read_table -class TestCompressedUrl(object): +@pytest.fixture(scope='module') +def salaries_table(): + path = os.path.join(tm.get_data_path(), 'salaries.csv') + return read_table(path) - compression_to_extension = { - 'gzip': '.gz', - 'bz2': '.bz2', - 'zip': '.zip', - 'xz': '.xz', - } - def setup(self): - path = os.path.join(tm.get_data_path(), 'salaries.csv') - self.local_table = read_table(path) - self.base_url = ('https://github.com/pandas-dev/pandas/raw/master/' - 'pandas/io/tests/parser/data/salaries.csv') +@tm.network +@pytest.mark.parametrize( + "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), + ('zip', '.zip'), ('xz', '.xz')]) +def test_compressed_urls(salaries_table, compression, extension): + # test reading compressed urls with various engines and + # extension inference + base_url = ('https://github.com/pandas-dev/pandas/raw/master/' + 'pandas/tests/io/parser/data/salaries.csv') + + url = base_url + extension + + # args is a (compression, engine) tuple + for (c, engine) in product([compression, 'infer'], ['python', 'c']): - @tm.network - def test_compressed_urls(self): - # Test reading compressed tables from URL. - msg = ('Test reading {}-compressed tables from URL: ' - 'compression="{}", engine="{}"') - - for compression, extension in self.compression_to_extension.items(): - url = self.base_url + extension - # args is a (compression, engine) tuple - for args in product([compression, 'infer'], ['python', 'c']): - # test_fxn is a workaround for more descriptive nose reporting. - # See http://stackoverflow.com/a/37393684/4651668. 
- test_fxn = functools.partial(self.check_table) - test_fxn.description = msg.format(compression, *args) - yield (test_fxn, url) + args - - def check_table(self, url, compression, engine): if url.endswith('.xz'): tm._skip_if_no_lzma() - url_table = read_table(url, compression=compression, engine=engine) - tm.assert_frame_equal(url_table, self.local_table) + + url_table = read_table(url, compression=c, engine=engine) + tm.assert_frame_equal(url_table, salaries_table) class TestS3(tm.TestCase): From 03bb9003b3b3db92f3c20a60e88fd2001d6b3948 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 20:44:44 -0500 Subject: [PATCH 061/353] TST: incorrect skip in when --skip-network is run closes #15407 --- pandas/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index b3683de3a173b..623feb99e9cdc 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -17,5 +17,5 @@ def pytest_runtest_setup(item): if 'slow' not in item.keywords and item.config.getoption("--only-slow"): pytest.skip("skipping due to --only-slow") - if 'skip' in item.keywords and item.config.getoption("--skip-network"): + if 'network' in item.keywords and item.config.getoption("--skip-network"): pytest.skip("skipping due to --skip-network") From bbb583c30bcee83ed3a2e9a3acfc83535f270632 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Feb 2017 22:25:23 -0500 Subject: [PATCH 062/353] TST: fix test_nework.py fixture under py27 --- pandas/tests/io/parser/test_network.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 6e762368f82c5..721d447262149 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -20,11 +20,15 @@ def salaries_table(): return read_table(path) -@tm.network @pytest.mark.parametrize( "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), ('xz', '.xz')]) def test_compressed_urls(salaries_table, compression, extension): + check_compressed_urls(salaries_table, compression, extension) + + +@tm.network +def check_compressed_urls(salaries_table, compression, extension): # test reading compressed urls with various engines and # extension inference base_url = ('https://github.com/pandas-dev/pandas/raw/master/' From 2372d275b4b2565b4c406d3dfc7c4b4993f1e625 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 15 Feb 2017 10:20:27 -0500 Subject: [PATCH 063/353] BLD: Numexpr 2.4.6 required closes #15213 Author: Francesc Alted Closes #15383 from FrancescAlted/numexpr-2.4.6 and squashes the following commits: c417fe2 [Francesc Alted] Simplify and remove UserWarning testing on numexpr import e1b34a9 [Francesc Alted] Force a reload of pd.computation for actually triggering the UserWarning c081199 [Francesc Alted] Relax the exact message for the ImportError 73f0319 [Francesc Alted] numexpr requisite raised to 2.4.6 0d4ab9a [Francesc Alted] Restored the old numexpr version dependencies to adjust for old requirements c1aae19 [Francesc Alted] Fixed a lint error 7575ba2 [Francesc Alted] Using constants instead of literals for numexpr version 7a275ce [Francesc Alted] Fixed a typo 93f54aa [Francesc Alted] numexpr section moved to Other API changes section 3b6e58b [Francesc Alted] Removed recomendation for numexpr 2.6.2 f225598 [Francesc Alted] Updated test_compat for numexpr 2.4.6 8bd4ed1 [Francesc Alted] numexpr 2.4.6 requirement moved to other enhancements section e45b742 
[Francesc Alted] Moved pinned versions in CI folder to 2.4.6 6e12e29 [Francesc Alted] Added a notice on the recommended numexpr version ac62653 [Francesc Alted] Require numexpr 2.4.6 ab79c54 [Francesc Alted] Require numexpr 2.6.2 --- ci/requirements-3.4_SLOW.run | 2 +- doc/source/install.rst | 2 +- doc/source/whatsnew/v0.20.0.txt | 4 +++- pandas/computation/__init__.py | 17 +++++------------ pandas/tests/computation/test_compat.py | 15 ++++----------- 5 files changed, 14 insertions(+), 26 deletions(-) diff --git a/ci/requirements-3.4_SLOW.run b/ci/requirements-3.4_SLOW.run index 39018439a1223..90156f62c6e71 100644 --- a/ci/requirements-3.4_SLOW.run +++ b/ci/requirements-3.4_SLOW.run @@ -9,7 +9,7 @@ html5lib patsy beautiful-soup scipy -numexpr=2.4.4 +numexpr=2.4.6 pytables matplotlib lxml diff --git a/doc/source/install.rst b/doc/source/install.rst index 1c7cbc9326614..80a5d7e7d375b 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -226,7 +226,7 @@ Recommended Dependencies * `numexpr `__: for accelerating certain numerical operations. ``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups. - If installed, must be Version 2.1 or higher (excluding a buggy 2.4.4). Version 2.4.6 or higher is highly recommended. + If installed, must be Version 2.4.6 or higher. * `bottleneck `__: for accelerating certain types of ``nan`` evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d76e33caffbf1..26006083d81b4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1,6 +1,6 @@ .. _whatsnew_0200: -v0.20.0 (????, 2016) +v0.20.0 (????, 2017) -------------------- This is a major release from 0.19 and includes a small number of API changes, several new features, @@ -158,6 +158,7 @@ Other enhancements .. _whatsnew_0200.api_breaking: + Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -429,6 +430,7 @@ Other API Changes - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) - The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no more casted to ``int64`` which also caused precision lost (:issue: `14064`, :issue:`14305`). - Reorganization of timeseries development tests (:issue:`14854`) +- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). .. 
_whatsnew_0200.deprecations: diff --git a/pandas/computation/__init__.py b/pandas/computation/__init__.py index 9e94215eecf62..e13faf890d1f8 100644 --- a/pandas/computation/__init__.py +++ b/pandas/computation/__init__.py @@ -3,26 +3,19 @@ from distutils.version import LooseVersion _NUMEXPR_INSTALLED = False +_MIN_NUMEXPR_VERSION = "2.4.6" try: import numexpr as ne ver = ne.__version__ - _NUMEXPR_INSTALLED = ver >= LooseVersion('2.1') + _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION) - # we specifically disallow 2.4.4 as - # has some hard-to-diagnose bugs - if ver == LooseVersion('2.4.4'): - _NUMEXPR_INSTALLED = False - warnings.warn( - "The installed version of numexpr {ver} is not supported " - "in pandas and will be not be used\n".format(ver=ver), - UserWarning) - - elif not _NUMEXPR_INSTALLED: + if not _NUMEXPR_INSTALLED: warnings.warn( "The installed version of numexpr {ver} is not supported " "in pandas and will be not be used\nThe minimum supported " - "version is 2.1\n".format(ver=ver), UserWarning) + "version is {min_ver}\n".format( + ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning) except ImportError: # pragma: no cover pass diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 599d0c10336dc..77994ac6d2f53 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -10,6 +10,7 @@ from pandas.computation.engines import _engines import pandas.computation.expr as expr +from pandas.computation import _MIN_NUMEXPR_VERSION ENGINES_PARSERS = list(product(_engines, expr._parsers)) @@ -21,15 +22,10 @@ def test_compat(): try: import numexpr as ne ver = ne.__version__ - if ver == LooseVersion('2.4.4'): + if ver < LooseVersion(_MIN_NUMEXPR_VERSION): assert not _NUMEXPR_INSTALLED - elif ver < LooseVersion('2.1'): - with tm.assert_produces_warning(UserWarning, - check_stacklevel=False): - assert not _NUMEXPR_INSTALLED else: assert _NUMEXPR_INSTALLED - except ImportError: pytest.skip("not testing numexpr version compat") @@ -51,12 +47,9 @@ def testit(): except ImportError: pytest.skip("no numexpr") else: - if ne.__version__ < LooseVersion('2.1'): - with tm.assertRaisesRegexp(ImportError, "'numexpr' version is " - ".+, must be >= 2.1"): + if ne.__version__ < LooseVersion(_MIN_NUMEXPR_VERSION): + with tm.assertRaises(ImportError): testit() - elif ne.__version__ == LooseVersion('2.4.4'): - pytest.skip("numexpr version==2.4.4") else: testit() else: From b261dfe38f114b57e358ad09051501684d88587f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 10:23:36 -0500 Subject: [PATCH 064/353] TST: print skipped tests files xref #15341 Author: Jeff Reback Closes #15408 from jreback/skip and squashes the following commits: 547bee6 [Jeff Reback] TST: print skipped tests files --- .travis.yml | 3 ++- ci/install_travis.sh | 1 + ci/print_skipped.py | 7 ++++--- ci/script_multi.sh | 8 ++++---- ci/script_single.sh | 8 ++++---- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6b90e49b336b2..6245213cec06f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -332,5 +332,6 @@ after_script: - echo "after_script start" - ci/install_test.sh - source activate pandas && python -c "import pandas; pandas.show_versions();" - - ci/print_skipped.py /tmp/pytest.xml + - ci/print_skipped.py /tmp/single.xml + - ci/print_skipped.py /tmp/multiple.xml - echo "after_script done" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ad804b96a0d82..802d8c9f6b776 100755 
--- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -112,6 +112,7 @@ fi source activate pandas pip install pytest-xdist + if [ "$LINT" ]; then conda install flake8 pip install cpplint diff --git a/ci/print_skipped.py b/ci/print_skipped.py index 9fb05df64bcea..dd2180f6eeb19 100755 --- a/ci/print_skipped.py +++ b/ci/print_skipped.py @@ -30,20 +30,21 @@ def parse_results(filename): i += 1 assert i - 1 == len(skipped) assert i - 1 == len(skipped) - assert len(skipped) == int(root.attrib['skip']) + # assert len(skipped) == int(root.attrib['skip']) return '\n'.join(skipped) def main(args): print('SKIPPED TESTS:') - print(parse_results(args.filename)) + for fn in args.filename: + print(parse_results(fn)) return 0 def parse_args(): import argparse parser = argparse.ArgumentParser() - parser.add_argument('filename', help='XUnit file to parse') + parser.add_argument('filename', nargs='+', help='XUnit file to parse') return parser.parse_args() diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 83f8427cc57ad..f5fbcbbc12f83 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." elif [ "$COVERAGE" ]; then - echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas - pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas else - echo pytest -n 2 -m "not single" $TEST_ARGS pandas - pytest -n 2 -m "not single" $TEST_ARGS pandas # TODO: doctest + echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest fi RET="$?" diff --git a/ci/script_single.sh b/ci/script_single.sh index 38021fcac5721..2d7962352842b 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -20,11 +20,11 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas - pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/pytest.xml $TEST_ARGS pandas + echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else - echo pytest -m "single" $TEST_ARGS pandas - pytest -m "single" $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest fi RET="$?" From e351ed0fd211a204f960b9116bc13f75ed1f97c4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 10:24:45 -0500 Subject: [PATCH 065/353] PERF: high memory in MI closes #13904 Creates an efficient MultiIndexHashTable in cython. This allows us to efficiently store a multi-index for fast indexing (.get_loc() and .get_indexer()), with the current tuple-based (and gil holding) use of the PyObject Hash Table. This uses the pandas.tools.hashing routines to hash each of the 'values' of a MI to a single uint64. 
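A rough sketch of that idea (illustrative only; it simply exercises the hash_tuples helper that this patch wires into MultiIndex._hashed_values, and assumes pandas at the state of this series):

    import pandas as pd
    from pandas.tools.hashing import hash_tuples

    mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2]], names=['one', 'two'])

    hashed = hash_tuples(mi)      # one uint64 per row of the MultiIndex
    assert hashed.dtype == 'uint64'

    # the cython MultiIndexHashTable keys on these uint64 values rather than
    # materialized python tuples, so lookups resolve through the hashed keys
    loc = mi.get_loc(('b', 2))    # -> 3
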
So this makes MI more memory friendly and much more efficient. You get these speedups, because the creation of the hashtable is now much more efficient. Author: Jeff Reback Closes #15245 from jreback/mi and squashes the following commits: 7df6c34 [Jeff Reback] PERF: high memory in MI --- asv_bench/benchmarks/indexing.py | 30 +++- asv_bench/benchmarks/reindex.py | 4 +- doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/core/algorithms.py | 3 +- pandas/core/frame.py | 3 +- pandas/hashtable.pxd | 8 + pandas/index.pyx | 39 ++++- pandas/indexes/base.py | 5 +- pandas/indexes/multi.py | 203 ++++++++++++++++++---- pandas/io/pytables.py | 4 +- pandas/src/algos_common_helper.pxi.in | 4 +- pandas/src/hashtable_class_helper.pxi.in | 152 +++++++++++++--- pandas/tests/frame/test_mutate_columns.py | 29 +++- pandas/tests/frame/test_repr_info.py | 32 ++++ pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexes/test_multi.py | 136 +++++++++++++-- pandas/tests/indexing/test_multiindex.py | 3 +- pandas/tests/test_multilevel.py | 4 +- pandas/tests/tools/test_hashing.py | 12 ++ pandas/tests/tools/test_join.py | 6 +- pandas/tools/hashing.py | 44 +++-- pandas/types/cast.py | 3 +- 22 files changed, 605 insertions(+), 125 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 27cd320c661e0..d938cc6a6dc4d 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -88,7 +88,7 @@ def setup(self): def time_getitem_scalar(self): self.ts[self.dt] - + class DataFrameIndexing(object): goal_time = 0.2 @@ -189,6 +189,15 @@ def setup(self): self.eps_C = 5 self.eps_D = 5000 self.mdt2 = self.mdt.set_index(['A', 'B', 'C', 'D']).sortlevel() + self.miint = MultiIndex.from_product( + [np.arange(1000), + np.arange(1000)], names=['one', 'two']) + + import string + self.mistring = MultiIndex.from_product( + [np.arange(1000), + np.arange(20), list(string.ascii_letters)], + names=['one', 'two', 'three']) def time_series_xs_mi_ix(self): self.s.ix[999] @@ -197,7 +206,24 @@ def time_frame_xs_mi_ix(self): self.df.ix[999] def time_multiindex_slicers(self): - self.mdt2.loc[self.idx[(self.test_A - self.eps_A):(self.test_A + self.eps_A), (self.test_B - self.eps_B):(self.test_B + self.eps_B), (self.test_C - self.eps_C):(self.test_C + self.eps_C), (self.test_D - self.eps_D):(self.test_D + self.eps_D)], :] + self.mdt2.loc[self.idx[ + (self.test_A - self.eps_A):(self.test_A + self.eps_A), + (self.test_B - self.eps_B):(self.test_B + self.eps_B), + (self.test_C - self.eps_C):(self.test_C + self.eps_C), + (self.test_D - self.eps_D):(self.test_D + self.eps_D)], :] + + def time_multiindex_get_indexer(self): + self.miint.get_indexer( + np.array([(0, 10), (0, 11), (0, 12), + (0, 13), (0, 14), (0, 15), + (0, 16), (0, 17), (0, 18), + (0, 19)], dtype=object)) + + def time_multiindex_string_get_loc(self): + self.mistring.get_loc((999, 19, 'Z')) + + def time_is_monotonic(self): + self.miint.is_monotonic class PanelIndexing(object): diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 8db0cd7629332..6fe6c32a96df9 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -16,8 +16,8 @@ def setup(self): data=np.random.rand(10000, 30), columns=range(30)) # multi-index - N = 1000 - K = 20 + N = 5000 + K = 200 level1 = tm.makeStringIndex(N).values.repeat(K) level2 = np.tile(tm.makeStringIndex(K).values, N) index = MultiIndex.from_arrays([level1, level2]) diff --git a/doc/source/whatsnew/v0.20.0.txt 
b/doc/source/whatsnew/v0.20.0.txt index 26006083d81b4..4708abe4d592e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -472,7 +472,7 @@ Performance Improvements - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) - +- Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. @@ -502,6 +502,8 @@ Bug Fixes - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`) + +- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 05cfb1bd9ec27..c922ac21e12eb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1250,7 +1250,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() else: - indexer = _ensure_int64(indexer) + indexer = _ensure_int64(indexer, copy=False) if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False @@ -1303,7 +1303,6 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info) - indexer = _ensure_int64(indexer) func(arr, indexer, out, fill_value) if flip_order: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16f8d4658dc20..9c66f6dbb273e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1752,7 +1752,8 @@ def _sizeof_fmt(num, size_qualifier): # all cases (e.g., it misses categorical data even with object # categories) deep = False - if 'object' in counts or is_object_dtype(self.index): + if ('object' in counts or + self.index._is_memory_usage_qualified()): size_qualifier = '+' mem_usage = self.memory_usage(index=True, deep=deep).sum() lines.append("memory usage: %s\n" % diff --git a/pandas/hashtable.pxd b/pandas/hashtable.pxd index cabfa43a76f26..9b352ae1c003b 100644 --- a/pandas/hashtable.pxd +++ b/pandas/hashtable.pxd @@ -31,6 +31,14 @@ cdef class PyObjectHashTable(HashTable): cpdef get_item(self, object val) cpdef set_item(self, object key, Py_ssize_t val) +cdef class MultiIndexHashTable(HashTable): + cdef: + kh_uint64_t *table + object mi + + cpdef get_item(self, object val) + cpdef set_item(self, object key, Py_ssize_t val) + cdef class StringHashTable(HashTable): cdef kh_str_t *table diff --git a/pandas/index.pyx b/pandas/index.pyx index 0c975d1775a03..37fe7d90bebe0 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -182,7 +182,7 @@ cdef class IndexEngine: Py_ssize_t i, n int last_true - values = self._get_index_values() + values = np.array(self._get_index_values(), copy=False) n = len(values) result = np.empty(n, dtype=bool) @@ -284,7 +284,6 @@ cdef class IndexEngine: if not self.is_mapping_populated: values = self._get_index_values() - self.mapping = 
self._make_hash_table(len(values)) self.mapping.map_locations(values) @@ -322,7 +321,7 @@ cdef class IndexEngine: Py_ssize_t i, j, n, n_t, n_alloc self._ensure_mapping_populated() - values = self._get_index_values() + values = np.array(self._get_index_values(), copy=False) stargets = set(targets) n = len(values) n_t = len(targets) @@ -554,5 +553,39 @@ cdef inline bint _is_utc(object tz): return tz is UTC or isinstance(tz, _du_utc) +cdef class MultiIndexEngine(IndexEngine): + + def _call_monotonic(self, object mi): + # defer these back to the mi iteself + return (mi.is_monotonic_increasing, + mi.is_monotonic_decreasing, + mi.is_unique) + + def get_backfill_indexer(self, other, limit=None): + # we coerce to ndarray-of-tuples + values = np.array(self._get_index_values()) + return algos.backfill_object(values, other, limit=limit) + + def get_pad_indexer(self, other, limit=None): + # we coerce to ndarray-of-tuples + values = np.array(self._get_index_values()) + return algos.pad_object(values, other, limit=limit) + + cpdef get_loc(self, object val): + if is_definitely_invalid_key(val): + raise TypeError("'{val}' is an invalid key".format(val=val)) + + self._ensure_mapping_populated() + if not self.unique: + return self._get_loc_duplicates(val) + + try: + return self.mapping.get_item(val) + except TypeError: + raise KeyError(val) + + cdef _make_hash_table(self, n): + return _hash.MultiIndexHashTable(n) + # Generated from template. include "index_class_helper.pxi" diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index bb2941a121452..c483fb0764a4c 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1431,6 +1431,10 @@ def inferred_type(self): """ return a string of the type inferred from the values """ return lib.infer_dtype(self) + def _is_memory_usage_qualified(self): + """ return a boolean if we need a qualified .info display """ + return self.is_object() + def is_type_compatible(self, kind): return kind == self.inferred_type @@ -2446,7 +2450,6 @@ def _get_fill_indexer_searchsorted(self, target, method, limit=None): 'if index and target are monotonic' % method) side = 'left' if method == 'pad' else 'right' - target = np.asarray(target) # find exact matches first (this simplifies the algorithm) indexer = self.get_indexer(target) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 653ba1fee5691..57739548a17d6 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -14,7 +14,6 @@ from pandas.compat.numpy import function as nv from pandas import compat - from pandas.types.common import (_ensure_int64, _ensure_platform_int, is_object_dtype, @@ -73,6 +72,7 @@ class MultiIndex(Index): _levels = FrozenList() _labels = FrozenList() _comparables = ['names'] + _engine_type = _index.MultiIndexEngine rename = Index.set_names def __new__(cls, levels=None, labels=None, sortorder=None, names=None, @@ -114,7 +114,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, result._verify_integrity() if _set_identity: result._reset_identity() - return result def _verify_integrity(self, labels=None, levels=None): @@ -429,6 +428,12 @@ def _shallow_copy(self, values=None, **kwargs): def dtype(self): return np.dtype('O') + def _is_memory_usage_qualified(self): + """ return a boolean if we need a qualified .info display """ + def f(l): + return 'mixed' in l or 'string' in l or 'unicode' in l + return any([f(l) for l in self._inferred_type_levels]) + @Appender(Index.memory_usage.__doc__) def memory_usage(self, deep=False): # we are overwriting 
our base class to avoid @@ -619,6 +624,10 @@ def _get_level_number(self, level): _tuples = None + @cache_readonly + def _engine(self): + return self._engine_type(lambda: self, len(self)) + @property def values(self): if self._tuples is not None: @@ -655,10 +664,95 @@ def _has_complex_internals(self): # to disable groupby tricks return True + @cache_readonly + def is_monotonic(self): + """ + return if the index is monotonic increasing (only equal or + increasing) values. + """ + return self.is_monotonic_increasing + + @cache_readonly + def is_monotonic_increasing(self): + """ + return if the index is monotonic increasing (only equal or + increasing) values. + """ + + # reversed() because lexsort() wants the most significant key last. + values = [self._get_level_values(i) + for i in reversed(range(len(self.levels)))] + try: + sort_order = np.lexsort(values) + return Index(sort_order).is_monotonic + except TypeError: + + # we have mixed types and np.lexsort is not happy + return Index(self.values).is_monotonic + + @property + def is_monotonic_decreasing(self): + """ + return if the index is monotonic decreasing (only equal or + decreasing) values. + """ + return False + @cache_readonly def is_unique(self): return not self.duplicated().any() + @cache_readonly + def _have_mixed_levels(self): + """ return a boolean list indicated if we have mixed levels """ + return ['mixed' in l for l in self._inferred_type_levels] + + @cache_readonly + def _inferred_type_levels(self): + """ return a list of the inferred types, one for each level """ + return [i.inferred_type for i in self.levels] + + @cache_readonly + def _hashed_values(self): + """ return a uint64 ndarray of my hashed values """ + from pandas.tools.hashing import hash_tuples + return hash_tuples(self) + + def _hashed_indexing_key(self, key): + """ + validate and return the hash for the provided key + + *this is internal for use for the cython routines* + + Paramters + --------- + key : string or tuple + + Returns + ------- + np.uint64 + + Notes + ----- + we need to stringify if we have mixed levels + + """ + from pandas.tools.hashing import hash_tuples + + if not isinstance(key, tuple): + return hash_tuples(key) + + if not len(key) == self.nlevels: + raise KeyError + + def f(k, stringify): + if stringify and not isinstance(k, compat.string_types): + k = str(k) + return k + key = tuple([f(k, stringify) + for k, stringify in zip(key, self._have_mixed_levels)]) + return hash_tuples(key) + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) @@ -748,26 +842,44 @@ def _try_mi(k): raise InvalidIndexError(key) - def get_level_values(self, level): + def _get_level_values(self, level): """ - Return vector of label values for requested level, equal to the length - of the index + Return vector of label values for requested level, + equal to the length of the index + + **this is an internal method** Parameters ---------- - level : int or level name + level : int level Returns ------- values : ndarray """ - num = self._get_level_number(level) - unique = self.levels[num] # .values - labels = self.labels[num] - filled = algos.take_1d(unique.values, labels, + + unique = self.levels[level] + labels = self.labels[level] + filled = algos.take_1d(unique._values, labels, fill_value=unique._na_value) - values = unique._shallow_copy(filled) - return values + return filled + + def get_level_values(self, level): + """ + Return vector of label values for requested level, + 
equal to the length of the index + + Parameters + ---------- + level : int or level name + + Returns + ------- + values : Index + """ + level = self._get_level_number(level) + values = self._get_level_values(level) + return self.levels[level]._shallow_copy(values) def format(self, space=2, sparsify=None, adjoin=True, names=False, na_rep=None, formatter=None): @@ -852,7 +964,8 @@ def to_frame(self, index=True): from pandas import DataFrame result = DataFrame({(name or level): self.get_level_values(level) for name, level in - zip(self.names, range(len(self.levels)))}) + zip(self.names, range(len(self.levels)))}, + copy=False) if index: result.index = self return result @@ -1482,29 +1595,41 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) - target_index = target - if isinstance(target, MultiIndex): - target_index = target._tuple_index + # empty indexer + if is_list_like(target) and not len(target): + return _ensure_platform_int(np.array([])) + + if not isinstance(target, MultiIndex): + try: + target = MultiIndex.from_tuples(target) + except (TypeError, ValueError): - if not is_object_dtype(target_index.dtype): - return np.ones(len(target_index)) * -1 + # let's instead try with a straight Index + if method is None: + return Index(self.values).get_indexer(target, + method=method, + limit=limit, + tolerance=tolerance) if not self.is_unique: raise Exception('Reindexing only valid with uniquely valued Index ' 'objects') - self_index = self._tuple_index - if method == 'pad' or method == 'backfill': if tolerance is not None: raise NotImplementedError("tolerance not implemented yet " 'for MultiIndex') - indexer = self_index._get_fill_indexer(target, method, limit) + indexer = self._get_fill_indexer(target, method, limit) elif method == 'nearest': raise NotImplementedError("method='nearest' not implemented yet " 'for MultiIndex; see GitHub issue 9365') else: - indexer = self_index._engine.get_indexer(target._values) + # we may not compare equally because of hashing if we + # don't have the same dtypes + if self._inferred_type_levels != target._inferred_type_levels: + return Index(self.values).get_indexer(target.values) + + indexer = self._engine.get_indexer(target) return _ensure_platform_int(indexer) @@ -1571,17 +1696,6 @@ def reindex(self, target, method=None, level=None, limit=None, return target, indexer - @cache_readonly - def _tuple_index(self): - """ - Convert MultiIndex to an Index of tuples - - Returns - ------- - index : Index - """ - return Index(self._values) - def get_slice_bound(self, label, side, kind): if not isinstance(label, tuple): @@ -1828,8 +1942,9 @@ def partial_selection(key, indexer=None): key = tuple(self[indexer].tolist()[0]) - return (self._engine.get_loc(_values_from_object(key)), - None) + return (self._engine.get_loc( + _values_from_object(key)), None) + else: return partial_selection(key) else: @@ -2115,10 +2230,24 @@ def equals(self, other): return False for i in range(self.nlevels): + slabels = self.labels[i] + slabels = slabels[slabels != -1] svalues = algos.take_nd(np.asarray(self.levels[i]._values), - self.labels[i], allow_fill=False) + slabels, allow_fill=False) + + olabels = other.labels[i] + olabels = olabels[olabels != -1] ovalues = algos.take_nd(np.asarray(other.levels[i]._values), - other.labels[i], allow_fill=False) + olabels, allow_fill=False) + + # since we use NaT both datetime64 and timedelta64 + # we can have a situation where a level is typed say 
+ # timedelta64 in self (IOW it has other values than NaT) + # but types datetime64 in other (where its all NaT) + # but these are equivalent + if len(svalues) == 0 and len(ovalues) == 0: + continue + if not array_equivalent(svalues, ovalues): return False diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9224f7d3d9a94..d8de1dcd61977 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3787,9 +3787,9 @@ def read(self, where=None, columns=None, **kwargs): lp = DataFrame(c.data, index=long_index, columns=c.values) # need a better algorithm - tuple_index = long_index._tuple_index + tuple_index = long_index.values - unique_tuples = lib.fast_unique(tuple_index.values) + unique_tuples = lib.fast_unique(tuple_index) unique_tuples = _asarray_tuplesafe(unique_tuples) indexer = match(unique_tuples, tuple_index) diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in index 42089f9520ab6..b83dec1d26242 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/src/algos_common_helper.pxi.in @@ -579,12 +579,12 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} -cpdef ensure_{{name}}(object arr): +cpdef ensure_{{name}}(object arr, copy=True): if util.is_array(arr): if ( arr).descr.type_num == NPY_{{c_type}}: return arr else: - return arr.astype(np.{{dtype}}) + return arr.astype(np.{{dtype}}, copy=copy) else: return np.array(arr, dtype=np.{{dtype}}) diff --git a/pandas/src/hashtable_class_helper.pxi.in b/pandas/src/hashtable_class_helper.pxi.in index ef385ba7dca1c..3ce82dace40a9 100644 --- a/pandas/src/hashtable_class_helper.pxi.in +++ b/pandas/src/hashtable_class_helper.pxi.in @@ -262,13 +262,6 @@ cdef class {{name}}HashTable(HashTable): else: raise KeyError(val) - def get_iter_test(self, {{dtype}}_t key, Py_ssize_t iterations): - cdef Py_ssize_t i, val=0 - for i in range(iterations): - k = kh_get_{{dtype}}(self.table, val) - if k != self.table.n_buckets: - val = self.table.vals[k] - cpdef set_item(self, {{dtype}}_t key, Py_ssize_t val): cdef: khiter_t k @@ -501,18 +494,6 @@ cdef class StringHashTable(HashTable): else: raise KeyError(val) - def get_iter_test(self, object key, Py_ssize_t iterations): - cdef: - Py_ssize_t i, val - char *v - - v = util.get_c_string(key) - - for i in range(iterations): - k = kh_get_str(self.table, v) - if k != self.table.n_buckets: - val = self.table.vals[k] - cpdef set_item(self, object key, Py_ssize_t val): cdef: khiter_t k @@ -755,15 +736,6 @@ cdef class PyObjectHashTable(HashTable): else: raise KeyError(val) - def get_iter_test(self, object key, Py_ssize_t iterations): - cdef Py_ssize_t i, val - if key != key or key is None: - key = na_sentinel - for i in range(iterations): - k = kh_get_pymap(self.table, key) - if k != self.table.n_buckets: - val = self.table.vals[k] - cpdef set_item(self, object key, Py_ssize_t val): cdef: khiter_t k @@ -874,3 +846,127 @@ cdef class PyObjectHashTable(HashTable): count += 1 return np.asarray(labels) + + +cdef class MultiIndexHashTable(HashTable): + + def __init__(self, size_hint=1): + self.table = kh_init_uint64() + self.mi = None + kh_resize_uint64(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_uint64(self.table) + self.table = NULL + + def __len__(self): + return self.table.size + + def sizeof(self, deep=False): + """ return the size of my table in bytes """ + return self.table.n_buckets * (sizeof(uint64_t) + # keys + sizeof(size_t) + # vals + sizeof(uint32_t)) # flags + + def 
_check_for_collisions(self, int64_t[:] locs, object mi): + # validate that the locs map to the actual values + # provided in the mi + # we can only check if we *don't* have any missing values + # :< + cdef: + ndarray[int64_t] alocs + + alocs = np.asarray(locs) + if (alocs != -1).all(): + + result = self.mi.take(locs) + if isinstance(mi, tuple): + from pandas import Index + mi = Index([mi]) + if not result.equals(mi): + raise AssertionError( + "hash collision\nlocs:\n{}\n" + "result:\n{}\nmi:\n{}".format(alocs, result, mi)) + + def __contains__(self, object key): + try: + self.get_item(key) + return True + except (KeyError, ValueError, TypeError): + return False + + cpdef get_item(self, object key): + cdef: + khiter_t k + uint64_t value + int64_t[:] locs + Py_ssize_t loc + + value = self.mi._hashed_indexing_key(key) + k = kh_get_uint64(self.table, value) + if k != self.table.n_buckets: + loc = self.table.vals[k] + locs = np.array([loc], dtype=np.int64) + self._check_for_collisions(locs, key) + return loc + else: + raise KeyError(key) + + cpdef set_item(self, object key, Py_ssize_t val): + raise NotImplementedError + + @cython.boundscheck(False) + def map_locations(self, object mi): + cdef: + Py_ssize_t i, n + ndarray[uint64_t] values + uint64_t val + int ret = 0 + khiter_t k + + self.mi = mi + n = len(mi) + values = mi._hashed_values + + with nogil: + for i in range(n): + val = values[i] + k = kh_put_uint64(self.table, val, &ret) + self.table.vals[k] = i + + @cython.boundscheck(False) + def lookup(self, object mi): + # look up with a target mi + cdef: + Py_ssize_t i, n + ndarray[uint64_t] values + int ret = 0 + uint64_t val + khiter_t k + int64_t[:] locs + + n = len(mi) + values = mi._hashed_values + + locs = np.empty(n, dtype=np.int64) + + with nogil: + for i in range(n): + val = values[i] + k = kh_get_uint64(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + self._check_for_collisions(locs, mi) + return np.asarray(locs) + + def unique(self, object mi): + raise NotImplementedError + + def get_labels(self, object mi, ObjectVector uniques, + Py_ssize_t count_prior, int64_t na_sentinel, + bint check_null=True): + raise NotImplementedError diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 6b4c56747c981..fe3f3c554a9b5 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import print_function - +import pytest from pandas.compat import range, lrange import numpy as np -from pandas import DataFrame, Series, Index +from pandas import DataFrame, Series, Index, MultiIndex from pandas.util.testing import (assert_series_equal, assert_frame_equal, @@ -165,6 +165,31 @@ def test_delitem(self): del self.frame['A'] self.assertNotIn('A', self.frame) + def test_delitem_multiindex(self): + midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) + df = DataFrame(np.random.randn(4, 4), columns=midx) + assert len(df.columns) == 4 + assert ('A', ) in df.columns + assert 'A' in df.columns + + result = df['A'] + assert isinstance(result, DataFrame) + del df['A'] + + assert len(df.columns) == 2 + + # A still in the levels, BUT get a KeyError if trying + # to delete + assert ('A', ) not in df.columns + with pytest.raises(KeyError): + del df[('A',)] + + # xref: https://github.com/pandas-dev/pandas/issues/2770 + # the 'A' is STILL in the columns! 
+ assert 'A' in df.columns + with pytest.raises(KeyError): + del df['A'] + def test_pop(self): self.frame.columns.name = 'baz' diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 2df297d03bcdf..024e11e63a924 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -301,10 +301,12 @@ def test_info_memory_usage(self): data[i] = np.random.randint(2, size=n).astype(dtype) df = DataFrame(data) buf = StringIO() + # display memory usage case df.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() self.assertTrue("memory usage: " in res[-1]) + # do not display memory usage cas df.info(buf=buf, memory_usage=False) res = buf.getvalue().splitlines() @@ -312,11 +314,13 @@ def test_info_memory_usage(self): df.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() + # memory usage is a lower bound, so print it as XYZ+ MB self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1])) df.iloc[:, :5].info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() + # excluded column with object dtype, so estimate is accurate self.assertFalse(re.match(r"memory usage: [^+]+\+", res[-1])) @@ -380,6 +384,34 @@ def test_info_memory_usage(self): diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) self.assertTrue(abs(diff) < 100) + def test_info_memory_usage_qualified(self): + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=[1, 2, 3]) + df.info(buf=buf) + self.assertFalse('+' in buf.getvalue()) + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=list('ABC')) + df.info(buf=buf) + self.assertTrue('+' in buf.getvalue()) + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=pd.MultiIndex.from_product( + [range(3), range(3)])) + df.info(buf=buf) + self.assertFalse('+' in buf.getvalue()) + + buf = StringIO() + df = DataFrame(1, columns=list('ab'), + index=pd.MultiIndex.from_product( + [range(3), ['foo', 'bar']])) + df.info(buf=buf) + self.assertTrue('+' in buf.getvalue()) + def test_info_memory_usage_bug_on_multiindex(self): # GH 14308 # memory usage introspection should not materialize .values diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 3a6a9eaaa8e72..d53446870beb1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1588,7 +1588,7 @@ def test_groupby_as_index_cython(self): result = grouped.mean() expected = data.groupby(['A', 'B']).mean() - arrays = lzip(*expected.index._tuple_index) + arrays = lzip(*expected.index.values) expected.insert(0, 'A', arrays[0]) expected.insert(1, 'B', arrays[1]) expected.index = np.arange(len(expected)) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 702c4758da245..5611492b4af1b 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1046,6 +1046,21 @@ def test_contains(self): self.assertNotIn(('bar', 'two'), self.index) self.assertNotIn(None, self.index) + def test_contains_top_level(self): + midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) + assert 'A' in midx + assert 'A' not in midx._engine + + def test_contains_with_nat(self): + # MI with a NaT + mi = MultiIndex(levels=[['C'], + pd.date_range('2012-01-01', periods=5)], + labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, 'B']) + self.assertTrue(('C', pd.Timestamp('2012-01-01')) in mi) + for val in mi.values: + self.assertTrue(val in mi) + def test_is_all_dates(self): 
self.assertFalse(self.index.is_all_dates) @@ -1102,6 +1117,17 @@ def test_get_loc_duplicates(self): xp = 0 assert (rs == xp) + def test_get_value_duplicates(self): + index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + + assert index.get_loc('D') == slice(0, 3) + with pytest.raises(KeyError): + index._engine.get_value(np.array([]), 'D') + def test_get_loc_level(self): index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( @@ -1294,7 +1320,7 @@ def test_get_indexer(self): assert_almost_equal(r1, rbfill1) # pass non-MultiIndex - r1 = idx1.get_indexer(idx2._tuple_index) + r1 = idx1.get_indexer(idx2.values) rexp1 = idx1.get_indexer(idx2) assert_almost_equal(r1, rexp1) @@ -1316,6 +1342,19 @@ def test_get_indexer_nearest(self): with tm.assertRaises(NotImplementedError): midx.get_indexer(['a'], method='pad', tolerance=2) + def test_hash_collisions(self): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product([np.arange(1000), np.arange(1000)], + names=['one', 'two']) + result = index.get_indexer(index.values) + self.assert_numpy_array_equal(result, + np.arange(len(index), dtype='int64')) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + self.assertEqual(result, i) + def test_format(self): self.index.format() self.index[:0].format() @@ -1420,12 +1459,13 @@ def test_bounds(self): self.index._bounds def test_equals_multi(self): - self.assertTrue(self.index.equals(self.index)) - self.assertTrue(self.index.equal_levels(self.index)) - - self.assertFalse(self.index.equals(self.index[:-1])) + assert self.index.equals(self.index) + assert not self.index.equals(self.index.values) + assert self.index.equals(Index(self.index.values)) - self.assertTrue(self.index.equals(self.index._tuple_index)) + assert self.index.equal_levels(self.index) + assert not self.index.equals(self.index[:-1]) + assert not self.index.equals(self.index[-1]) # different number of levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( @@ -1433,8 +1473,8 @@ def test_equals_multi(self): [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) - self.assertFalse(index.equals(index2)) - self.assertFalse(index.equal_levels(index2)) + assert not index.equals(index2) + assert not index.equal_levels(index2) # levels are different major_axis = Index(lrange(4)) @@ -1445,8 +1485,8 @@ def test_equals_multi(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) - self.assertFalse(self.index.equal_levels(index)) + assert not self.index.equals(index) + assert not self.index.equal_levels(index) # some of the labels are different major_axis = Index(['foo', 'bar', 'baz', 'qux']) @@ -1457,7 +1497,16 @@ def test_equals_multi(self): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) + assert not self.index.equals(index) + + def test_equals_missing_values(self): + # make sure take is not using -1 + i = pd.MultiIndex.from_tuples([(0, pd.NaT), + (0, pd.Timestamp('20130101'))]) + result = i[0:1].equals(i[0]) + self.assertFalse(result) + result = i[1:2].equals(i[1]) + self.assertFalse(result) def 
test_identical(self): mi = self.index.copy() @@ -1510,7 +1559,7 @@ def test_union(self): the_union = piece1 | piece2 - tups = sorted(self.index._tuple_index) + tups = sorted(self.index.values) expected = MultiIndex.from_tuples(tups) self.assertTrue(the_union.equals(expected)) @@ -1523,7 +1572,7 @@ def test_union(self): self.assertIs(the_union, self.index) # won't work in python 3 - # tuples = self.index._tuple_index + # tuples = self.index.values # result = self.index[:4] | tuples[4:] # self.assertTrue(result.equals(tuples)) @@ -1543,7 +1592,7 @@ def test_intersection(self): piece2 = self.index[3:] the_int = piece1 & piece2 - tups = sorted(self.index[3:5]._tuple_index) + tups = sorted(self.index[3:5].values) expected = MultiIndex.from_tuples(tups) self.assertTrue(the_int.equals(expected)) @@ -1557,7 +1606,7 @@ def test_intersection(self): self.assertTrue(empty.equals(expected)) # can't do in python 3 - # tuples = self.index._tuple_index + # tuples = self.index.values # result = self.index & tuples # self.assertTrue(result.equals(tuples)) @@ -1616,7 +1665,7 @@ def test_difference(self): self.assertEqual(len(result), 0) # raise Exception called with non-MultiIndex - result = first.difference(first._tuple_index) + result = first.difference(first.values) self.assertTrue(result.equals(first[:0])) # name from empty array @@ -1642,7 +1691,7 @@ def test_from_tuples(self): def test_argsort(self): result = self.index.argsort() - expected = self.index._tuple_index.argsort() + expected = self.index.values.argsort() tm.assert_numpy_array_equal(result, expected) def test_sortlevel(self): @@ -2297,11 +2346,60 @@ def test_level_setting_resets_attributes(self): ind = MultiIndex.from_arrays([ ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) - assert ind.is_monotonic + self.assertTrue(ind.is_monotonic) ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], inplace=True) + # if this fails, probably didn't reset the cache correctly. 
- assert not ind.is_monotonic + self.assertFalse(ind.is_monotonic) + + def test_is_monotonic(self): + i = MultiIndex.from_product([np.arange(10), + np.arange(10)], names=['one', 'two']) + self.assertTrue(i.is_monotonic) + self.assertTrue(Index(i.values).is_monotonic) + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + # string ordering + i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + self.assertFalse(i.is_monotonic) + self.assertFalse(Index(i.values).is_monotonic) + + i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], + ['mom', 'next', 'zenith']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + self.assertTrue(i.is_monotonic) + self.assertTrue(Index(i.values).is_monotonic) + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237', + 'nl0000289783', + 'nl0000289965', 'nl0000301109']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + self.assertFalse(i.is_monotonic) def test_isin(self): values = [('foo', 2), ('bar', 3), ('quux', 4)] diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index 1e6ecbbcdc756..b6b9ac93b234c 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -413,9 +413,10 @@ def f(): df.loc[idx[:, :, 'Stock'], 'price'] *= 2 tm.assert_frame_equal(df, expected) - def test_getitem_multiindex(self): + def test_getitem_duplicates_multiindex(self): # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise # the appropriate error, only in PY3 of course! 
+ index = MultiIndex(levels=[['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 8e0628eefa392..0f36af2c8c4e7 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1469,7 +1469,7 @@ def test_frame_getitem_not_sorted(self): df = self.frame.T df['foo', 'four'] = 'foo' - arrays = [np.array(x) for x in zip(*df.columns._tuple_index)] + arrays = [np.array(x) for x in zip(*df.columns.values)] result = df['foo'] result2 = df.loc[:, 'foo'] @@ -1493,7 +1493,7 @@ def test_series_getitem_not_sorted(self): index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) - arrays = [np.array(x) for x in zip(*index._tuple_index)] + arrays = [np.array(x) for x in zip(*index.values)] result = s['qux'] result2 = s.loc['qux'] diff --git a/pandas/tests/tools/test_hashing.py b/pandas/tests/tools/test_hashing.py index 05a352f259e8b..9bed0d428bc41 100644 --- a/pandas/tests/tools/test_hashing.py +++ b/pandas/tests/tools/test_hashing.py @@ -152,6 +152,18 @@ def test_categorical_consistency(self): tm.assert_series_equal(h1, h2) tm.assert_series_equal(h1, h3) + def test_categorical_with_nan_consistency(self): + c = pd.Categorical.from_codes( + [-1, 0, 1, 2, 3, 4], + categories=pd.date_range('2012-01-01', periods=5, name='B')) + expected = hash_array(c, categorize=False) + c = pd.Categorical.from_codes( + [-1, 0], + categories=[pd.Timestamp('2012-01-01')]) + result = hash_array(c, categorize=False) + assert result[0] in expected + assert result[1] in expected + def test_pandas_errors(self): for obj in [pd.Timestamp('20130101'), tm.makePanel()]: diff --git a/pandas/tests/tools/test_join.py b/pandas/tests/tools/test_join.py index ab42b1212301b..ee6b3d57b852d 100644 --- a/pandas/tests/tools/test_join.py +++ b/pandas/tests/tools/test_join.py @@ -7,7 +7,7 @@ from pandas.compat import lrange import pandas.compat as compat from pandas.util.testing import assert_frame_equal -from pandas import DataFrame, MultiIndex, Series, merge, concat +from pandas import DataFrame, MultiIndex, Series, Index, merge, concat import pandas._join as _join import pandas.util.testing as tm @@ -368,7 +368,7 @@ def test_join_multiindex(self): df2 = df2.sort_index(level=0) joined = df1.join(df2, how='outer') - ex_index = index1._tuple_index.union(index2._tuple_index) + ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names assert_frame_equal(joined, expected) @@ -378,7 +378,7 @@ def test_join_multiindex(self): df2 = df2.sort_index(level=1) joined = df1.join(df2, how='outer').sort_index(level=0) - ex_index = index1._tuple_index.union(index2._tuple_index) + ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py index 800e0b8815443..ef863510cdd87 100644 --- a/pandas/tools/hashing.py +++ b/pandas/tools/hashing.py @@ -5,7 +5,6 @@ import numpy as np from pandas import _hash, Series, factorize, Categorical, Index, MultiIndex -import pandas.core.algorithms as algos from pandas.lib import is_bool_array from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame from pandas.types.common import (is_categorical_dtype, is_numeric_dtype, @@ -142,20 +141,18 @@ def hash_tuples(vals, encoding='utf8', hash_key=None): if not 
isinstance(vals, MultiIndex): vals = MultiIndex.from_tuples(vals) - # create a list-of-ndarrays - def get_level_values(num): - unique = vals.levels[num] # .values - labels = vals.labels[num] - filled = algos.take_1d(unique._values, labels, - fill_value=unique._na_value) - return filled - - vals = [get_level_values(level) + # create a list-of-Categoricals + vals = [Categorical(vals.labels[level], + vals.levels[level], + ordered=False, + fastpath=True) for level in range(vals.nlevels)] # hash the list-of-ndarrays - hashes = (hash_array(l, encoding=encoding, hash_key=hash_key) - for l in vals) + hashes = (_hash_categorical(cat, + encoding=encoding, + hash_key=hash_key) + for cat in vals) h = _combine_hash_arrays(hashes, len(vals)) if is_tuple: h = h[0] @@ -178,9 +175,26 @@ def _hash_categorical(c, encoding, hash_key): ------- ndarray of hashed values array, same size as len(c) """ - cat_hashed = hash_array(c.categories.values, encoding, hash_key, - categorize=False).astype(np.uint64, copy=False) - return c.rename_categories(cat_hashed).astype(np.uint64, copy=False) + hashed = hash_array(c.categories.values, encoding, hash_key, + categorize=False) + + # we have uint64, as we don't directly support missing values + # we don't want to use take_nd which will coerce to float + # instead, directly construt the result with a + # max(np.uint64) as the missing value indicator + # + # TODO: GH 15362 + + mask = c.isnull() + if len(hashed): + result = hashed.take(c.codes) + else: + result = np.zeros(len(mask), dtype='uint64') + + if mask.any(): + result[mask] = np.iinfo(np.uint64).max + + return result def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 6b1c3f9c00351..b1a17df64aecf 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -12,7 +12,8 @@ is_datetime64tz_dtype, is_datetime64_dtype, is_timedelta64_dtype, is_dtype_equal, is_float_dtype, is_complex_dtype, - is_integer_dtype, is_datetime_or_timedelta_dtype, + is_integer_dtype, + is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, _string_dtypes, _coerce_to_dtype, From 93f5e3a0c11c82ad6b7365e83637d133c1a6e8a5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 12:59:11 -0500 Subject: [PATCH 066/353] STYLE: flake8 upgraded to 3.3 on conda (#15412) fixes for E305, 2 blank lines after a class definition --- pandas/compat/numpy/__init__.py | 1 + pandas/compat/numpy/function.py | 7 +++++++ pandas/computation/expr.py | 1 + pandas/core/algorithms.py | 2 ++ pandas/core/config.py | 1 + pandas/core/config_init.py | 2 ++ pandas/core/frame.py | 2 +- pandas/core/indexing.py | 2 ++ pandas/formats/format.py | 2 +- pandas/indexes/numeric.py | 3 +++ pandas/indexes/range.py | 1 + pandas/io/common.py | 2 ++ pandas/io/excel.py | 5 +++++ pandas/io/gbq.py | 1 + pandas/io/packers.py | 2 ++ pandas/io/parsers.py | 2 ++ pandas/io/pytables.py | 4 ++++ pandas/io/sql.py | 1 + pandas/io/stata.py | 1 + pandas/msgpack/__init__.py | 1 + pandas/sparse/frame.py | 1 + pandas/sparse/series.py | 1 + pandas/stats/moments.py | 3 +++ pandas/tests/sparse/test_libsparse.py | 2 +- pandas/tests/test_generic.py | 1 + pandas/tools/merge.py | 5 +++++ pandas/tools/plotting.py | 1 + pandas/tseries/frequencies.py | 2 ++ pandas/tseries/holiday.py | 2 ++ pandas/tseries/index.py | 1 + pandas/tseries/interval.py | 3 --- pandas/tseries/offsets.py | 4 ++++ pandas/tseries/resample.py | 4 ++++ pandas/tseries/timedeltas.py | 1 + pandas/types/generic.py | 1 + pandas/util/print_versions.py | 1 + 
pandas/util/terminal.py | 1 + pandas/util/testing.py | 3 +++ 38 files changed, 74 insertions(+), 6 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index bfd770d7af2c6..4a9a2647ece0f 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -67,6 +67,7 @@ def np_array_datetime64_compat(arr, *args, **kwargs): return np.array(arr, *args, **kwargs) + __all__ = ['np', '_np_version_under1p8', '_np_version_under1p9', diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index eb9e9ecc359b2..4053994efa005 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -55,6 +55,7 @@ def __call__(self, args, kwargs, fname=None, raise ValueError("invalid validation method " "'{method}'".format(method=method)) + ARGMINMAX_DEFAULTS = dict(out=None) validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin', method='both', max_fname_arg_count=1) @@ -97,6 +98,7 @@ def validate_argmax_with_skipna(skipna, args, kwargs): validate_argmax(args, kwargs) return skipna + ARGSORT_DEFAULTS = OrderedDict() ARGSORT_DEFAULTS['axis'] = -1 ARGSORT_DEFAULTS['kind'] = 'quicksort' @@ -121,6 +123,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs): validate_argsort(args, kwargs, max_fname_arg_count=1) return ascending + CLIP_DEFAULTS = dict(out=None) validate_clip = CompatValidator(CLIP_DEFAULTS, fname='clip', method='both', max_fname_arg_count=3) @@ -141,6 +144,7 @@ def validate_clip_with_axis(axis, args, kwargs): validate_clip(args, kwargs) return axis + COMPRESS_DEFAULTS = OrderedDict() COMPRESS_DEFAULTS['axis'] = None COMPRESS_DEFAULTS['out'] = None @@ -170,6 +174,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): validate_cum_func(args, kwargs, fname=name) return skipna + LOGICAL_FUNC_DEFAULTS = dict(out=None) validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs') @@ -236,6 +241,7 @@ def validate_take_with_convert(convert, args, kwargs): validate_take(args, kwargs, max_fname_arg_count=3, method='both') return convert + TRANSPOSE_DEFAULTS = dict(axes=None) validate_transpose = CompatValidator(TRANSPOSE_DEFAULTS, fname='transpose', method='both', max_fname_arg_count=0) @@ -318,6 +324,7 @@ def validate_groupby_func(name, args, kwargs, allowed=None): "with groupby. Use .groupby(...)." 
"{func}() instead".format(func=name))) + RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod', 'mean', 'std', 'var') diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index f1cf210754d12..a782287175327 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -669,6 +669,7 @@ def visitor(x, y): operands = node.values return reduce(visitor, operands) + # ast.Call signature changed on 3.5, # conditionally change which methods is named # visit_Call depending on Python version, #11097 diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c922ac21e12eb..4ae46fe33a5cc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -926,6 +926,7 @@ def _finalize_nsmallest(arr, kth_val, n, keep, narr): else: return inds + _dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'} @@ -959,6 +960,7 @@ def _hashtable_algo(f, values, return_dtype=None): # use Object return f(htable.PyObjectHashTable, _ensure_object) + _hashtables = { 'float64': (htable.Float64HashTable, htable.Float64Vector), 'uint64': (htable.UInt64HashTable, htable.UInt64Vector), diff --git a/pandas/core/config.py b/pandas/core/config.py index ed63c865ebfb4..1c0eb60b8ec2f 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -804,6 +804,7 @@ def inner(x): return inner + # common type validators, for convenience # usage: register_option(... , validator = is_int) is_int = is_type_factory(int) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index fe47391c9ff81..d3db633f3aa04 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -278,6 +278,7 @@ def mpl_style_cb(key): return val + with cf.config_prefix('display'): cf.register_option('precision', 6, pc_precision_doc, validator=is_int) cf.register_option('float_format', None, float_format_doc, @@ -380,6 +381,7 @@ def use_inf_as_null_cb(key): from pandas.types.missing import _use_inf_as_null _use_inf_as_null(key) + with cf.config_prefix('mode'): cf.register_option('use_inf_as_null', False, use_inf_as_null_doc, cb=use_inf_as_null_cb) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c66f6dbb273e..f7c306ea7ce95 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5741,9 +5741,9 @@ def _from_nested_dict(data): def _put_str(s, space): return ('%s' % s)[:space].ljust(space) + # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - DataFrame.plot = base.AccessorProperty(gfx.FramePlotMethods, gfx.FramePlotMethods) DataFrame.hist = gfx.hist_frame diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6bb2d1c479844..66510a7708e64 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -36,6 +36,7 @@ def get_indexers_list(): ('iat', _iAtIndexer), ] + # "null slice" _NS = slice(None, None) @@ -1850,6 +1851,7 @@ def _convert_key(self, key, is_setter=False): "indexers") return key + # 32-bit floating point machine epsilon _eps = np.finfo('f4').eps diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 1a7a06199ad8a..6b235b5e1bc33 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -2479,9 +2479,9 @@ def _has_names(index): else: return index.name is not None + # ----------------------------------------------------------------------------- # Global formatting options - _initial_defencoding = None diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 0b9b337731d7f..00ddf5b0c918d 100644 --- a/pandas/indexes/numeric.py 
+++ b/pandas/indexes/numeric.py @@ -159,6 +159,7 @@ def _assert_safe_casting(cls, data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') + Int64Index._add_numeric_methods() Int64Index._add_logical_methods() @@ -238,6 +239,7 @@ def _assert_safe_casting(cls, data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') + UInt64Index._add_numeric_methods() UInt64Index._add_logical_methods() @@ -391,5 +393,6 @@ def isin(self, values, level=None): return lib.ismember_nans(np.array(self), value_set, isnull(list(value_set)).any()) + Float64Index._add_numeric_methods() Float64Index._add_logical_methods_disabled() diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 7a7902b503bd6..cc78361f843bf 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -652,5 +652,6 @@ def _evaluate_numeric_binop(self, other): reversed=True, step=operator.div) + RangeIndex._add_numeric_methods() RangeIndex._add_logical_methods() diff --git a/pandas/io/common.py b/pandas/io/common.py index b24acb256c4a9..74c51b74ca18a 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -75,6 +75,7 @@ class ParserError(ValueError): """ pass + # gh-12665: Alias for now and remove later. CParserError = ParserError @@ -116,6 +117,7 @@ def __iter__(self): def __next__(self): raise AbstractMethodError(self) + if not compat.PY3: BaseIterator.next = lambda self: self.__next__() diff --git a/pandas/io/excel.py b/pandas/io/excel.py index f34ba65cf7b51..2821983213646 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -886,12 +886,14 @@ def _convert_to_style(cls, style_dict): return xls_style + register_writer(_Openpyxl1Writer) class _OpenpyxlWriter(_Openpyxl1Writer): engine = 'openpyxl' + register_writer(_OpenpyxlWriter) @@ -1368,6 +1370,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): for k, v in style_kwargs.items(): setattr(xcell, k, v) + register_writer(_Openpyxl22Writer) @@ -1491,6 +1494,7 @@ def _convert_to_style(cls, style_dict, num_format_str=None): return style + register_writer(_XlwtWriter) @@ -1603,4 +1607,5 @@ def _convert_to_style(self, style_dict, num_format_str=None): return xl_format + register_writer(_XlsxWriter) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 0ffb6b4bf8c05..a5558866937cf 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -58,6 +58,7 @@ def _test_google_api_imports(): raise ImportError("Missing module required for Google BigQuery " "support: {0}".format(str(e))) + logger = logging.getLogger('pandas.io.gbq') logger.setLevel(logging.ERROR) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index ab44e46c96b77..3f4be6ad459d8 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -217,6 +217,7 @@ def read(fh): raise ValueError('path_or_buf needs to be a string file path or file-like') + dtype_dict = {21: np.dtype('M8[ns]'), u('datetime64[ns]'): np.dtype('M8[ns]'), u('datetime64[us]'): np.dtype('M8[us]'), @@ -237,6 +238,7 @@ def dtype_for(t): return dtype_dict[t] return np.typeDict.get(t, t) + c2f_dict = {'complex': np.float64, 'complex128': np.float64, 'complex64': np.float32} diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f8905dfa315c4..88d0c6c12c04f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -409,6 +409,7 @@ def _read(filepath_or_buffer, kwds): return data + _parser_defaults = { 'delimiter': None, @@ -655,6 +656,7 @@ def parser_f(filepath_or_buffer, return parser_f + read_csv = _make_parser_function('read_csv', sep=',') 
read_csv = Appender(_read_csv_doc)(read_csv) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d8de1dcd61977..65ac4e5654dce 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -74,6 +74,7 @@ def _ensure_encoding(encoding): encoding = _default_encoding return encoding + Term = Expr @@ -112,6 +113,7 @@ class ClosedFileError(Exception): class IncompatibilityWarning(Warning): pass + incompatibility_doc = """ where criteria is being ignored as this version [%s] is too old (or not-defined), read the file in and write it out to a new file to upgrade (with @@ -122,6 +124,7 @@ class IncompatibilityWarning(Warning): class AttributeConflictWarning(Warning): pass + attribute_conflict_doc = """ the [%s] attribute of the existing index is [%s] which conflicts with the new [%s], resetting the attribute to None @@ -131,6 +134,7 @@ class AttributeConflictWarning(Warning): class DuplicateWarning(Warning): pass + duplicate_doc = """ duplicate entries in table, taking most recently appended """ diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9fa01c413aca8..55e145b493dd9 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -495,6 +495,7 @@ def has_table(table_name, con, flavor=None, schema=None): pandas_sql = pandasSQL_builder(con, flavor=flavor, schema=schema) return pandas_sql.has_table(table_name) + table_exists = has_table diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 2be7657883e88..1698ade4c0102 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -459,6 +459,7 @@ class PossiblePrecisionLoss(Warning): class ValueLabelTypeMismatch(Warning): pass + value_label_mismatch_doc = """ Stata value labels (pandas categories) must be strings. Column {0} contains non-string labels which will be converted to strings. Please check that the diff --git a/pandas/msgpack/__init__.py b/pandas/msgpack/__init__.py index 33d60a12ef0a3..4d6e241171281 100644 --- a/pandas/msgpack/__init__.py +++ b/pandas/msgpack/__init__.py @@ -41,6 +41,7 @@ def packb(o, **kwargs): """ return Packer(**kwargs).pack(o) + # alias for compatibility to simplejson/marshal/pickle. 
load = unpack loads = unpackb diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 1fc93a967bdbb..61b8434b0ea09 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -863,6 +863,7 @@ def homogenize(series_dict): return output + # use unaccelerated ops for sparse objects ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, **ops.frame_flex_funcs) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 2d3a9effe6939..dfdbb3c89814a 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -832,6 +832,7 @@ def from_coo(cls, A, dense_index=False): """ return _coo_to_sparse_series(A, dense_index=dense_index) + # overwrite series methods with unaccelerated versions ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, **ops.series_special_funcs) diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 95b209aee0b0c..914c4c08863a2 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -385,6 +385,7 @@ def ewmstd(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, bias=bias, func_kw=['bias']) + ewmvol = ewmstd @@ -476,6 +477,7 @@ def f(arg, window, min_periods=None, freq=None, center=False, **kwargs) return f + rolling_max = _rolling_func('max', 'Moving maximum.', how='max') rolling_min = _rolling_func('min', 'Moving minimum.', how='min') rolling_sum = _rolling_func('sum', 'Moving sum.') @@ -683,6 +685,7 @@ def f(arg, min_periods=1, freq=None, **kwargs): **kwargs) return f + expanding_max = _expanding_func('max', 'Expanding maximum.') expanding_min = _expanding_func('min', 'Expanding minimum.') expanding_sum = _expanding_func('sum', 'Expanding sum.') diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index 4d5a93d77cf14..0435b732911da 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -560,8 +560,8 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): check_cases(_check_case) -# too cute? oh but how I abhor code duplication +# too cute? 
oh but how I abhor code duplication check_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv'] diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 28f1dc61533c1..b087ca21d3c25 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1588,6 +1588,7 @@ def test_to_xarray(self): # non-convertible self.assertRaises(ValueError, lambda: result.to_pandas()) + # run all the tests, but wrap each in a warning catcher for t in ['test_rename', 'test_rename_axis', 'test_get_numeric_data', 'test_get_default', 'test_nonzero', diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index e82e702cb6e55..ba53d42fccec7 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -53,6 +53,7 @@ def wrapper(*args, **kwargs): return pd.concat(*args, **kwargs) return wrapper + concat = concat_wrap() @@ -66,6 +67,8 @@ def merge(left, right, how='inner', on=None, left_on=None, right_on=None, right_index=right_index, sort=sort, suffixes=suffixes, copy=copy, indicator=indicator) return op.get_result() + + if __debug__: merge.__doc__ = _merge_doc % '\nleft : DataFrame' @@ -264,6 +267,7 @@ def _merger(x, y): result = _merger(left, right) return result + ordered_merge.__doc__ = merge_ordered.__doc__ @@ -1334,6 +1338,7 @@ def _right_outer_join(x, y, max_groups): right_indexer, left_indexer = _join.left_outer_join(y, x, max_groups) return left_indexer, right_indexer + _join_functions = { 'inner': _join.inner_join, 'left': _join.left_outer_join, diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 0b1ced97d2b81..b2050d7d8d81e 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -149,6 +149,7 @@ def _mpl_ge_2_0_0(): except ImportError: return False + if _mpl_ge_1_5_0(): # Compat with mp 1.5, which uses cycler. 
import cycler diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e0c602bf5a037..957a934d13f09 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -660,6 +660,7 @@ def get_standard_freq(freq): warnings.warn(msg, FutureWarning, stacklevel=2) return to_offset(freq).rule_code + # --------------------------------------------------------------------- # Period codes @@ -795,6 +796,7 @@ def infer_freq(index, warn=True): inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq() + _ONE_MICRO = long(1000) _ONE_MILLI = _ONE_MICRO * 1000 _ONE_SECOND = _ONE_MILLI * 1000 diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 31e40c6bcbb2c..d3d936693c266 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -286,6 +286,7 @@ def _apply_rule(self, dates): dates += offset return dates + holiday_calendars = {} @@ -461,6 +462,7 @@ def merge(self, other, inplace=False): else: return holidays + USMemorialDay = Holiday('MemorialDay', month=5, day=31, offset=DateOffset(weekday=MO(-1))) USLaborDay = Holiday('Labor Day', month=9, day=1, diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 6cbb696783e09..5f00e8b648689 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -138,6 +138,7 @@ def _ensure_datetime64(other): return other raise TypeError('%s type object %s' % (type(other), str(other))) + _midnight = time(0, 0) diff --git a/pandas/tseries/interval.py b/pandas/tseries/interval.py index 6698c7e924758..22801318a1853 100644 --- a/pandas/tseries/interval.py +++ b/pandas/tseries/interval.py @@ -33,6 +33,3 @@ def __new__(self, starts, ends): def dtype(self): return self.values.dtype - -if __name__ == '__main__': - pass diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 370dd00762896..79227f6de90a5 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1652,6 +1652,7 @@ class WeekDay(object): SAT = 5 SUN = 6 + _int_to_weekday = { WeekDay.MON: 'MON', WeekDay.TUE: 'TUE', @@ -1924,6 +1925,7 @@ def onOffset(self, dt): modMonth = (dt.month - self.startingMonth) % 3 return BMonthEnd().onOffset(dt) and modMonth == 0 + _int_to_month = tslib._MONTH_ALIASES _month_to_int = dict((v, k) for k, v in _int_to_month.items()) @@ -2799,6 +2801,7 @@ def _delta_to_tick(delta): else: # pragma: no cover return Nano(nanos) + _delta_to_nanoseconds = tslib._delta_to_nanoseconds @@ -2931,6 +2934,7 @@ def generate_range(start=None, end=None, periods=None, raise ValueError('Offset %s did not decrement date' % offset) cur = next_date + prefix_mapping = dict((offset._prefix, offset) for offset in [ YearBegin, # 'AS' YearEnd, # 'A' diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 5692d6c5cabde..a6a10c08966d6 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -552,6 +552,8 @@ def var(self, ddof=1, *args, **kwargs): """ nv.validate_resampler_func('var', args, kwargs) return self._downsample('var', ddof=ddof) + + Resampler._deprecated_valids += dir(Resampler) # downsample methods @@ -969,6 +971,8 @@ def resample(obj, kind=None, **kwds): """ create a TimeGrouper and return our resampler """ tg = TimeGrouper(**kwds) return tg._get_resampler(obj, kind=kind) + + resample.__doc__ = Resampler.__doc__ diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 9bf39652a4e00..5a5d1533bfa91 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -87,6 +87,7 @@ def to_timedelta(arg, 
unit='ns', box=True, errors='raise'): return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box, errors=errors) + _unit_map = { 'Y': 'Y', 'y': 'Y', diff --git a/pandas/types/generic.py b/pandas/types/generic.py index 756fb47596700..e7b54ccc6f25e 100644 --- a/pandas/types/generic.py +++ b/pandas/types/generic.py @@ -57,4 +57,5 @@ class _ABCGeneric(type): def __instancecheck__(cls, inst): return hasattr(inst, "_data") + ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {}) diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index 7c5148caf7e74..b0f5d3994ed64 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -153,5 +153,6 @@ def main(): return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/pandas/util/terminal.py b/pandas/util/terminal.py index 6b8428ff75806..dadd09ae74ea4 100644 --- a/pandas/util/terminal.py +++ b/pandas/util/terminal.py @@ -115,6 +115,7 @@ def ioctl_GWINSZ(fd): return None return int(cr[1]), int(cr[0]) + if __name__ == "__main__": sizex, sizey = get_terminal_size() print('width = %s height = %s' % (sizex, sizey)) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 566ceec027b2b..cda386781e2ec 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -74,6 +74,7 @@ def reset_testing_mode(): if 'deprecate' in testing_mode: warnings.simplefilter('ignore', _testing_mode_warnings) + set_testing_mode() @@ -1381,6 +1382,7 @@ def assert_panelnd_equal(left, right, for i, item in enumerate(right._get_axis(0)): assert item in left, "non-matching item (left) '%s'" % item + # TODO: strangely check_names fails in py3 ? _panel_frame_equal = partial(assert_frame_equal, check_names=False) assert_panel_equal = partial(assert_panelnd_equal, @@ -2076,6 +2078,7 @@ def dec(f): return wrapper + # skip tests on exceptions with this message _network_error_messages = ( # 'urlopen error timed out', From 86ef3ca3ff7c836c5b7c01eb918201ec7c44c000 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 13:00:36 -0500 Subject: [PATCH 067/353] DOC: use shared_docs for Index.get_indexer, get_indexer_non_unique (#15411) * STYLE: flake8 upgraded to 3.3 on conda fixes for E305, 2 blank lines after a class definition * DOC: use shared_docs for Index.get_indexer, get_indexer_non_unique fix non-populated doc-strings for some methods in Index (take) --- pandas/indexes/base.py | 41 +++++++++++++++++++++++++++++--------- pandas/indexes/category.py | 40 +++++++------------------------------ pandas/indexes/multi.py | 40 ++++++++++--------------------------- pandas/tseries/period.py | 5 +++++ 4 files changed, 55 insertions(+), 71 deletions(-) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index c483fb0764a4c..e51824e72a2a0 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -65,6 +65,7 @@ _unsortable_types = frozenset(('mixed', 'mixed-integer')) _index_doc_kwargs = dict(klass='Index', inplace='', + target_klass='Index', unique='Index', duplicated='np.ndarray') _index_shared_docs = dict() @@ -1605,7 +1606,7 @@ def _append_same_dtype(self, to_concat, name): numpy.ndarray.take """ - @Appender(_index_shared_docs['take']) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) @@ -2350,15 +2351,14 @@ def get_level_values(self, level): self._validate_index_level(level) return self - def get_indexer(self, target, method=None, limit=None, tolerance=None): - """ + 
_index_shared_docs['get_indexer'] = """ Compute indexer and mask for new index given the current index. The indexer should be then used as an input to ndarray.take to align the current data to the new index. Parameters ---------- - target : Index + target : %(target_klass)s method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional * default: exact matches only. * pad / ffill: find the PREVIOUS index value if no exact match. @@ -2387,6 +2387,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): positions matches the corresponding target values. Missing values in the target are marked by -1. """ + + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) + def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) if tolerance is not None: @@ -2496,11 +2499,28 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance): indexer = np.where(distance <= tolerance, indexer, -1) return indexer + _index_shared_docs['get_indexer_non_unique'] = """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : %(target_klass)s + + Returns + ------- + indexer : ndarray of int + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + missing : ndarray of int + An indexer into the target of the values not found. + These correspond to the -1 in the indexer array + """ + + @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): - """ return an indexer suitable for taking from a non unique index - return the labels in the same order as the target, and - return a missing indexer into the target (missing are marked as -1 - in the indexer); target must be an iterable """ target = _ensure_index(target) pself, ptarget = self._possibly_promote(target) if pself is not self or ptarget is not target: @@ -2516,7 +2536,10 @@ def get_indexer_non_unique(self, target): return Index(indexer), missing def get_indexer_for(self, target, **kwargs): - """ guaranteed return of an indexer even when non-unique """ + """ + guaranteed return of an indexer even when non-unique + This dispatches to get_indexer or get_indexer_nonunique as appropriate + """ if self.is_unique: return self.get_indexer(target, **kwargs) indexer, _ = self.get_indexer_non_unique(target, **kwargs) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index e2e0fd056b111..acb2758641a62 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -18,6 +18,8 @@ import pandas.core.base as base import pandas.core.missing as missing import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) class CategoricalIndex(Index, base.PandasDelegate): @@ -289,7 +291,7 @@ def _engine(self): def is_unique(self): return not self.duplicated().any() - @Appender(base._shared_docs['unique'] % ibase._index_doc_kwargs) + @Appender(base._shared_docs['unique'] % _index_doc_kwargs) def unique(self): result = base.IndexOpsMixin.unique(self) # CategoricalIndex._shallow_copy uses keeps original categories @@ -299,7 +301,7 @@ def unique(self): @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', 
False: 'first'}) - @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) + @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.hashtable import duplicated_int64 codes = self.codes.astype('i8') @@ -425,34 +427,8 @@ def _reindex_non_unique(self, target): return new_target, indexer, new_indexer + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): - """ - Compute indexer and mask for new index given the current index. The - indexer should be then used as an input to ndarray.take to align the - current data to the new index. The mask determines whether labels are - found or not in the current index - - Parameters - ---------- - target : MultiIndex or Index (of tuples) - method : {'pad', 'ffill', 'backfill', 'bfill'} - pad / ffill: propagate LAST valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - - Notes - ----- - This is a low-level method and probably should be used at your own risk - - Examples - -------- - >>> indexer, mask = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - >>> new_values[-mask] = np.nan - - Returns - ------- - (indexer, mask) : (ndarray, ndarray) - """ method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) @@ -472,10 +448,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _ensure_platform_int(indexer) + @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): - """ this is the same for a CategoricalIndex for get_indexer; the API - returns the missing values as well - """ target = ibase._ensure_index(target) if isinstance(target, CategoricalIndex): @@ -497,7 +471,7 @@ def _convert_list_indexer(self, keyarr, kind=None): return None - @Appender(_index_shared_docs['take']) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 57739548a17d6..18e1da7303d6d 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -43,6 +43,10 @@ _get_na_value, InvalidIndexError, _index_shared_docs) import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update( + dict(klass='MultiIndex', + target_klass='MultiIndex or list of tuples')) class MultiIndex(Index): @@ -755,7 +759,7 @@ def f(k, stringify): @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) - @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) + @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index from pandas.hashtable import duplicated_int64 @@ -1244,7 +1248,7 @@ def __getitem__(self, key): names=self.names, sortorder=sortorder, verify_integrity=False) - @Appender(_index_shared_docs['take']) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) @@ -1564,34 +1568,8 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): return new_index, indexer + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, 
tolerance=None): - """ - Compute indexer and mask for new index given the current index. The - indexer should be then used as an input to ndarray.take to align the - current data to the new index. The mask determines whether labels are - found or not in the current index - - Parameters - ---------- - target : MultiIndex or Index (of tuples) - method : {'pad', 'ffill', 'backfill', 'bfill'} - pad / ffill: propagate LAST valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - - Notes - ----- - This is a low-level method and probably should be used at your own risk - - Examples - -------- - >>> indexer, mask = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - >>> new_values[-mask] = np.nan - - Returns - ------- - (indexer, mask) : (ndarray, ndarray) - """ method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) @@ -1633,6 +1611,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return _ensure_platform_int(indexer) + @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) + def get_indexer_non_unique(self, target): + return super(MultiIndex, self).get_indexer_non_unique(target) + def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 98151d5b6130c..8a6b0c153bb50 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -44,6 +44,10 @@ from pandas.lib import infer_dtype import pandas.tslib as tslib from pandas.compat import zip, u +import pandas.indexes.base as ibase +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update( + dict(target_klass='PeriodIndex or list of Periods')) def _field_accessor(name, alias, docstring=None): @@ -759,6 +763,7 @@ def get_value(self, series, key): return com._maybe_box(self, self._engine.get_value(s, key), series, key) + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): target = _ensure_index(target) From d6f8b460325fd79faa90858e2743878a7cc74dec Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Feb 2017 15:20:52 -0500 Subject: [PATCH 068/353] BLD: use latest conda version with latest miniconda installer on appveyor change 3.6 build to use numpy=1.12 & add back xlwt (was not on defaults for a while) Author: Jeff Reback Closes #15415 from jreback/appveyor and squashes the following commits: 2019f37 [Jeff Reback] force numpy version f82877b [Jeff Reback] remove extra conda list 3ace9f2 [Jeff Reback] CI: use numpy=1.12 on appveyor 6855a7b [Jeff Reback] BLD: use latest conda version with latest miniconda installer on appveyor --- appveyor.yml | 15 ++++++--------- ci/requirements-3.5-64.run | 2 +- ci/requirements-3.6-64.run | 4 ++-- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 42c3be13af809..d96e1dfcf76de 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -18,19 +18,19 @@ environment: matrix: - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "3.6" PYTHON_ARCH: "64" CONDA_PY: "36" - CONDA_NPY: "111" + CONDA_NPY: "112" - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "2.7" PYTHON_ARCH: "64" CONDA_PY: "27" CONDA_NPY: "110" - - CONDA_ROOT: "C:\\Miniconda3.5_64" + - CONDA_ROOT: "C:\\Miniconda3_64" PYTHON_VERSION: "3.5" PYTHON_ARCH: "64" CONDA_PY: "35" @@ -66,8 +66,7 @@ install: # 
install our build environment - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false - #- cmd: conda update -q conda - - cmd: conda install conda=4.2.15 + - cmd: conda update -q conda - cmd: conda config --set ssl_verify false # add the pandas channel *before* defaults to have defaults take priority @@ -83,7 +82,7 @@ install: - cmd: '%CMD_IN_ENV% conda build ci\appveyor.recipe -q' # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% nose pytest + - cmd: conda create -q -n pandas python=%PYTHON_VERSION% pytest - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run - cmd: echo "installing requirements from %REQ%" @@ -95,7 +94,5 @@ install: test_script: # tests - cmd: activate pandas - - cmd: conda list - cmd: cd \ - cmd: python -c "import pandas; pandas.test(['--skip-slow', '--skip-network'])" - diff --git a/ci/requirements-3.5-64.run b/ci/requirements-3.5-64.run index 905c2ff3625bd..ad66f578d702a 100644 --- a/ci/requirements-3.5-64.run +++ b/ci/requirements-3.5-64.run @@ -1,6 +1,6 @@ python-dateutil pytz -numpy +numpy=1.11* openpyxl xlsxwriter xlrd diff --git a/ci/requirements-3.6-64.run b/ci/requirements-3.6-64.run index 58ba103504b2c..840d2867e9297 100644 --- a/ci/requirements-3.6-64.run +++ b/ci/requirements-3.6-64.run @@ -1,10 +1,10 @@ python-dateutil pytz -numpy +numpy=1.12* openpyxl xlsxwriter xlrd -#xlwt +xlwt scipy feather-format numexpr From f2246cfa215d01b68aebd2da4afb836d912d248d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Feb 2017 09:12:16 -0500 Subject: [PATCH 069/353] TST: convert yield based test_pickle.py to parametrized to remove warnings xref #15341 Author: Jeff Reback Closes #15416 from jreback/warn and squashes the following commits: a6af576 [Jeff Reback] TST: convert yield based test_pickle.py to parametrized to remove warnings xref #15341 --- pandas/tests/io/test_pickle.py | 535 +++++++++++++++++---------------- 1 file changed, 277 insertions(+), 258 deletions(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 5445c506b050c..1e3816c1556f6 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -1,6 +1,17 @@ # pylint: disable=E1101,E1103,W0232 -""" manage legacy pickle tests """ +""" +manage legacy pickle tests + +How to add pickle tests: + +1. Install pandas version intended to output the pickle. + +2. Execute "generate_legacy_storage_files.py" to create the pickle. +$ python generate_legacy_storage_files.py pickle + +3. Move the created pickle to "data/legacy_pickle/" directory. 
+""" import pytest import os @@ -9,277 +20,285 @@ import pandas as pd from pandas import Index -from pandas.compat import u, is_platform_little_endian +from pandas.compat import is_platform_little_endian import pandas import pandas.util.testing as tm from pandas.tseries.offsets import Day, MonthEnd -class TestPickle(): - """ - How to add pickle tests: +@pytest.fixture(scope='module') +def current_pickle_data(): + # our current version pickle data + from pandas.tests.io.generate_legacy_storage_files import ( + create_pickle_data) + return create_pickle_data() + + +# --------------------- +# comparision functions +# --------------------- +def compare_element(result, expected, typ, version=None): + if isinstance(expected, Index): + tm.assert_index_equal(expected, result) + return + + if typ.startswith('sp_'): + comparator = getattr(tm, "assert_%s_equal" % typ) + comparator(result, expected, exact_indices=False) + elif typ == 'timestamp': + if expected is pd.NaT: + assert result is pd.NaT + else: + tm.assert_equal(result, expected) + tm.assert_equal(result.freq, expected.freq) + else: + comparator = getattr(tm, "assert_%s_equal" % + typ, tm.assert_almost_equal) + comparator(result, expected) + + +def compare(data, vf, version): + + # py3 compat when reading py2 pickle + try: + data = pandas.read_pickle(vf) + except (ValueError) as e: + if 'unsupported pickle protocol:' in str(e): + # trying to read a py3 pickle in py2 + return + else: + raise + + m = globals() + for typ, dv in data.items(): + for dt, result in dv.items(): + try: + expected = data[typ][dt] + except (KeyError): + if version in ('0.10.1', '0.11.0') and dt == 'reg': + break + else: + raise + + # use a specific comparator + # if available + comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt) + + comparator = m.get(comparator, m['compare_element']) + comparator(result, expected, typ, version) + return data + + +def compare_sp_series_ts(res, exp, typ, version): + # SparseTimeSeries integrated into SparseSeries in 0.12.0 + # and deprecated in 0.17.0 + if version and LooseVersion(version) <= "0.12.0": + tm.assert_sp_series_equal(res, exp, check_series_type=False) + else: + tm.assert_sp_series_equal(res, exp) + + +def compare_series_ts(result, expected, typ, version): + # GH 7748 + tm.assert_series_equal(result, expected) + tm.assert_equal(result.index.freq, expected.index.freq) + tm.assert_equal(result.index.freq.normalize, False) + tm.assert_series_equal(result > 0, expected > 0) + + # GH 9291 + freq = result.index.freq + tm.assert_equal(freq + Day(1), Day(2)) + + res = freq + pandas.Timedelta(hours=1) + tm.assert_equal(isinstance(res, pandas.Timedelta), True) + tm.assert_equal(res, pandas.Timedelta(days=1, hours=1)) + + res = freq + pandas.Timedelta(nanoseconds=1) + tm.assert_equal(isinstance(res, pandas.Timedelta), True) + tm.assert_equal(res, pandas.Timedelta(days=1, nanoseconds=1)) + + +def compare_series_dt_tz(result, expected, typ, version): + # 8260 + # dtype is object < 0.17.0 + if LooseVersion(version) < '0.17.0': + expected = expected.astype(object) + tm.assert_series_equal(result, expected) + else: + tm.assert_series_equal(result, expected) - 1. Install pandas version intended to output the pickle. - 2. Execute "generate_legacy_storage_files.py" to create the pickle. 
- $ python generate_legacy_storage_files.py pickle +def compare_series_cat(result, expected, typ, version): + # Categorical dtype is added in 0.15.0 + # ordered is changed in 0.16.0 + if LooseVersion(version) < '0.15.0': + tm.assert_series_equal(result, expected, check_dtype=False, + check_categorical=False) + elif LooseVersion(version) < '0.16.0': + tm.assert_series_equal(result, expected, check_categorical=False) + else: + tm.assert_series_equal(result, expected) - 3. Move the created pickle to "data/legacy_pickle/" directory. - NOTE: TestPickle can't be a subclass of tm.Testcase to use test generator. - http://stackoverflow.com/questions/6689537/ - nose-test-generators-inside-class - """ +def compare_frame_dt_mixed_tzs(result, expected, typ, version): + # 8260 + # dtype is object < 0.17.0 + if LooseVersion(version) < '0.17.0': + expected = expected.astype(object) + tm.assert_frame_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) - @classmethod - def setup_class(cls): - from pandas.tests.io.generate_legacy_storage_files import ( - create_pickle_data) - cls.data = create_pickle_data() - cls.path = u('__%s__.pickle' % tm.rands(10)) - def compare_element(self, result, expected, typ, version=None): - if isinstance(expected, Index): - tm.assert_index_equal(expected, result) - return +def compare_frame_cat_onecol(result, expected, typ, version): + # Categorical dtype is added in 0.15.0 + # ordered is changed in 0.16.0 + if LooseVersion(version) < '0.15.0': + tm.assert_frame_equal(result, expected, check_dtype=False, + check_categorical=False) + elif LooseVersion(version) < '0.16.0': + tm.assert_frame_equal(result, expected, check_categorical=False) + else: + tm.assert_frame_equal(result, expected) - if typ.startswith('sp_'): - comparator = getattr(tm, "assert_%s_equal" % typ) - comparator(result, expected, exact_indices=False) - elif typ == 'timestamp': - if expected is pd.NaT: - assert result is pd.NaT - else: - tm.assert_equal(result, expected) - tm.assert_equal(result.freq, expected.freq) - else: - comparator = getattr(tm, "assert_%s_equal" % - typ, tm.assert_almost_equal) - comparator(result, expected) - - def compare(self, vf, version): - - # py3 compat when reading py2 pickle - try: - data = pandas.read_pickle(vf) - except (ValueError) as e: - if 'unsupported pickle protocol:' in str(e): - # trying to read a py3 pickle in py2 - return - else: - raise - - for typ, dv in data.items(): - for dt, result in dv.items(): - try: - expected = self.data[typ][dt] - except (KeyError): - if version in ('0.10.1', '0.11.0') and dt == 'reg': - break - else: - raise - - # use a specific comparator - # if available - comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt) - comparator = getattr(self, comparator, self.compare_element) - comparator(result, expected, typ, version) - return data - - def compare_sp_series_ts(self, res, exp, typ, version): - # SparseTimeSeries integrated into SparseSeries in 0.12.0 - # and deprecated in 0.17.0 - if version and LooseVersion(version) <= "0.12.0": - tm.assert_sp_series_equal(res, exp, check_series_type=False) - else: - tm.assert_sp_series_equal(res, exp) - def compare_series_ts(self, result, expected, typ, version): - # GH 7748 - tm.assert_series_equal(result, expected) - tm.assert_equal(result.index.freq, expected.index.freq) - tm.assert_equal(result.index.freq.normalize, False) - tm.assert_series_equal(result > 0, expected > 0) - - # GH 9291 - freq = result.index.freq - tm.assert_equal(freq + Day(1), Day(2)) - - res = freq + 
pandas.Timedelta(hours=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, hours=1)) - - res = freq + pandas.Timedelta(nanoseconds=1) - tm.assert_equal(isinstance(res, pandas.Timedelta), True) - tm.assert_equal(res, pandas.Timedelta(days=1, nanoseconds=1)) - - def compare_series_dt_tz(self, result, expected, typ, version): - # 8260 - # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': - expected = expected.astype(object) - tm.assert_series_equal(result, expected) - else: - tm.assert_series_equal(result, expected) - - def compare_series_cat(self, result, expected, typ, version): - # Categorical dtype is added in 0.15.0 - # ordered is changed in 0.16.0 - if LooseVersion(version) < '0.15.0': - tm.assert_series_equal(result, expected, check_dtype=False, - check_categorical=False) - elif LooseVersion(version) < '0.16.0': - tm.assert_series_equal(result, expected, check_categorical=False) - else: - tm.assert_series_equal(result, expected) - - def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): - # 8260 - # dtype is object < 0.17.0 - if LooseVersion(version) < '0.17.0': - expected = expected.astype(object) - tm.assert_frame_equal(result, expected) - else: - tm.assert_frame_equal(result, expected) - - def compare_frame_cat_onecol(self, result, expected, typ, version): - # Categorical dtype is added in 0.15.0 - # ordered is changed in 0.16.0 - if LooseVersion(version) < '0.15.0': - tm.assert_frame_equal(result, expected, check_dtype=False, - check_categorical=False) - elif LooseVersion(version) < '0.16.0': - tm.assert_frame_equal(result, expected, check_categorical=False) - else: - tm.assert_frame_equal(result, expected) - - def compare_frame_cat_and_float(self, result, expected, typ, version): - self.compare_frame_cat_onecol(result, expected, typ, version) - - def compare_index_period(self, result, expected, typ, version): - tm.assert_index_equal(result, expected) - tm.assertIsInstance(result.freq, MonthEnd) - tm.assert_equal(result.freq, MonthEnd()) - tm.assert_equal(result.freqstr, 'M') - tm.assert_index_equal(result.shift(2), expected.shift(2)) - - def compare_sp_frame_float(self, result, expected, typ, version): - if LooseVersion(version) <= '0.18.1': - tm.assert_sp_frame_equal(result, expected, exact_indices=False, - check_dtype=False) - else: - tm.assert_sp_frame_equal(result, expected) - - def read_pickles(self, version): - if not is_platform_little_endian(): - pytest.skip("known failure on non-little endian") - - pth = tm.get_data_path('legacy_pickle/{0}'.format(str(version))) - n = 0 - for f in os.listdir(pth): - vf = os.path.join(pth, f) - data = self.compare(vf, version) - - if data is None: - continue - n += 1 - assert n > 0, 'Pickle files are not tested' - - def test_pickles(self): - pickle_path = tm.get_data_path('legacy_pickle') - n = 0 - for v in os.listdir(pickle_path): - pth = os.path.join(pickle_path, v) - if os.path.isdir(pth): - yield self.read_pickles, v - n += 1 - assert n > 0, 'Pickle files are not tested' - - def test_round_trip_current(self): - - try: - import cPickle as c_pickle - - def c_pickler(obj, path): - with open(path, 'wb') as fh: - c_pickle.dump(obj, fh, protocol=-1) - - def c_unpickler(path): - with open(path, 'rb') as fh: - fh.seek(0) - return c_pickle.load(fh) - except: - c_pickler = None - c_unpickler = None - - import pickle as python_pickle - - def python_pickler(obj, path): +def compare_frame_cat_and_float(result, expected, typ, version): + 
compare_frame_cat_onecol(result, expected, typ, version) + + +def compare_index_period(result, expected, typ, version): + tm.assert_index_equal(result, expected) + tm.assertIsInstance(result.freq, MonthEnd) + tm.assert_equal(result.freq, MonthEnd()) + tm.assert_equal(result.freqstr, 'M') + tm.assert_index_equal(result.shift(2), expected.shift(2)) + + +def compare_sp_frame_float(result, expected, typ, version): + if LooseVersion(version) <= '0.18.1': + tm.assert_sp_frame_equal(result, expected, exact_indices=False, + check_dtype=False) + else: + tm.assert_sp_frame_equal(result, expected) + + +# --------------------- +# tests +# --------------------- +def legacy_pickle_versions(): + # yield the pickle versions + pickle_path = tm.get_data_path('legacy_pickle') + for v in os.listdir(pickle_path): + pth = os.path.join(pickle_path, v) + if os.path.isdir(pth): + yield v + + +@pytest.mark.parametrize('version', legacy_pickle_versions()) +def test_pickles(current_pickle_data, version): + if not is_platform_little_endian(): + pytest.skip("known failure on non-little endian") + + pth = tm.get_data_path('legacy_pickle/{0}'.format(version)) + n = 0 + for f in os.listdir(pth): + vf = os.path.join(pth, f) + data = compare(current_pickle_data, vf, version) + + if data is None: + continue + n += 1 + assert n > 0, 'Pickle files are not tested' + + +def test_round_trip_current(current_pickle_data): + + try: + import cPickle as c_pickle + + def c_pickler(obj, path): with open(path, 'wb') as fh: - python_pickle.dump(obj, fh, protocol=-1) + c_pickle.dump(obj, fh, protocol=-1) - def python_unpickler(path): + def c_unpickler(path): with open(path, 'rb') as fh: fh.seek(0) - return python_pickle.load(fh) - - for typ, dv in self.data.items(): - for dt, expected in dv.items(): - - for writer in [pd.to_pickle, c_pickler, python_pickler]: - if writer is None: - continue - - with tm.ensure_clean(self.path) as path: - - # test writing with each pickler - writer(expected, path) - - # test reading with each unpickler - result = pd.read_pickle(path) - self.compare_element(result, expected, typ) - - if c_unpickler is not None: - result = c_unpickler(path) - self.compare_element(result, expected, typ) - - result = python_unpickler(path) - self.compare_element(result, expected, typ) - - def test_pickle_v0_14_1(self): - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') - # This code was executed once on v0.14.1 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - def test_pickle_v0_15_2(self): - # ordered -> _ordered - # GH 9347 - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') - # This code was executed once on v0.15.2 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + return 
c_pickle.load(fh) + except: + c_pickler = None + c_unpickler = None + + import pickle as python_pickle + + def python_pickler(obj, path): + with open(path, 'wb') as fh: + python_pickle.dump(obj, fh, protocol=-1) + + def python_unpickler(path): + with open(path, 'rb') as fh: + fh.seek(0) + return python_pickle.load(fh) + + data = current_pickle_data + for typ, dv in data.items(): + for dt, expected in dv.items(): + + for writer in [pd.to_pickle, c_pickler, python_pickler]: + if writer is None: + continue + + with tm.ensure_clean() as path: + + # test writing with each pickler + writer(expected, path) + + # test reading with each unpickler + result = pd.read_pickle(path) + compare_element(result, expected, typ) + + if c_unpickler is not None: + result = c_unpickler(path) + compare_element(result, expected, typ) + + result = python_unpickler(path) + compare_element(result, expected, typ) + + +def test_pickle_v0_14_1(): + + # we have the name warning + # 10482 + with tm.assert_produces_warning(UserWarning): + cat = pd.Categorical(values=['a', 'b', 'c'], + categories=['a', 'b', 'c', 'd'], + name='foobar', ordered=False) + pickle_path = os.path.join(tm.get_data_path(), + 'categorical_0_14_1.pickle') + # This code was executed once on v0.14.1 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + + +def test_pickle_v0_15_2(): + # ordered -> _ordered + # GH 9347 + + # we have the name warning + # 10482 + with tm.assert_produces_warning(UserWarning): + cat = pd.Categorical(values=['a', 'b', 'c'], + categories=['a', 'b', 'c', 'd'], + name='foobar', ordered=False) + pickle_path = os.path.join(tm.get_data_path(), + 'categorical_0_15_2.pickle') + # This code was executed once on v0.15.2 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) From ddb22f578b7c7147fd8bcd9fb7c8504a8053e313 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 16 Feb 2017 09:13:42 -0500 Subject: [PATCH 070/353] TST: Parametrize simple yield tests xref #15341 Author: Elliott Sales de Andrade Closes #15406 from QuLogic/pytest-simple-yield and squashes the following commits: b002752 [Elliott Sales de Andrade] TST: Set PYTHONHASHSEED so xdist doesn't break. 8368772 [Elliott Sales de Andrade] TST: Use fixtures for engine/parser where possible. c6cd346 [Elliott Sales de Andrade] TST: Parametrize remaining simple yield tests. 47bf1a1 [Elliott Sales de Andrade] TST: Replace ENGINES_PARSERS by parametrize. 
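For readers unfamiliar with the conversion this patch performs, the sketch below illustrates the general yield-to-parametrize pattern; the test names, helper, and engine/parser values in it are illustrative placeholders, not code taken from this patch.

    # Sketch only: illustrative names, not part of the patch.
    import pytest

    ENGINES = ['numexpr', 'python']   # assumed example values
    PARSERS = ['pandas', 'python']

    def check_ops(engine, parser):
        # stand-in for the real per-combination assertions
        assert engine in ENGINES and parser in PARSERS

    # Old nose-style generator test, which pytest only supports through a
    # deprecated collection path and warns about:
    #
    #   def test_ops():
    #       for engine in ENGINES:
    #           for parser in PARSERS:
    #               yield check_ops, engine, parser
    #
    # Parametrized equivalent: pytest collects one independent test item per
    # combination, so failures are reported individually and no yield-test
    # warning is raised.
    @pytest.mark.parametrize('engine', ENGINES)
    @pytest.mark.parametrize('parser', PARSERS)
    def test_ops(engine, parser):
        check_ops(engine, parser)

Where many tests in one module share the same axes of parametrization (as with engine/parser in test_eval.py below), the patch instead defines @pytest.fixture(params=...) fixtures, so each test simply takes engine and parser as arguments.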
--- ci/script_multi.sh | 6 + pandas/tests/computation/test_compat.py | 11 +- pandas/tests/computation/test_eval.py | 233 ++++++------------------ pandas/tests/io/parser/test_network.py | 26 +-- pandas/util/testing.py | 15 +- 5 files changed, 92 insertions(+), 199 deletions(-) diff --git a/ci/script_multi.sh b/ci/script_multi.sh index f5fbcbbc12f83..41f71fd21f63f 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -17,6 +17,12 @@ if [ -n "$LOCALE_OVERRIDE" ]; then python -c "$pycmd" fi +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') +echo PYTHONHASHSEED=$PYTHONHASHSEED + if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." elif [ "$COVERAGE" ]; then diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 77994ac6d2f53..59bdde83aedd8 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -12,8 +12,6 @@ import pandas.computation.expr as expr from pandas.computation import _MIN_NUMEXPR_VERSION -ENGINES_PARSERS = list(product(_engines, expr._parsers)) - def test_compat(): # test we have compat with our version of nu @@ -30,12 +28,9 @@ def test_compat(): pytest.skip("not testing numexpr version compat") -def test_invalid_numexpr_version(): - for engine, parser in ENGINES_PARSERS: - yield check_invalid_numexpr_version, engine, parser - - -def check_invalid_numexpr_version(engine, parser): +@pytest.mark.parametrize('engine', _engines) +@pytest.mark.parametrize('parser', expr._parsers) +def test_invalid_numexpr_version(engine, parser): def testit(): a, b = 1, 2 res = pd.eval('a + b', engine=engine, parser=parser) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index ada714c8ac52e..b42f79fe5009b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -20,6 +20,7 @@ from pandas.computation import pytables from pandas.computation.engines import _engines, NumExprClobberingError from pandas.computation.expr import PythonExprVisitor, PandasExprVisitor +from pandas.computation.expressions import _USE_NUMEXPR, _NUMEXPR_INSTALLED from pandas.computation.ops import (_binary_ops_dict, _special_case_arith_ops_syms, _arith_ops_syms, _bool_ops_syms, @@ -38,6 +39,23 @@ _scalar_skip = 'in', 'not in' +@pytest.fixture(params=( + pytest.mark.skipif(engine == 'numexpr' and not _USE_NUMEXPR, + reason='numexpr enabled->{enabled}, ' + 'installed->{installed}'.format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED))(engine) + for engine in _engines +)) +def engine(request): + return request.param + + +@pytest.fixture(params=expr._parsers) +def parser(request): + return request.param + + def engine_has_neg_frac(engine): return _engines[engine].has_neg_frac @@ -774,17 +792,17 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): f = lambda *args, **kwargs: np.random.randn() -ENGINES_PARSERS = list(product(_engines, expr._parsers)) - #------------------------------------- # typecasting rules consistency with python # issue #12388 class TestTypeCasting(object): - - def check_binop_typecasting(self, engine, parser, op, dt): - tm.skip_if_no_ne(engine) + @pytest.mark.parametrize('op', ['+', '-', '*', '**', '/']) + # maybe someday... 
numexpr has too many upcasting rules now + # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float'])) + @pytest.mark.parametrize('dt', [np.float32, np.float64]) + def test_binop_typecasting(self, engine, parser, op, dt): df = mkdf(5, 3, data_gen_f=f, dtype=dt) s = 'df {} 3'.format(op) res = pd.eval(s, engine=engine, parser=parser) @@ -798,15 +816,6 @@ def check_binop_typecasting(self, engine, parser, op, dt): assert res.values.dtype == dt assert_frame_equal(res, eval(s)) - def test_binop_typecasting(self): - for engine, parser in ENGINES_PARSERS: - for op in ['+', '-', '*', '**', '/']: - # maybe someday... numexpr has too many upcasting rules now - # for dt in chain(*(np.sctypes[x] for x in ['uint', 'int', - # 'float'])): - for dt in [np.float32, np.float64]: - yield self.check_binop_typecasting, engine, parser, op, dt - #------------------------------------- # basic and complex alignment @@ -826,19 +835,13 @@ class TestAlignment(object): index_types = 'i', 'u', 'dt' lhs_index_types = index_types + ('s',) # 'p' - def check_align_nested_unary_op(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_align_nested_unary_op(self, engine, parser): s = 'df * ~2' df = mkdf(5, 3, data_gen_f=f) res = pd.eval(s, engine=engine, parser=parser) assert_frame_equal(res, df * ~2) - def test_align_nested_unary_op(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_align_nested_unary_op, engine, parser - - def check_basic_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_basic_frame_alignment(self, engine, parser): args = product(self.lhs_index_types, self.index_types, self.index_types) with warnings.catch_warnings(record=True): @@ -856,12 +859,7 @@ def check_basic_frame_alignment(self, engine, parser): res = pd.eval('df + df2', engine=engine, parser=parser) assert_frame_equal(res, df + df2) - def test_basic_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_frame_alignment, engine, parser - - def check_frame_comparison(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_frame_comparison(self, engine, parser): args = product(self.lhs_index_types, repeat=2) for r_idx_type, c_idx_type in args: df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, @@ -874,12 +872,8 @@ def check_frame_comparison(self, engine, parser): res = pd.eval('df < df3', engine=engine, parser=parser) assert_frame_equal(res, df < df3) - def test_frame_comparison(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_frame_comparison, engine, parser - - def check_medium_complex_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + @slow + def test_medium_complex_frame_alignment(self, engine, parser): args = product(self.lhs_index_types, self.index_types, self.index_types, self.index_types) @@ -899,14 +893,7 @@ def check_medium_complex_frame_alignment(self, engine, parser): engine=engine, parser=parser) assert_frame_equal(res, df + df2 + df3) - @slow - def test_medium_complex_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_medium_complex_frame_alignment, engine, parser - - def check_basic_frame_series_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + def test_basic_frame_series_alignment(self, engine, parser): def testit(r_idx_type, c_idx_type, index_name): df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type) @@ -932,13 +919,7 @@ def testit(r_idx_type, c_idx_type, index_name): for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, 
c_idx_type, index_name) - def test_basic_frame_series_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_frame_series_alignment, engine, parser - - def check_basic_series_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + def test_basic_series_frame_alignment(self, engine, parser): def testit(r_idx_type, c_idx_type, index_name): df = mkdf(10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type) @@ -968,12 +949,7 @@ def testit(r_idx_type, c_idx_type, index_name): for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, c_idx_type, index_name) - def test_basic_series_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_basic_series_frame_alignment, engine, parser - - def check_series_frame_commutativity(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_series_frame_commutativity(self, engine, parser): args = product(self.lhs_index_types, self.index_types, ('+', '*'), ('index', 'columns')) @@ -1000,13 +976,8 @@ def check_series_frame_commutativity(self, engine, parser): if engine == 'numexpr': assert_frame_equal(a, b) - def test_series_frame_commutativity(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_series_frame_commutativity, engine, parser - - def check_complex_series_frame_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) - + @slow + def test_complex_series_frame_alignment(self, engine, parser): import random args = product(self.lhs_index_types, self.index_types, self.index_types, self.index_types) @@ -1050,13 +1021,7 @@ def check_complex_series_frame_alignment(self, engine, parser): tm.assert_equal(res.shape, expected.shape) assert_frame_equal(res, expected) - @slow - def test_complex_series_frame_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield self.check_complex_series_frame_alignment, engine, parser - - def check_performance_warning_for_poor_alignment(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_performance_warning_for_poor_alignment(self, engine, parser): df = DataFrame(randn(1000, 10)) s = Series(randn(10000)) if engine == 'numexpr': @@ -1098,11 +1063,6 @@ def check_performance_warning_for_poor_alignment(self, engine, parser): "".format(1, 'df', np.log10(s.size - df.shape[1]))) tm.assert_equal(msg, expected) - def test_performance_warning_for_poor_alignment(self): - for engine, parser in ENGINES_PARSERS: - yield (self.check_performance_warning_for_poor_alignment, engine, - parser) - #------------------------------------ # slightly more complex ops @@ -1762,18 +1722,12 @@ def setUpClass(cls): class TestScope(object): - def check_global_scope(self, e, engine, parser): - tm.skip_if_no_ne(engine) + def test_global_scope(self, engine, parser): + e = '_var_s * 2' tm.assert_numpy_array_equal(_var_s * 2, pd.eval(e, engine=engine, parser=parser)) - def test_global_scope(self): - e = '_var_s * 2' - for engine, parser in product(_engines, expr._parsers): - yield self.check_global_scope, e, engine, parser - - def check_no_new_locals(self, engine, parser): - tm.skip_if_no_ne(engine) + def test_no_new_locals(self, engine, parser): x = 1 lcls = locals().copy() pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser) @@ -1781,22 +1735,13 @@ def check_no_new_locals(self, engine, parser): lcls2.pop('lcls') tm.assert_equal(lcls, lcls2) - def test_no_new_locals(self): - for engine, parser in product(_engines, expr._parsers): - yield self.check_no_new_locals, engine, parser - - def check_no_new_globals(self, engine, 
parser): - tm.skip_if_no_ne(engine) + def test_no_new_globals(self, engine, parser): x = 1 gbls = globals().copy() pd.eval('x + 1', engine=engine, parser=parser) gbls2 = globals().copy() tm.assert_equal(gbls, gbls2) - def test_no_new_globals(self): - for engine, parser in product(_engines, expr._parsers): - yield self.check_no_new_globals, engine, parser - def test_invalid_engine(): tm.skip_if_no_ne() @@ -1816,7 +1761,9 @@ def test_invalid_parser(): 'pandas': PandasExprVisitor} -def check_disallowed_nodes(engine, parser): +@pytest.mark.parametrize('engine', _parsers) +@pytest.mark.parametrize('parser', _parsers) +def test_disallowed_nodes(engine, parser): tm.skip_if_no_ne(engine) VisitorClass = _parsers[parser] uns_ops = VisitorClass.unsupported_nodes @@ -1827,38 +1774,19 @@ def check_disallowed_nodes(engine, parser): getattr(inst, ops)() -def test_disallowed_nodes(): - for engine, visitor in product(_parsers, repeat=2): - yield check_disallowed_nodes, engine, visitor - - -def check_syntax_error_exprs(engine, parser): - tm.skip_if_no_ne(engine) +def test_syntax_error_exprs(engine, parser): e = 's +' with pytest.raises(SyntaxError): pd.eval(e, engine=engine, parser=parser) -def test_syntax_error_exprs(): - for engine, parser in ENGINES_PARSERS: - yield check_syntax_error_exprs, engine, parser - - -def check_name_error_exprs(engine, parser): - tm.skip_if_no_ne(engine) +def test_name_error_exprs(engine, parser): e = 's + t' with tm.assertRaises(NameError): pd.eval(e, engine=engine, parser=parser) -def test_name_error_exprs(): - for engine, parser in ENGINES_PARSERS: - yield check_name_error_exprs, engine, parser - - -def check_invalid_local_variable_reference(engine, parser): - tm.skip_if_no_ne(engine) - +def test_invalid_local_variable_reference(engine, parser): a, b = 1, 2 exprs = 'a + @b', '@a + b', '@a + @b' for expr in exprs: @@ -1870,13 +1798,7 @@ def check_invalid_local_variable_reference(engine, parser): pd.eval(exprs, engine=engine, parser=parser) -def test_invalid_local_variable_reference(): - for engine, parser in ENGINES_PARSERS: - yield check_invalid_local_variable_reference, engine, parser - - -def check_numexpr_builtin_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_numexpr_builtin_raises(engine, parser): sin, dotted_line = 1, 2 if engine == 'numexpr': with tm.assertRaisesRegexp(NumExprClobberingError, @@ -1887,51 +1809,35 @@ def check_numexpr_builtin_raises(engine, parser): tm.assert_equal(res, sin + dotted_line) -def test_numexpr_builtin_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_numexpr_builtin_raises, engine, parser - - -def check_bad_resolver_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_bad_resolver_raises(engine, parser): cannot_resolve = 42, 3.0 with tm.assertRaisesRegexp(TypeError, 'Resolver of type .+'): pd.eval('1 + 2', resolvers=cannot_resolve, engine=engine, parser=parser) -def test_bad_resolver_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_bad_resolver_raises, engine, parser - - -def check_empty_string_raises(engine, parser): +def test_empty_string_raises(engine, parser): # GH 13139 - tm.skip_if_no_ne(engine) with tm.assertRaisesRegexp(ValueError, 'expr cannot be an empty string'): pd.eval('', engine=engine, parser=parser) -def test_empty_string_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_empty_string_raises, engine, parser - - -def check_more_than_one_expression_raises(engine, parser): - tm.skip_if_no_ne(engine) +def test_more_than_one_expression_raises(engine, parser): with 
tm.assertRaisesRegexp(SyntaxError, 'only a single expression is allowed'): pd.eval('1 + 1; 2 + 2', engine=engine, parser=parser) -def test_more_than_one_expression_raises(): - for engine, parser in ENGINES_PARSERS: - yield check_more_than_one_expression_raises, engine, parser +@pytest.mark.parametrize('cmp', ('and', 'or')) +@pytest.mark.parametrize('lhs', (int, float)) +@pytest.mark.parametrize('rhs', (int, float)) +def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): + gen = {int: lambda: np.random.randint(10), float: np.random.randn} + mid = gen[lhs]() + lhs = gen[lhs]() + rhs = gen[rhs]() -def check_bool_ops_fails_on_scalars(gen, lhs, cmp, rhs, engine, parser): - tm.skip_if_no_ne(engine) - mid = gen[type(lhs)]() ex1 = 'lhs {0} mid {1} rhs'.format(cmp, cmp) ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp, cmp) ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp, cmp) @@ -1940,32 +1846,14 @@ def check_bool_ops_fails_on_scalars(gen, lhs, cmp, rhs, engine, parser): pd.eval(ex, engine=engine, parser=parser) -def test_bool_ops_fails_on_scalars(): - _bool_ops_syms = 'and', 'or' - dtypes = int, float - gen = {int: lambda: np.random.randint(10), float: np.random.randn} - for engine, parser, dtype1, cmp, dtype2 in product(_engines, expr._parsers, - dtypes, _bool_ops_syms, - dtypes): - yield (check_bool_ops_fails_on_scalars, gen, gen[dtype1](), cmp, - gen[dtype2](), engine, parser) - - -def check_inf(engine, parser): - tm.skip_if_no_ne(engine) +def test_inf(engine, parser): s = 'inf + 1' expected = np.inf result = pd.eval(s, engine=engine, parser=parser) tm.assert_equal(result, expected) -def test_inf(): - for engine, parser in ENGINES_PARSERS: - yield check_inf, engine, parser - - -def check_negate_lt_eq_le(engine, parser): - tm.skip_if_no_ne(engine) +def test_negate_lt_eq_le(engine, parser): df = pd.DataFrame([[0, 10], [1, 20]], columns=['cat', 'count']) expected = df[~(df.cat > 0)] @@ -1980,11 +1868,6 @@ def check_negate_lt_eq_le(engine, parser): tm.assert_frame_equal(result, expected) -def test_negate_lt_eq_le(): - for engine, parser in product(_engines, expr._parsers): - yield check_negate_lt_eq_le, engine, parser - - class TestValidate(tm.TestCase): def test_validate_bool_args(self): diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 721d447262149..4d6b6c7daa3c6 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -7,7 +7,6 @@ import os import pytest -from itertools import product import pandas.util.testing as tm from pandas import DataFrame @@ -21,14 +20,18 @@ def salaries_table(): @pytest.mark.parametrize( - "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), - ('zip', '.zip'), ('xz', '.xz')]) -def test_compressed_urls(salaries_table, compression, extension): - check_compressed_urls(salaries_table, compression, extension) + "compression,extension", + [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), + tm._mark_skipif_no_lzma(('xz', '.xz'))]) +@pytest.mark.parametrize('mode', ['explicit', 'infer']) +@pytest.mark.parametrize('engine', ['python', 'c']) +def test_compressed_urls(salaries_table, compression, extension, mode, engine): + check_compressed_urls(salaries_table, compression, extension, mode, engine) @tm.network -def check_compressed_urls(salaries_table, compression, extension): +def check_compressed_urls(salaries_table, compression, extension, mode, + engine): # test reading compressed urls with various engines and # extension inference base_url = 
('https://github.com/pandas-dev/pandas/raw/master/' @@ -36,14 +39,11 @@ def check_compressed_urls(salaries_table, compression, extension): url = base_url + extension - # args is a (compression, engine) tuple - for (c, engine) in product([compression, 'infer'], ['python', 'c']): + if mode != 'explicit': + compression = mode - if url.endswith('.xz'): - tm._skip_if_no_lzma() - - url_table = read_table(url, compression=c, engine=engine) - tm.assert_frame_equal(url_table, salaries_table) + url_table = read_table(url, compression=compression, engine=engine) + tm.assert_frame_equal(url_table, salaries_table) class TestS3(tm.TestCase): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index cda386781e2ec..1bd539469dbe3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -307,12 +307,21 @@ def _skip_if_scipy_0_17(): pytest.skip("scipy 0.17") -def _skip_if_no_lzma(): +def _check_if_lzma(): try: return compat.import_lzma() except ImportError: - import pytest - pytest.skip('need backports.lzma to run') + return False + + +def _skip_if_no_lzma(): + return _check_if_lzma() or pytest.skip('need backports.lzma to run') + + +_mark_skipif_no_lzma = pytest.mark.skipif( + not _check_if_lzma(), + reason='need backports.lzma to run' +) def _skip_if_no_xarray(): From 5a8883b965610234366150897fe8963abffd6a7c Mon Sep 17 00:00:00 2001 From: Diego Fernandez Date: Thu, 16 Feb 2017 09:21:03 -0500 Subject: [PATCH 071/353] BUG: Ensure the right values are set in SeriesGroupBy.nunique closes #13453 Author: Diego Fernandez Closes #15418 from aiguofer/gh_13453 and squashes the following commits: c53bd70 [Diego Fernandez] Add test for #13453 in test_resample and add note to whatsnew 0daab80 [Diego Fernandez] Ensure the right values are set in SeriesGroupBy.nunique --- doc/source/whatsnew/v0.20.0.txt | 7 ++++--- pandas/core/groupby.py | 2 +- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ pandas/tests/tseries/test_resample.py | 20 ++++++++++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4708abe4d592e..09551cfc0bcf8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -418,6 +418,7 @@ New Behavior: Other API Changes ^^^^^^^^^^^^^^^^^ +- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) @@ -428,9 +429,8 @@ Other API Changes - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) -- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. 
Furthermore ``FLOAT`` columns with values above 10**4 are no more casted to ``int64`` which also caused precision lost (:issue: `14064`, :issue:`14305`). +- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss (:issue:`14064`, :issue:`14305`). - Reorganization of timeseries development tests (:issue:`14854`) -- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). .. _whatsnew_0200.deprecations: @@ -473,7 +473,7 @@ Performance Improvements (or with ``compat_x=True``) (:issue:`15073`). - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) -- When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. +- When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) @@ -553,6 +553,7 @@ Bug Fixes - Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) - Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) +- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bins counts were incorrect (:issue:`13453`) - Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`) - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 23c835318b0e6..ba2de295fa0a9 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3032,7 +3032,7 @@ def nunique(self, dropna=True): # we might have duplications among the bins if len(res) != len(ri): res, out = np.zeros(len(ri), dtype=out.dtype), res - res[ids] = out + res[ids[idx]] = out return Series(res, index=ri, diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d53446870beb1..59cbcab23b9e7 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -4159,6 +4159,19 @@ def test_nunique_with_empty_series(self): expected = pd.Series(name='name', dtype='int64') tm.assert_series_equal(result, expected) + def test_nunique_with_timegrouper(self): + # GH 13453 + test = pd.DataFrame({ + 'time': [Timestamp('2016-06-28 09:35:35'), + Timestamp('2016-06-28 16:09:30'), + Timestamp('2016-06-28 16:46:28')], + 'data': ['1', '2', '3']}).set_index('time') + result = test.groupby(pd.TimeGrouper(freq='h'))['data'].nunique() + expected = test.groupby( + pd.TimeGrouper(freq='h') + )['data'].apply(pd.Series.nunique) + tm.assert_series_equal(result, expected) + def test_numpy_compat(self): # see gh-12811 df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]}) diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index afb44887fe7d1..45bbc88ef711d 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -1939,6 +1939,26 @@ def test_resample_nunique(self): result = 
df.ID.groupby(pd.Grouper(freq='D')).nunique() assert_series_equal(result, expected) + def test_resample_nunique_with_date_gap(self): + # GH 13453 + index = pd.date_range('1-1-2000', '2-15-2000', freq='h') + index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') + index3 = index.append(index2) + s = pd.Series(range(len(index3)), index=index3) + r = s.resample('M') + + # Since all elements are unique, these should all be the same + results = [ + r.count(), + r.nunique(), + r.agg(pd.Series.nunique), + r.agg('nunique') + ] + + assert_series_equal(results[0], results[1]) + assert_series_equal(results[0], results[2]) + assert_series_equal(results[0], results[3]) + def test_resample_group_info(self): # GH10914 for n, k in product((10000, 100000), (10, 100, 1000)): dr = date_range(start='2015-08-27', periods=n // 10, freq='T') From c7300ea9ccf6c8b4eeb5a4ae59dc2419753c9b18 Mon Sep 17 00:00:00 2001 From: abaldenko Date: Thu, 16 Feb 2017 12:39:27 -0500 Subject: [PATCH 072/353] BUG: Concat with inner join and empty DataFrame closes #15328 Author: abaldenko Closes #15397 from abaldenko/concat_empty_dataframe and squashes the following commits: 47c8735 [abaldenko] BUG: Concat with inner join and empty DataFrame fc473b7 [abaldenko] BUG: Concat with inner join and empty DataFrame b86dcb6 [abaldenko] BUG: Concat with inner join and empty DataFrame --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/tools/test_concat.py | 10 ++++++++++ pandas/tests/tools/test_merge.py | 8 ++++++++ pandas/tools/concat.py | 4 +++- 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 09551cfc0bcf8..ddb9088035d89 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -576,7 +576,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - +- Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 87a0dda34a525..2a28fccdc9b94 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1825,6 +1825,16 @@ def test_concat_bug_3602(self): result = concat([df1, df2], axis=1) assert_frame_equal(result, expected) + def test_concat_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64') + df_expected = pd.DataFrame({'a': []}, index=[], dtype='int64') + + for how, expected in [('inner', df_expected), ('outer', df_a)]: + result = pd.concat([df_a, df_empty], axis=1, join=how) + assert_frame_equal(result, expected) + def test_concat_series_axis1_same_names_ignore_index(self): dates = date_range('01-Jan-2013', '01-Jan-2014', freq='MS')[0:-1] s1 = Series(randn(len(dates)), index=dates, name='value') diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index 472d8674f9f8d..b3b5e7e29319b 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -52,6 +52,14 @@ def setUp(self): self.right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) + def test_merge_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = pd.DataFrame({'a': [1, 
2]}, index=[0, 1], dtype='int64') + result = pd.merge(df_empty, df_a, left_index=True, right_index=True) + expected = pd.DataFrame({'a': []}, index=[], dtype='int64') + assert_frame_equal(result, expected) + def test_merge_common(self): joined = merge(self.df, self.df2) exp = merge(self.df, self.df2, on=['key1', 'key2']) diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index dbbc831b19d1d..31d7a9eb9a01a 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -284,7 +284,9 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, if sum(obj.shape) > 0 or isinstance(obj, Series)] if (len(non_empties) and (keys is None and names is None and - levels is None and join_axes is None)): + levels is None and + join_axes is None and + not self.intersect)): objs = non_empties sample = objs[0] From 9b5d8488e8184da0507c09482f23ebfff34ecc43 Mon Sep 17 00:00:00 2001 From: Jeff Carey Date: Thu, 16 Feb 2017 12:45:29 -0500 Subject: [PATCH 073/353] ENH: Added ability to freeze panes from DataFrame.to_excel() (#15160) closes #15160 Author: Jeff Carey Closes #15291 from jeffcarey/enh-15160 and squashes the following commits: cef8fce [Jeff Carey] ENH: Added ability to freeze panes from DataFrame.to_excel() --- doc/source/io.rst | 13 +++++++++++++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 19 ++++++++++++++++-- pandas/core/generic.py | 7 ++++++- pandas/io/excel.py | 34 ++++++++++++++++++++++++++------- pandas/tests/io/test_excel.py | 12 ++++++++++-- 6 files changed, 74 insertions(+), 12 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 22eac33a715ba..2d6ddf98437e5 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2777,6 +2777,7 @@ Added support for Openpyxl >= 2.2 ``'xlsxwriter'`` will produce an Excel 2007-format workbook (xlsx). If omitted, an Excel 2007-formatted workbook is produced. + .. _io.excel.writers: Excel writer engines @@ -2823,6 +2824,18 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') +.. _io.excel.style: + +Style and Formatting +'''''''''''''''''''' + +The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. + +- ``float_format`` : Format string for floating point numbers (default None) +- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will +freeze the first row and first column (default None) + + .. _io.clipboard: Clipboard diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ddb9088035d89..75a8752c9bfa4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -153,6 +153,7 @@ Other enhancements - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) +- ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) .. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f7c306ea7ce95..3ebdf72a5cde9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1390,7 +1390,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, - merge_cells=True, encoding=None, inf_rep='inf', verbose=True): + merge_cells=True, encoding=None, inf_rep='inf', verbose=True, + freeze_panes=None): from pandas.io.excel import ExcelWriter need_save = False if encoding is None: @@ -1406,12 +1407,26 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', index_label=index_label, merge_cells=merge_cells, inf_rep=inf_rep) + formatted_cells = formatter.get_formatted_cells() + freeze_panes = self._validate_freeze_panes(freeze_panes) excel_writer.write_cells(formatted_cells, sheet_name, - startrow=startrow, startcol=startcol) + startrow=startrow, startcol=startcol, + freeze_panes=freeze_panes) if need_save: excel_writer.save() + def _validate_freeze_panes(self, freeze_panes): + if freeze_panes is not None: + if ( + len(freeze_panes) == 2 and + all(isinstance(item, int) for item in freeze_panes) + ): + return freeze_panes + + raise ValueError("freeze_panes must be of form (row, column)" + " where row and column are integers") + def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, data_label=None, variable_labels=None): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 20e6e027dbf09..204cd91ebfab0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1033,7 +1033,7 @@ def __setstate__(self, state): # I/O Methods _shared_docs['to_excel'] = """ - Write %(klass)s to a excel sheet + Write %(klass)s to an excel sheet %(versionadded_to_excel)s Parameters ---------- @@ -1072,6 +1072,11 @@ def __setstate__(self, state): inf_rep : string, default 'inf' Representation for infinity (there is no native representation for infinity in Excel) + freeze_panes : tuple of integer (length 2), default None + Specifies the bottommost row and rightmost column that + is to be frozen + + .. versionadded:: 0.20.0 Notes ----- diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 2821983213646..37a61b7dc9ab5 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -693,7 +693,8 @@ def engine(self): pass @abc.abstractmethod - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): """ Write given formated cells into Excel an excel sheet @@ -705,6 +706,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): Name of Excel sheet, if None, then use self.cur_sheet startrow: upper left cell row to dump data frame startcol: upper left cell column to dump data frame + freeze_panes: integer tuple of length 2 + contains the bottom-most row and right-most column to freeze """ pass @@ -804,7 +807,8 @@ def save(self): """ return self.book.save(self.path) - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using openpyxl. 
from openpyxl.cell import get_column_letter @@ -904,7 +908,8 @@ class _Openpyxl20Writer(_Openpyxl1Writer): engine = 'openpyxl20' openpyxl_majorver = 2 - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using openpyxl. from openpyxl.cell import get_column_letter @@ -1311,7 +1316,8 @@ class _Openpyxl22Writer(_Openpyxl20Writer): engine = 'openpyxl22' openpyxl_majorver = 2 - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using openpyxl. sheet_name = self._get_sheet_name(sheet_name) @@ -1324,6 +1330,10 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): wks.title = sheet_name self.sheets[sheet_name] = wks + if freeze_panes is not None: + wks.freeze_panes = wks.cell(row=freeze_panes[0] + 1, + column=freeze_panes[1] + 1) + for cell in cells: xcell = wks.cell( row=startrow + cell.row + 1, @@ -1396,7 +1406,8 @@ def save(self): """ return self.book.save(self.path) - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using xlwt. sheet_name = self._get_sheet_name(sheet_name) @@ -1407,6 +1418,11 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): wks = self.book.add_sheet(sheet_name) self.sheets[sheet_name] = wks + if freeze_panes is not None: + wks.set_panes_frozen(True) + wks.set_horz_split_pos(freeze_panes[0]) + wks.set_vert_split_pos(freeze_panes[1]) + style_dict = {} for cell in cells: @@ -1518,11 +1534,12 @@ def save(self): """ Save workbook to disk. """ + return self.book.close() - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using xlsxwriter. 
- sheet_name = self._get_sheet_name(sheet_name) if sheet_name in self.sheets: @@ -1533,6 +1550,9 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): style_dict = {} + if freeze_panes is not None: + wks.freeze_panes(*(freeze_panes)) + for cell in cells: val = _conv_value(cell.val) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 0c2b443cffe52..b66cb24bf44d8 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1836,6 +1836,14 @@ def test_true_and_false_value_options(self): false_values=['bar']) tm.assert_frame_equal(read_frame, expected) + def test_freeze_panes(self): + # GH15160 + expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) + with ensure_clean(self.ext) as path: + expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) + result = read_excel(path) + tm.assert_frame_equal(expected, result) + def raise_wrapper(major_ver): def versioned_raise_wrapper(orig_method): @@ -1873,7 +1881,7 @@ class OpenpyxlTests(ExcelWriterBase, tm.TestCase): def test_to_excel_styleconverter(self): _skip_if_no_openpyxl() if not openpyxl_compat.is_compat(major_ver=1): - pytest.skip('incompatiable openpyxl version') + pytest.skip('incompatible openpyxl version') import openpyxl @@ -2095,7 +2103,7 @@ def test_to_excel_styleconverter(self): def test_write_cells_merge_styled(self): if not openpyxl_compat.is_compat(major_ver=2): - pytest.skip('incompatiable openpyxl version') + pytest.skip('incompatible openpyxl version') from pandas.formats.format import ExcelCell From c588dd1d0b7ea2dffb4e9906b8455739c9055037 Mon Sep 17 00:00:00 2001 From: Jeff Carey Date: Fri, 17 Feb 2017 00:17:38 -0800 Subject: [PATCH 074/353] Documents touch-up for DataFrame.to_excel() freeze_panes option (#15436) --- doc/source/io.rst | 4 ++-- pandas/core/generic.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 2d6ddf98437e5..55ef2c09d43e4 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2832,8 +2832,8 @@ Style and Formatting The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. - ``float_format`` : Format string for floating point numbers (default None) -- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will -freeze the first row and first column (default None) +- ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default None) + .. _io.clipboard: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 204cd91ebfab0..26b9a880dd2c7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1073,7 +1073,7 @@ def __setstate__(self, state): Representation for infinity (there is no native representation for infinity in Excel) freeze_panes : tuple of integer (length 2), default None - Specifies the bottommost row and rightmost column that + Specifies the one-based bottommost row and rightmost column that is to be frozen .. 
versionadded:: 0.20.0 From f4e672ccc46da0a358c4729714b6343e39fafd7b Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 17 Feb 2017 13:09:20 +0000 Subject: [PATCH 075/353] BUG: to_sql convert index name to string (#15404) (#15423) * Converted index name to string to fix issue #15404 - BUG: to_sql errors with numeric index name - needs conversion to string * Additional int to string conversion added. Associated test cases added. * PEP 8 compliance edits * Removed extraneous brackets --- pandas/io/sql.py | 5 +++-- pandas/tests/io/test_sql.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 55e145b493dd9..bace43e785dff 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -750,7 +750,8 @@ def _get_column_names_and_types(self, dtype_mapper): for i, idx_label in enumerate(self.index): idx_type = dtype_mapper( self.frame.index.get_level_values(i)) - column_names_and_types.append((idx_label, idx_type, True)) + column_names_and_types.append((text_type(idx_label), + idx_type, True)) column_names_and_types += [ (text_type(self.frame.columns[i]), @@ -1220,7 +1221,7 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): def _get_unicode_name(name): try: - uname = name.encode("utf-8", "strict").decode("utf-8") + uname = text_type(name).encode("utf-8", "strict").decode("utf-8") except UnicodeError: raise ValueError("Cannot convert identifier to UTF-8: '%s'" % name) return uname diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 78560611da7aa..890f52e8c65e9 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -709,6 +709,21 @@ def test_to_sql_index_label(self): self.assertEqual(frame.columns[0], 'other_label', "Specified index_label not written to database") + # index name is integer + temp_frame.index.name = 0 + sql.to_sql(temp_frame, 'test_index_label', self.conn, + if_exists='replace') + frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) + self.assertEqual(frame.columns[0], '0', + "Integer index label not written to database") + + temp_frame.index.name = None + sql.to_sql(temp_frame, 'test_index_label', self.conn, + if_exists='replace', index_label=0) + frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn) + self.assertEqual(frame.columns[0], '0', + "Integer index label not written to database") + def test_to_sql_index_label_multiindex(self): temp_frame = DataFrame({'col1': range(4)}, index=MultiIndex.from_product( From 54b6c6e1c443b992a1df3443669a59dbe430271f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 17 Feb 2017 14:12:01 +0100 Subject: [PATCH 076/353] DOC: add whatsnew for #15423 --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 75a8752c9bfa4..c68af842a4f0c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -522,7 +522,7 @@ Bug Fixes - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) - +- Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
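A small sketch of the behaviour the to_sql fix above restores (GH 15404); it assumes SQLAlchemy is installed, and the table name and throw-away in-memory engine are illustrative only:

    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine('sqlite://')   # in-memory database, for illustration
    df = pd.DataFrame({'col1': range(4)})
    df.index.name = 0                     # a numeric index name, which used to error
    df.to_sql('test_index_label', engine, if_exists='replace')
    # the index column is written under the stringified label '0'
    print(pd.read_sql_query('SELECT * FROM test_index_label', engine).columns)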
(:issue:`14580`) From 763f42f7bba78acc0bf22f66281d1221b49c7238 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Feb 2017 09:51:46 -0500 Subject: [PATCH 077/353] TST: remove yielding tests from test_msgpacks.py (#15427) --- pandas/tests/io/test_packers.py | 88 ++++++++++++++++++--------------- pandas/tests/io/test_pickle.py | 8 +-- 2 files changed, 52 insertions(+), 44 deletions(-) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 911cd8164571d..097c03937ca68 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -41,6 +41,22 @@ _ZLIB_INSTALLED = True +@pytest.fixture(scope='module') +def current_packers_data(): + # our current version packers data + from pandas.tests.io.generate_legacy_storage_files import ( + create_msgpack_data) + return create_msgpack_data() + + +@pytest.fixture(scope='module') +def all_packers_data(): + # our all of our current version packers data + from pandas.tests.io.generate_legacy_storage_files import ( + create_data) + return create_data() + + def check_arbitrary(a, b): if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): @@ -778,7 +794,16 @@ def test_default_encoding(self): assert_frame_equal(result, frame) -class TestMsgpack(): +def legacy_packers_versions(): + # yield the packers versions + path = tm.get_data_path('legacy_msgpack') + for v in os.listdir(path): + p = os.path.join(path, v) + if os.path.isdir(p): + yield v + + +class TestMsgpack(object): """ How to add msgpack tests: @@ -788,48 +813,38 @@ class TestMsgpack(): $ python generate_legacy_storage_files.py msgpack 3. Move the created pickle to "data/legacy_msgpack/" directory. - - NOTE: TestMsgpack can't be a subclass of tm.Testcase to use test generator. - http://stackoverflow.com/questions/6689537/nose-test-generators-inside-class """ - @classmethod - def setup_class(cls): - from pandas.tests.io.generate_legacy_storage_files import ( - create_msgpack_data, create_data) - cls.data = create_msgpack_data() - cls.all_data = create_data() - cls.path = u('__%s__.msgpack' % tm.rands(10)) - cls.minimum_structure = {'series': ['float', 'int', 'mixed', - 'ts', 'mi', 'dup'], - 'frame': ['float', 'int', 'mixed', 'mi'], - 'panel': ['float'], - 'index': ['int', 'date', 'period'], - 'mi': ['reg2']} - - def check_min_structure(self, data): + minimum_structure = {'series': ['float', 'int', 'mixed', + 'ts', 'mi', 'dup'], + 'frame': ['float', 'int', 'mixed', 'mi'], + 'panel': ['float'], + 'index': ['int', 'date', 'period'], + 'mi': ['reg2']} + + def check_min_structure(self, data, version): for typ, v in self.minimum_structure.items(): assert typ in data, '"{0}" not found in unpacked data'.format(typ) for kind in v: msg = '"{0}" not found in data["{1}"]'.format(kind, typ) assert kind in data[typ], msg - def compare(self, vf, version): + def compare(self, current_data, all_data, vf, version): # GH12277 encoding default used to be latin-1, now utf-8 if LooseVersion(version) < '0.18.0': data = read_msgpack(vf, encoding='latin-1') else: data = read_msgpack(vf) - self.check_min_structure(data) + self.check_min_structure(data, version) for typ, dv in data.items(): - assert typ in self.all_data, ('unpacked data contains ' - 'extra key "{0}"' - .format(typ)) + assert typ in all_data, ('unpacked data contains ' + 'extra key "{0}"' + .format(typ)) for dt, result in dv.items(): - assert dt in self.all_data[typ], ('data["{0}"] contains extra ' - 'key "{1}"'.format(typ, dt)) + assert dt in current_data[typ], ('data["{0}"] contains extra ' + 'key 
"{1}"'.format(typ, dt)) try: - expected = self.data[typ][dt] + expected = current_data[typ][dt] except KeyError: continue @@ -862,9 +877,11 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): else: tm.assert_frame_equal(result, expected) - def read_msgpacks(self, version): + @pytest.mark.parametrize('version', legacy_packers_versions()) + def test_msgpacks_legacy(self, current_packers_data, all_packers_data, + version): - pth = tm.get_data_path('legacy_msgpack/{0}'.format(str(version))) + pth = tm.get_data_path('legacy_msgpack/{0}'.format(version)) n = 0 for f in os.listdir(pth): # GH12142 0.17 files packed in P2 can't be read in P3 @@ -873,19 +890,10 @@ def read_msgpacks(self, version): continue vf = os.path.join(pth, f) try: - self.compare(vf, version) + self.compare(current_packers_data, all_packers_data, + vf, version) except ImportError: # blosc not installed continue n += 1 assert n > 0, 'Msgpack files are not tested' - - def test_msgpack(self): - msgpack_path = tm.get_data_path('legacy_msgpack') - n = 0 - for v in os.listdir(msgpack_path): - pth = os.path.join(msgpack_path, v) - if os.path.isdir(pth): - yield self.read_msgpacks, v - n += 1 - assert n > 0, 'Msgpack files are not tested' diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 1e3816c1556f6..c736ec829808a 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -187,10 +187,10 @@ def compare_sp_frame_float(result, expected, typ, version): # --------------------- def legacy_pickle_versions(): # yield the pickle versions - pickle_path = tm.get_data_path('legacy_pickle') - for v in os.listdir(pickle_path): - pth = os.path.join(pickle_path, v) - if os.path.isdir(pth): + path = tm.get_data_path('legacy_pickle') + for v in os.listdir(path): + p = os.path.join(path, v) + if os.path.isdir(p): yield v From f65a6415f15d438432cc6954ead61b052c5d4d60 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Fri, 17 Feb 2017 10:07:11 -0500 Subject: [PATCH 078/353] ENH: Don't add rowspan/colspan if it's 1. Just a small thing I noticed in a [footnote here](https://danluu.com/web-bloat/#appendix-irony). Probably can't do much about the extra classes, but rowspan/colspan seem like easy fixes to save a few bytes per row/col and it's already done in the other code path. Author: Elliott Sales de Andrade Closes #15403 from QuLogic/no-extra-span and squashes the following commits: 9a8fcee [Elliott Sales de Andrade] Don't add rowspan/colspan if it's 1. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/formats/style.py | 55 ++++++++++++++++-------------- pandas/tests/formats/test_style.py | 38 +++++++-------------- 3 files changed, 43 insertions(+), 51 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c68af842a4f0c..8e48dbbb083e8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -154,6 +154,7 @@ Other enhancements - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) +- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) .. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/formats/style.py b/pandas/formats/style.py index b3e0f0f6c7462..89712910a22e1 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -251,21 +251,23 @@ def format_attr(pair): "class": " ".join(cs), "is_visible": True}) - for c in range(len(clabels[0])): + for c, value in enumerate(clabels[r]): cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] cs.extend(cell_context.get( "col_headings", {}).get(r, {}).get(c, [])) - value = clabels[r][c] - row_es.append({"type": "th", - "value": value, - "display_value": value, - "class": " ".join(cs), - "is_visible": _is_visible(c, r, col_lengths), - "attributes": [ - format_attr({"key": "colspan", - "value": col_lengths.get( - (r, c), 1)}) - ]}) + es = { + "type": "th", + "value": value, + "display_value": value, + "class": " ".join(cs), + "is_visible": _is_visible(c, r, col_lengths), + } + colspan = col_lengths.get((r, c), 0) + if colspan > 1: + es["attributes"] = [ + format_attr({"key": "colspan", "value": colspan}) + ] + row_es.append(es) head.append(row_es) if self.data.index.names and not all(x is None @@ -289,19 +291,22 @@ def format_attr(pair): body = [] for r, idx in enumerate(self.data.index): - # cs.extend( - # cell_context.get("row_headings", {}).get(r, {}).get(c, [])) - row_es = [{"type": "th", - "is_visible": _is_visible(r, c, idx_lengths), - "attributes": [ - format_attr({"key": "rowspan", - "value": idx_lengths.get((c, r), 1)}) - ], - "value": rlabels[r][c], - "class": " ".join([ROW_HEADING_CLASS, "level%s" % c, - "row%s" % r]), - "display_value": rlabels[r][c]} - for c in range(len(rlabels[r]))] + row_es = [] + for c, value in enumerate(rlabels[r]): + es = { + "type": "th", + "is_visible": _is_visible(r, c, idx_lengths), + "value": value, + "display_value": value, + "class": " ".join([ROW_HEADING_CLASS, "level%s" % c, + "row%s" % r]), + } + rowspan = idx_lengths.get((c, r), 0) + if rowspan > 1: + es["attributes"] = [ + format_attr({"key": "rowspan", "value": rowspan}) + ] + row_es.append(es) for c, col in enumerate(self.data.columns): cs = [DATA_CLASS, "row%s" % r, "col%s" % c] diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py index 53bb3f9010f7e..44af0b8ebb085 100644 --- a/pandas/tests/formats/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -141,21 +141,18 @@ def test_empty_index_name_doesnt_display(self): 'type': 'th', 'value': 'A', 'is_visible': True, - 'attributes': ["colspan=1"], }, {'class': 'col_heading level0 col1', 'display_value': 'B', 'type': 'th', 'value': 'B', 'is_visible': True, - 'attributes': ["colspan=1"], }, {'class': 'col_heading level0 col2', 'display_value': 'C', 'type': 'th', 'value': 'C', 'is_visible': True, - 'attributes': ["colspan=1"], }]] self.assertEqual(result['head'], expected) @@ -168,11 +165,9 @@ def test_index_name(self): expected = [[{'class': 'blank level0', 'type': 'th', 'value': '', 'display_value': '', 'is_visible': True}, {'class': 'col_heading level0 col0', 'type': 'th', - 'value': 'B', 'display_value': 'B', - 'is_visible': True, 'attributes': ['colspan=1']}, + 'value': 'B', 'display_value': 'B', 'is_visible': True}, {'class': 'col_heading level0 col1', 'type': 'th', - 'value': 'C', 'display_value': 'C', - 'is_visible': True, 'attributes': ['colspan=1']}], + 'value': 'C', 'display_value': 'C', 'is_visible': True}], [{'class': 'index_name level0', 'type': 'th', 'value': 'A'}, {'class': 'blank', 'type': 'th', 'value': ''}, @@ -191,9 +186,7 @@ def 
test_multiindex_name(self): {'class': 'blank level0', 'type': 'th', 'value': '', 'display_value': '', 'is_visible': True}, {'class': 'col_heading level0 col0', 'type': 'th', - 'value': 'C', 'display_value': 'C', - 'is_visible': True, 'attributes': ['colspan=1'], - }], + 'value': 'C', 'display_value': 'C', 'is_visible': True}], [{'class': 'index_name level0', 'type': 'th', 'value': 'A'}, {'class': 'index_name level1', 'type': 'th', @@ -618,16 +611,14 @@ def test_mi_sparse(self): body_1 = result['body'][0][1] expected_1 = { "value": 0, "display_value": 0, "is_visible": True, - "type": "th", "attributes": ["rowspan=1"], - "class": "row_heading level1 row0", + "type": "th", "class": "row_heading level1 row0", } tm.assert_dict_equal(body_1, expected_1) body_10 = result['body'][1][0] expected_10 = { "value": 'a', "display_value": 'a', "is_visible": False, - "type": "th", "attributes": ["rowspan=1"], - "class": "row_heading level0 row1", + "type": "th", "class": "row_heading level0 row1", } tm.assert_dict_equal(body_10, expected_10) @@ -637,9 +628,8 @@ def test_mi_sparse(self): 'is_visible': True, "display_value": ''}, {'type': 'th', 'class': 'blank level0', 'value': '', 'is_visible': True, 'display_value': ''}, - {'attributes': ['colspan=1'], 'class': 'col_heading level0 col0', - 'is_visible': True, 'type': 'th', 'value': 'A', - 'display_value': 'A'}] + {'type': 'th', 'class': 'col_heading level0 col0', 'value': 'A', + 'is_visible': True, 'display_value': 'A'}] self.assertEqual(head, expected) def test_mi_sparse_disabled(self): @@ -650,7 +640,7 @@ def test_mi_sparse_disabled(self): result = df.style._translate() body = result['body'] for row in body: - self.assertEqual(row[0]['attributes'], ['rowspan=1']) + assert 'attributes' not in row[0] def test_mi_sparse_index_names(self): df = pd.DataFrame({'A': [1, 2]}, index=pd.MultiIndex.from_arrays( @@ -686,28 +676,24 @@ def test_mi_sparse_column_names(self): 'type': 'th', 'is_visible': True}, {'class': 'index_name level1', 'value': 'col_1', 'display_value': 'col_1', 'is_visible': True, 'type': 'th'}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col0', + {'class': 'col_heading level1 col0', 'display_value': 1, 'is_visible': True, 'type': 'th', 'value': 1}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col1', + {'class': 'col_heading level1 col1', 'display_value': 0, 'is_visible': True, 'type': 'th', 'value': 0}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col2', + {'class': 'col_heading level1 col2', 'display_value': 1, 'is_visible': True, 'type': 'th', 'value': 1}, - {'attributes': ['colspan=1'], - 'class': 'col_heading level1 col3', + {'class': 'col_heading level1 col3', 'display_value': 0, 'is_visible': True, 'type': 'th', From a17a03a404649c0672b75983432759e8a29e0804 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 18 Feb 2017 11:52:01 +0100 Subject: [PATCH 079/353] DOC: correct rpy2 examples (GH15142) (#15450) --- doc/source/r_interface.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst index b5d699cad69d5..88634d7f75c63 100644 --- a/doc/source/r_interface.rst +++ b/doc/source/r_interface.rst @@ -41,15 +41,17 @@ In the remainder of this page, a few examples of explicit conversion is given. 
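As a rough illustration of the colspan/rowspan change in the Styler patch above (GH 15403), a minimal sketch; it assumes jinja2 is installed, which Styler.render requires:

    import pandas as pd

    df = pd.DataFrame({'A': [1, 2]})
    html = df.style.render()
    # header cells spanning a single row or column no longer carry a
    # redundant rowspan/colspan attribute in the generated HTML
    print('rowspan' in html)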
T Transferring R data sets into Python ------------------------------------ -The ``pandas2ri.ri2py`` function retrieves an R data set and converts it to the -appropriate pandas object (most likely a DataFrame): +Once the pandas conversion is activated (``pandas2ri.activate()``), many conversions +of R to pandas objects will be done automatically. For example, to obtain the 'iris' dataset as a pandas DataFrame: .. ipython:: python r.data('iris') - df_iris = pandas2ri.ri2py(r['iris']) - df_iris.head() + r['iris'].head() +If the pandas conversion was not activated, the above could also be accomplished +by explicitly converting it with the ``pandas2ri.ri2py`` function +(``pandas2ri.ri2py(r['iris'])``). Converting DataFrames into R objects ------------------------------------ @@ -65,7 +67,6 @@ DataFrames into the equivalent R object (that is, **data.frame**): print(type(r_dataframe)) print(r_dataframe) - The DataFrame's index is stored as the ``rownames`` attribute of the data.frame instance. From 29aeffb8d77f56c3a3862a6bfaee993aa7660500 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 18 Feb 2017 04:08:54 -0800 Subject: [PATCH 080/353] BUG: rolling not accepting Timedelta-like window args (#15443) Remove unnecessary pd.Timedelta --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/window.py | 4 +++- pandas/tests/test_window.py | 20 +++++++++++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8e48dbbb083e8..ae4a3d3c3d97f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -551,6 +551,7 @@ Bug Fixes - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) - Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) - Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`) +- Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) - Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) diff --git a/pandas/core/window.py b/pandas/core/window.py index 50de6b84d7cba..3f9aa2b0ff392 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -10,6 +10,7 @@ import warnings import numpy as np from collections import defaultdict +from datetime import timedelta from pandas.types.generic import (ABCSeries, ABCDataFrame, @@ -1014,7 +1015,8 @@ def validate(self): # we allow rolling on a datetimelike index if (self.is_datetimelike and - isinstance(self.window, (compat.string_types, DateOffset))): + isinstance(self.window, (compat.string_types, DateOffset, + timedelta))): self._validate_monotonic() freq = self._validate_freq() diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1bb1f91423a9d..452e8999ab13f 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -4,7 +4,7 @@ import warnings from warnings import catch_warnings -from datetime import datetime +from datetime import datetime, timedelta from numpy.random import randn import numpy as np from distutils.version import LooseVersion @@ -401,6 +401,24 @@ def test_constructor_with_win_type(self): with self.assertRaises(ValueError): c(-1, 
win_type='boxcar') + def test_constructor_with_timedelta_window(self): + # GH 15440 + n = 10 + df = pd.DataFrame({'value': np.arange(n)}, + index=pd.date_range('2015-12-24', + periods=n, + freq="D")) + expected_data = np.append([0., 1.], np.arange(3., 27., 3)) + for window in [timedelta(days=3), pd.Timedelta(days=3)]: + result = df.rolling(window=window).sum() + expected = pd.DataFrame({'value': expected_data}, + index=pd.date_range('2015-12-24', + periods=n, + freq="D")) + tm.assert_frame_equal(result, expected) + expected = df.rolling('3D').sum() + tm.assert_frame_equal(result, expected) + def test_numpy_compat(self): # see gh-12811 r = rwindow.Rolling(Series([2, 4, 6]), window=2) From be4a63fe791e27c2f8a9ae4f3a419ccc255c1b5b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Feb 2017 12:04:48 -0500 Subject: [PATCH 081/353] BUG: testing on windows - we are passing builds which actually have an error - fix the small dtype issues Author: Jeff Reback Closes #15445 from jreback/windows and squashes the following commits: a5b7fb3 [Jeff Reback] change integer to power comparisions eab15c4 [Jeff Reback] don't force remove pandas cf3b9bd [Jeff Reback] more windows fixing efe6a76 [Jeff Reback] add cython to build 8194e63 [Jeff Reback] don't use appveyor recipe, just build inplace e064825 [Jeff Reback] TST: resample dtype issue xref #15418 10d9b26 [Jeff Reback] TST: run windows tests so failures show up in appeveyor --- appveyor.yml | 12 ++++---- ci/appveyor.recipe/bld.bat | 2 -- ci/appveyor.recipe/build.sh | 2 -- ci/appveyor.recipe/meta.yaml | 37 ------------------------- pandas/tests/indexing/test_timedelta.py | 3 +- pandas/tests/test_expressions.py | 10 +++---- pandas/tests/tseries/test_resample.py | 2 +- test.bat | 3 +- 8 files changed, 13 insertions(+), 58 deletions(-) delete mode 100644 ci/appveyor.recipe/bld.bat delete mode 100644 ci/appveyor.recipe/build.sh delete mode 100644 ci/appveyor.recipe/meta.yaml diff --git a/appveyor.yml b/appveyor.yml index d96e1dfcf76de..1c14698430996 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -78,21 +78,19 @@ install: # this is now the downloaded conda... 
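Returning to the rolling-window patch above (GH 15440), a short usage sketch of the Timedelta-like window arguments it enables, assuming pandas 0.20+ and a datetime-like index:

    import pandas as pd
    from datetime import timedelta

    df = pd.DataFrame({'value': range(10)},
                      index=pd.date_range('2015-12-24', periods=10, freq='D'))
    # these three spellings are now equivalent
    r1 = df.rolling('3D').sum()
    r2 = df.rolling(window=pd.Timedelta(days=3)).sum()
    r3 = df.rolling(window=timedelta(days=3)).sum()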
- cmd: conda info -a - # build em using the local source checkout in the correct windows env - - cmd: '%CMD_IN_ENV% conda build ci\appveyor.recipe -q' - # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% pytest + - cmd: conda create -q -n pandas python=%PYTHON_VERSION% cython pytest - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run - cmd: echo "installing requirements from %REQ%" - cmd: conda install -n pandas -q --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" - - ps: conda install -n pandas (conda build ci\appveyor.recipe -q --output) + + # build em using the local source checkout in the correct windows env + - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace' test_script: # tests - cmd: activate pandas - - cmd: cd \ - - cmd: python -c "import pandas; pandas.test(['--skip-slow', '--skip-network'])" + - cmd: test.bat diff --git a/ci/appveyor.recipe/bld.bat b/ci/appveyor.recipe/bld.bat deleted file mode 100644 index 284926fae8c04..0000000000000 --- a/ci/appveyor.recipe/bld.bat +++ /dev/null @@ -1,2 +0,0 @@ -@echo off -%PYTHON% setup.py install diff --git a/ci/appveyor.recipe/build.sh b/ci/appveyor.recipe/build.sh deleted file mode 100644 index f341bce6fcf96..0000000000000 --- a/ci/appveyor.recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -$PYTHON setup.py install diff --git a/ci/appveyor.recipe/meta.yaml b/ci/appveyor.recipe/meta.yaml deleted file mode 100644 index 777fd9d682d48..0000000000000 --- a/ci/appveyor.recipe/meta.yaml +++ /dev/null @@ -1,37 +0,0 @@ -package: - name: pandas - version: 0.20.0 - -build: - number: {{environ.get('APPVEYOR_BUILD_NUMBER', 0)}} # [win] - string: np{{ environ.get('CONDA_NPY') }}py{{ environ.get('CONDA_PY') }}_{{ environ.get('APPVEYOR_BUILD_NUMBER', 0) }} # [win] - -source: - - # conda-build needs a full clone - # rather than a shallow git_url type clone - # https://github.com/conda/conda-build/issues/780 - path: ../../ - -requirements: - build: - - python - - cython - - numpy x.x - - setuptools - - pytz - - python-dateutil - - run: - - python - - numpy x.x - - python-dateutil - - pytz - -test: - imports: - - pandas - -about: - home: http://pandas.pydata.org - license: BSD diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index e5ccd72cac20a..5f0088382ce57 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -13,8 +13,7 @@ def test_boolean_indexing(self): [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], [10, 10, 10, 3, 4, 5, 6, 7, 8, 9]] for cond, data in zip(conditions, expected_data): - result = df.copy() - result.loc[cond, 'x'] = 10 + result = df.assign(x=df.mask(cond, 10).astype('int64')) expected = pd.DataFrame(data, index=pd.to_timedelta(range(10), unit='s'), columns=['x']) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 3032a288032a2..f669ebe371f9d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -12,7 +12,7 @@ from pandas.core.api import DataFrame, Panel from pandas.computation import expressions as expr -from pandas import compat, _np_version_under1p12 +from pandas import compat, _np_version_under1p11 from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal, slow) @@ -70,10 +70,10 @@ def run_arithmetic(self, df, other, assert_func, check_dtype=False, operations.append('div') for arith in 
operations: - # numpy >= 1.12 doesn't handle integers + # numpy >= 1.11 doesn't handle integers # raised to integer powers # https://github.com/pandas-dev/pandas/issues/15363 - if arith == 'pow' and not _np_version_under1p12: + if arith == 'pow' and not _np_version_under1p11: continue operator_name = arith @@ -272,10 +272,10 @@ def testit(): for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'), ('pow', '**')]: - # numpy >= 1.12 doesn't handle integers + # numpy >= 1.11 doesn't handle integers # raised to integer powers # https://github.com/pandas-dev/pandas/issues/15363 - if op == 'pow' and not _np_version_under1p12: + if op == 'pow' and not _np_version_under1p11: continue if op == 'div': diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 45bbc88ef711d..6e999c5b1d276 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -1944,7 +1944,7 @@ def test_resample_nunique_with_date_gap(self): index = pd.date_range('1-1-2000', '2-15-2000', freq='h') index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h') index3 = index.append(index2) - s = pd.Series(range(len(index3)), index=index3) + s = pd.Series(range(len(index3)), index=index3, dtype='int64') r = s.resample('M') # Since all elements are unique, these should all be the same diff --git a/test.bat b/test.bat index 7f9244abb2bc8..2c5f25c24a637 100644 --- a/test.bat +++ b/test.bat @@ -1,4 +1,3 @@ :: test on windows -:: nosetests --exe -A "not slow and not network and not disabled" pandas %* -pytest pandas +pytest --skip-slow --skip-network pandas From 821be3991cca866a5cc9cf3407cd9f68c66c0306 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 20 Feb 2017 09:36:19 -0500 Subject: [PATCH 082/353] BUG: MultiIndex indexing with passed Series/DataFrame/ndarray as indexers closes #15424 closes #15434 Author: Pietro Battiston Closes #15425 from toobaz/mi_indexing and squashes the following commits: 2ba2d5d [Pietro Battiston] Updated comment 900e3ce [Pietro Battiston] whatsnew 8467b57 [Pietro Battiston] Tests for previous commit 17209f3 [Pietro Battiston] BUG: support indexing MultiIndex with 1-D array 7606114 [Pietro Battiston] Whatsnew 0b719f5 [Pietro Battiston] Test for previous commit 1f2f385 [Pietro Battiston] BUG: Fix indexing MultiIndex with Series with 0 not index --- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/core/indexing.py | 25 ++++++++++++----- pandas/tests/indexing/test_multiindex.py | 34 ++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ae4a3d3c3d97f..9e71b9a11c8eb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -501,7 +501,8 @@ Bug Fixes - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. 
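A brief sketch of the MultiIndex indexing behaviour the patch above addresses (GH 15424, GH 15434); the index and values are made up for illustration:

    import numpy as np
    import pandas as pd

    index = pd.MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
    s = pd.Series(np.arange(9, dtype='float64'), index=index)
    # ndarray and Series keys select on the outer level; for a Series
    # indexer the *values* are used, not its index
    print(s.loc[np.array([1, 3])])
    print(s.loc[pd.Series([1, 3], index=[1, 2])])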
(:issue:`15021`) -- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 66510a7708e64..6f490875742ca 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1521,15 +1521,28 @@ def _getitem_axis(self, key, axis=0): return self._getbool_axis(key, axis=axis) elif is_list_like_indexer(key): - # GH 7349 - # possibly convert a list-like into a nested tuple - # but don't convert a list-like of tuples + # convert various list-like indexers + # to a list of keys + # we will use the *values* of the object + # and NOT the index if its a PandasObject if isinstance(labels, MultiIndex): + + if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: + # Series, or 0,1 ndim ndarray + # GH 14730 + key = list(key) + elif isinstance(key, ABCDataFrame): + # GH 15438 + raise NotImplementedError("Indexing a MultiIndex with a " + "DataFrame key is not " + "implemented") + elif hasattr(key, 'ndim') and key.ndim > 1: + raise NotImplementedError("Indexing a MultiIndex with a " + "multidimensional key is not " + "implemented") + if (not isinstance(key, tuple) and len(key) > 1 and not isinstance(key[0], tuple)): - if isinstance(key, ABCSeries): - # GH 14730 - key = list(key) key = tuple([key]) # an iterable multi-selection diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index b6b9ac93b234c..b40f0b8cd9976 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -158,12 +158,46 @@ def test_loc_getitem_series(self): result = x.loc[[1, 3]] tm.assert_series_equal(result, expected) + # GH15424 + y1 = Series([1, 3], index=[1, 2]) + result = x.loc[y1] + tm.assert_series_equal(result, expected) + empty = Series(data=[], dtype=np.float64) expected = Series([], index=MultiIndex( levels=index.levels, labels=[[], []], dtype=np.float64)) result = x.loc[empty] tm.assert_series_equal(result, expected) + def test_loc_getitem_array(self): + # GH15434 + # passing an array as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = np.array([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]), + dtype=np.float64) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + # empty array: + empty = np.array([]) + expected = Series([], index=MultiIndex( + levels=index.levels, labels=[[], []], dtype=np.float64)) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + # 0-dim array (scalar): + scalar = np.int64(1) + expected = Series( + data=[0, 1, 2], + index=['A', 'B', 'C'], + dtype=np.float64) + result = x.loc[scalar] + tm.assert_series_equal(result, expected) + def test_iloc_getitem_multiindex(self): mi_labels = DataFrame(np.random.randn(4, 3), columns=[['i', 'i', 'j'], ['A', 'A', 'B']], From 12f2c6a101cf866527df5dac4184a8803792fd78 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 20 Feb 2017 10:13:38 -0500 Subject: [PATCH 083/353] TST: make sure test_fash uses the same seed for launching processes --- test_fast.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_fast.sh b/test_fast.sh index 
43eb376f879cd..30ac7f84cbe8b 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -1 +1,8 @@ +#!/bin/bash + +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') + pytest pandas --skip-slow --skip-network -m "not single" -n 4 From 0b4fdf988e3125f7c55aaf6e08a2dfa7d9e2e8a0 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 20 Feb 2017 14:12:28 -0500 Subject: [PATCH 084/353] ENH: Add __copy__ and __deepcopy__ to NDFrame closes #15370 Author: Brian McFee Author: Jeff Reback Closes #15444 from bmcfee/deepcopy-ndframe and squashes the following commits: bf36f35 [Jeff Reback] TST: skip the panel4d deepcopy tests d58b1f6 [Brian McFee] added tests for copy and deepcopy 35f3e0f [Brian McFee] relocated Index.__deepcopy__ to live near __copy__ 1aea940 [Brian McFee] switched deepcopy test to using generic comparator 7e67e7d [Brian McFee] ndframe and index __copy__ are now proper methods 820664c [Brian McFee] moved deepcopy test to generic.py 9721041 [Brian McFee] added copy/deepcopy to ndframe, fixes #15370 --- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/core/generic.py | 8 ++++++++ pandas/indexes/base.py | 13 +++++++------ pandas/tests/test_generic.py | 24 ++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9e71b9a11c8eb..40b068547c360 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -433,6 +433,7 @@ Other API Changes - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) - The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss (:issue:`14064`, :issue:`14305`). - Reorganization of timeseries development tests (:issue:`14854`) +- Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) .. _whatsnew_0200.deprecations: @@ -500,7 +501,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. 
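A minimal sketch of the copy.copy / copy.deepcopy support described in the whatsnew entries above (GH 15444), including the empty-frame case from GH 15370:

    import copy
    import pandas as pd

    df = pd.DataFrame(data=[], index=[], columns=['A'])   # empty frame
    df2 = copy.copy(df)
    df3 = copy.deepcopy(df)      # no longer fails on empty NDFrame objects
    print(df3 is df)             # False: an independent copy is returned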
(:issue:`15021`) - +- Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 26b9a880dd2c7..76fbb9884753d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3161,6 +3161,14 @@ def copy(self, deep=True): data = self._data.copy(deep=deep) return self._constructor(data).__finalize__(self) + def __copy__(self, deep=True): + return self.copy(deep=deep) + + def __deepcopy__(self, memo=None): + if memo is None: + memo = {} + return self.copy(deep=True) + def _convert(self, datetime=False, numeric=False, timedelta=False, coerce=False, copy=True): """ diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index e51824e72a2a0..f1f37622b2a74 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -724,7 +724,13 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): new_index = new_index.astype(dtype) return new_index - __copy__ = copy + def __copy__(self, **kwargs): + return self.copy(**kwargs) + + def __deepcopy__(self, memo=None): + if memo is None: + memo = {} + return self.copy(deep=True) def _validate_names(self, name=None, names=None, deep=False): """ @@ -1480,11 +1486,6 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ - def __deepcopy__(self, memo=None): - if memo is None: - memo = {} - return self.copy(deep=True) - def __nonzero__(self): raise ValueError("The truth value of a {0} is ambiguous. " "Use a.empty, a.bool(), a.item(), a.any() or a.all()." diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index b087ca21d3c25..40cdbe083acd7 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -2,6 +2,7 @@ # pylint: disable-msg=E1101,W0612 from operator import methodcaller +from copy import copy, deepcopy import pytest import numpy as np from numpy import nan @@ -675,6 +676,18 @@ def test_validate_bool_args(self): with self.assertRaises(ValueError): super(DataFrame, df).mask(cond=df.a > 2, inplace=value) + def test_copy_and_deepcopy(self): + # GH 15444 + for shape in [0, 1, 2]: + obj = self._construct(shape) + for func in [copy, + deepcopy, + lambda x: x.copy(deep=False), + lambda x: x.copy(deep=True)]: + obj_copy = func(obj) + self.assertIsNot(obj_copy, obj) + self._compare(obj_copy, obj) + class TestSeries(tm.TestCase, Generic): _typ = Series @@ -1539,6 +1552,14 @@ def test_to_xarray(self): expected, check_index_type=False) + def test_deepcopy_empty(self): + # This test covers empty frame copying with non-empty column sets + # as reported in issue GH15370 + empty_frame = DataFrame(data=[], index=[], columns=['A']) + empty_frame_copy = deepcopy(empty_frame) + + self._compare(empty_frame_copy, empty_frame) + class TestPanel(tm.TestCase, Generic): _typ = Panel @@ -1569,6 +1590,9 @@ class TestPanel4D(tm.TestCase, Generic): def test_sample(self): pytest.skip("sample on Panel4D") + def test_copy_and_deepcopy(self): + pytest.skip("copy_and_deepcopy on Panel4D") + def test_to_xarray(self): tm._skip_if_no_xarray() From e1d54074ce8448bfcc69dc08d8a800ef9ef918ff Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 20 Feb 2017 21:39:10 -0500 Subject: [PATCH 085/353] CI: add circle ci support this adds support for using CircleCI; configured to put 4 of our builds (3.4, 3.4-slow, 2.7 compat, and 3.5 ascii), they 
are still on Travis ATM. They are built/tested simultaneously on CircleCI (as we get 4 containers for open-source). Author: Jeff Reback Closes #15464 from jreback/circle and squashes the following commits: 3756674 [Jeff Reback] CI: add circle ci support --- .travis.yml | 2 +- ci/install_circle.sh | 88 ++++++++++++++++++++++ ci/install_db_circle.sh | 8 ++ ci/{install_db.sh => install_db_travis.sh} | 0 ci/run_circle.sh | 9 +++ ci/show_circle.sh | 8 ++ circle.yml | 35 +++++++++ 7 files changed, 149 insertions(+), 1 deletion(-) create mode 100755 ci/install_circle.sh create mode 100755 ci/install_db_circle.sh rename ci/{install_db.sh => install_db_travis.sh} (100%) create mode 100755 ci/run_circle.sh create mode 100755 ci/show_circle.sh create mode 100644 circle.yml diff --git a/.travis.yml b/.travis.yml index 6245213cec06f..bb96ab210c088 100644 --- a/.travis.yml +++ b/.travis.yml @@ -315,7 +315,7 @@ install: before_script: - source activate pandas && pip install codecov - - ci/install_db.sh + - ci/install_db_travis.sh script: - echo "script start" diff --git a/ci/install_circle.sh b/ci/install_circle.sh new file mode 100755 index 0000000000000..485586e9d4f49 --- /dev/null +++ b/ci/install_circle.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash + +home_dir=$(pwd) +echo "[home_dir: $home_dir]" + +echo "[ls -ltr]" +ls -ltr + +echo "[Using clean Miniconda install]" +rm -rf "$MINICONDA_DIR" + +# install miniconda +wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 +bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + +export PATH="$MINICONDA_DIR/bin:$PATH" + +echo "[update conda]" +conda config --set ssl_verify false || exit 1 +conda config --set always_yes true --set changeps1 false || exit 1 +conda update -q conda + +# add the pandas channel to take priority +# to add extra packages +echo "[add channels]" +conda config --add channels pandas || exit 1 +conda config --remove channels defaults || exit 1 +conda config --add channels defaults || exit 1 + +# Useful for debugging any issues with conda +conda info -a || exit 1 + +# support env variables passed +export ENVS_FILE=".envs" + +# make sure that the .envs file exists. 
it is ok if it is empty +touch $ENVS_FILE + +# assume all command line arguments are environmental variables +for var in "$@" +do + echo "export $var" >> $ENVS_FILE +done + +echo "[environmental variable file]" +cat $ENVS_FILE +source $ENVS_FILE + +export REQ_BUILD=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build +export REQ_RUN=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run +export REQ_PIP=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip + +# edit the locale override if needed +if [ -n "$LOCALE_OVERRIDE" ]; then + echo "[Adding locale to the first line of pandas/__init__.py]" + rm -f pandas/__init__.pyc + sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n" + sed -i "$sedc" pandas/__init__.py + echo "[head -4 pandas/__init__.py]" + head -4 pandas/__init__.py + echo +fi + +# create new env +echo "[create env]" +time conda create -q -n pandas python=${PYTHON_VERSION} pytest || exit 1 + +source activate pandas + +# build deps +echo "[build installs: ${REQ_BUILD}]" +time conda install -q --file=${REQ_BUILD} || exit 1 + +# build but don't install +echo "[build em]" +time python setup.py build_ext --inplace || exit 1 + +# we may have run installations +echo "[conda installs: ${REQ_RUN}]" +if [ -e ${REQ_RUN} ]; then + time conda install -q --file=${REQ_RUN} || exit 1 +fi + +# we may have additional pip installs +echo "[pip installs: ${REQ_PIP}]" +if [ -e ${REQ_PIP} ]; then + pip install -q -r $REQ_PIP +fi diff --git a/ci/install_db_circle.sh b/ci/install_db_circle.sh new file mode 100755 index 0000000000000..a00f74f009f54 --- /dev/null +++ b/ci/install_db_circle.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +echo "installing dbs" +mysql -e 'create database pandas_nosetest;' +psql -c 'create database pandas_nosetest;' -U postgres + +echo "done" +exit 0 diff --git a/ci/install_db.sh b/ci/install_db_travis.sh similarity index 100% rename from ci/install_db.sh rename to ci/install_db_travis.sh diff --git a/ci/run_circle.sh b/ci/run_circle.sh new file mode 100755 index 0000000000000..0e46d28ab6fc4 --- /dev/null +++ b/ci/run_circle.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +echo "[running tests]" +export PATH="$MINICONDA_DIR/bin:$PATH" + +source activate pandas + +echo "pytest --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas" +pytest --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas diff --git a/ci/show_circle.sh b/ci/show_circle.sh new file mode 100755 index 0000000000000..bfaa65c1d84f2 --- /dev/null +++ b/ci/show_circle.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +echo "[installed versions]" + +export PATH="$MINICONDA_DIR/bin:$PATH" +source activate pandas + +python -c "import pandas; pandas.show_versions();" diff --git a/circle.yml b/circle.yml new file mode 100644 index 0000000000000..97136d368ae6f --- /dev/null +++ b/circle.yml @@ -0,0 +1,35 @@ +machine: + environment: + # these are globally set + MINICONDA_DIR: /home/ubuntu/miniconda3 + +database: + override: + - ./ci/install_db_circle.sh + +checkout: + post: + # since circleci does a shallow fetch + # we need to populate our tags + - git fetch --depth=1000 + - git fetch --tags + +dependencies: + override: + - > + case $CIRCLE_NODE_INDEX in + 0) + sudo apt-get install language-pack-it && ./ci/install_circle.sh PYTHON_VERSION=2.7 JOB_TAG="_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; + 1) + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 JOB_TAG="_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + 2) + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 
JOB_TAG="" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + 3) + ./ci/install_circle.sh PYTHON_VERSION=3.5 JOB_TAG="_ASCII" LOCALE_OVERRIDE="C" ;; + esac + - ./ci/show_circle.sh + +test: + override: + - case $CIRCLE_NODE_INDEX in 0) ./ci/run_circle.sh --skip-slow --skip-network ;; 1) ./ci/run_circle.sh --only-slow --skip-network ;; 2) ./ci/run_circle.sh --skip-slow --skip-network ;; 3) ./ci/run_circle.sh --skip-slow --skip-network ;; esac: + parallel: true From bb2144a32cb30bc7428b117389a280b2515e9cf1 Mon Sep 17 00:00:00 2001 From: tzinckgraf Date: Tue, 21 Feb 2017 08:29:55 -0500 Subject: [PATCH 086/353] BUG: Bug on reset_index for a MultiIndex of all NaNs closes #6322 Author: tzinckgraf Closes #15466 from tzinckgraf/GH6322 and squashes the following commits: 35f97f4 [tzinckgraf] GH6322, Bug on reset_index for a MultiIndex of all NaNs --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 15 +++++++++++---- pandas/tests/frame/test_alter_axes.py | 27 +++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 40b068547c360..86f916bc0acfb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -575,6 +575,7 @@ Bug Fixes - Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`) - Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) +- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ebdf72a5cde9..bfef2cfbd0d51 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2973,10 +2973,17 @@ def _maybe_casted_values(index, labels=None): # if we have the labels, extract the values with a mask if labels is not None: mask = labels == -1 - values = values.take(labels) - if mask.any(): - values, changed = _maybe_upcast_putmask(values, mask, - np.nan) + + # we can have situations where the whole mask is -1, + # meaning there is nothing found in labels, so make all nan's + if mask.all(): + values = np.empty(len(mask)) + values.fill(np.nan) + else: + values = values.take(labels) + if mask.any(): + values, changed = _maybe_upcast_putmask(values, mask, + np.nan) return values new_index = _default_index(len(new_obj)) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e84bb6407fafc..e52bfdbd4f837 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -624,6 +624,33 @@ def test_reset_index_multiindex_col(self): ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) + def test_reset_index_multiindex_nan(self): + # GH6322, testing reset_index on MultiIndexes + # when we have a nan or all nan + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': [np.nan, 'b', 'c'], + 'B': [0, 1, 2], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, 2], + 'C': [np.nan, 1.1, 2.2]}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = 
pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [np.nan, np.nan, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + def test_reset_index_with_datetimeindex_cols(self): # GH5818 # From f62e8f242d90d849e4854f4fe82e9ebb5b731d74 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 21 Feb 2017 17:26:03 -0500 Subject: [PATCH 087/353] DOC: Link CONTRIBUTING.md to contributing.rst (#15451) Previously, we were trying to maintain two different copies of the documentation, one in the ".github" directory, and the other in the "docs/," which just imposes greater maintenance burden in the long-run. We now use the ".github" to refer to different portions of the "docs/" version with short summaries for ease of navigation. Closes gh-15349. --- .github/CONTRIBUTING.md | 519 ++-------------------------------------- 1 file changed, 14 insertions(+), 505 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7898822e0e11d..95729f845ff5c 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,515 +1,24 @@ Contributing to pandas ====================== -Where to start? ---------------- - -All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. - -If you are simply looking to start working with the *pandas* codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. - -Or maybe through using *pandas* you have an idea of you own or are looking for something in the documentation and thinking 'this can be improved'...you can do something about it! - -Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). - -Bug reports and enhancement requests ------------------------------------- - -Bug reports are an important part of making *pandas* more stable. Having a complete bug report will allow others to reproduce the bug and provide insight into fixing. Because many versions of *pandas* are supported, knowing version information will also identify improvements made since previous versions. Trying the bug-producing code out on the *master* branch is often a worthwhile exercise to confirm the bug still exists. It is also worth searching existing bug reports and pull requests to see if the issue has already been reported and/or fixed. - -Bug reports must: - -1. Include a short, self-contained Python snippet reproducing the problem. You can format the code nicely by using [GitHub Flavored Markdown](http://github.github.com/github-flavored-markdown/): - - ```python - >>> from pandas import DataFrame - >>> df = DataFrame(...) - ... - ``` - -2. Include the full version string of *pandas* and its dependencies. In versions of *pandas* after 0.12 you can use a built in function: - - >>> from pandas.util.print_versions import show_versions - >>> show_versions() - - and in *pandas* 0.13.1 onwards: - - >>> pd.show_versions() - -3. Explain why the current behavior is wrong/not desired and what you expect instead. - -The issue will then show up to the *pandas* community and be open to comments/ideas from others. 
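Stepping back to the reset_index fix above (GH 6322), a short sketch of the set_index / reset_index round trip it repairs; the frame contents are illustrative:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': ['a', 'b', 'c'],
                       'B': [np.nan, np.nan, np.nan],
                       'C': [1.0, 2.0, 3.0]})
    # round-trips again even though level 'B' is all-NaN
    print(df.set_index(['A', 'B']).reset_index())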
- -Working with the code ---------------------- - -Now that you have an issue you want to fix, enhancement to add, or documentation to improve, you need to learn how to work with GitHub and the *pandas* code base. - -### Version control, Git, and GitHub - -To the new user, working with Git is one of the more daunting aspects of contributing to *pandas*. It can very quickly become overwhelming, but sticking to the guidelines below will help keep the process straightforward and mostly trouble free. As always, if you are having difficulties please feel free to ask for help. - -The code is hosted on [GitHub](https://www.github.com/pandas-dev/pandas). To contribute you will need to sign up for a [free GitHub account](https://github.com/signup/free). We use [Git](http://git-scm.com/) for version control to allow many people to work together on the project. - -Some great resources for learning Git: - -- the [GitHub help pages](http://help.github.com/). -- the [NumPy's documentation](http://docs.scipy.org/doc/numpy/dev/index.html). -- Matthew Brett's [Pydagogue](http://matthew-brett.github.com/pydagogue/). - -### Getting started with Git - -[GitHub has instructions](http://help.github.com/set-up-git-redirect) for installing git, setting up your SSH key, and configuring git. All these steps need to be completed before you can work seamlessly between your local repository and GitHub. - -### Forking - -You will need your own fork to work on the code. Go to the [pandas project page](https://github.com/pandas-dev/pandas) and hit the `Fork` button. You will want to clone your fork to your machine: - - git clone git@github.com:your-user-name/pandas.git pandas-yourname - cd pandas-yourname - git remote add upstream git://github.com/pandas-dev/pandas.git - -This creates the directory pandas-yourname and connects your repository to the upstream (main project) *pandas* repository. - -The testing suite will run automatically on Travis-CI once your pull request is submitted. However, if you wish to run the test suite on a branch prior to submitting the pull request, then Travis-CI needs to be hooked up to your GitHub repository. Instructions for doing so are [here](http://about.travis-ci.org/docs/user/getting-started/). - -### Creating a branch - -You want your master branch to reflect only production-ready code, so create a feature branch for making your changes. For example: - - git branch shiny-new-feature - git checkout shiny-new-feature - -The above can be simplified to: - - git checkout -b shiny-new-feature - -This changes your working directory to the shiny-new-feature branch. Keep any changes in this branch specific to one bug or feature so it is clear what the branch brings to *pandas*. You can have many shiny-new-features and switch in between them using the git checkout command. - -To update this branch, you need to retrieve the changes from the master branch: - - git fetch upstream - git rebase upstream/master - -This will replay your commits on top of the lastest pandas git master. If this leads to merge conflicts, you must resolve these before submitting your pull request. If you have uncommitted changes, you will need to `stash` them prior to updating. This will effectively store your changes and they can be reapplied after updating. - -### Creating a development environment - -An easy way to create a *pandas* development environment is as follows. 
- -- Install either Anaconda <install.anaconda> or miniconda <install.miniconda> -- Make sure that you have cloned the repository <contributing.forking> -- `cd` to the *pandas* source directory - -Tell conda to create a new environment, named `pandas_dev`, or any other name you would like for this environment, by running: - - conda create -n pandas_dev --file ci/requirements_dev.txt - -For a python 3 environment: - - conda create -n pandas_dev python=3 --file ci/requirements_dev.txt - -If you are on Windows, then you will also need to install the compiler linkages: - - conda install -n pandas_dev libpython - -This will create the new environment, and not touch any of your existing environments, nor any existing python installation. It will install all of the basic dependencies of *pandas*, as well as the development and testing tools. If you would like to install other dependencies, you can install them as follows: - - conda install -n pandas_dev -c pandas pytables scipy - -To install *all* pandas dependencies you can do the following: - - conda install -n pandas_dev -c pandas --file ci/requirements_all.txt - -To work in this environment, Windows users should `activate` it as follows: - - activate pandas_dev - -Mac OSX and Linux users should use: - - source activate pandas_dev - -You will then see a confirmation message to indicate you are in the new development environment. - -To view your environments: - - conda info -e - -To return to you home root environment: - - deactivate - -See the full conda docs [here](http://conda.pydata.org/docs). - -At this point you can easily do an *in-place* install, as detailed in the next section. - -### Making changes - -Before making your code changes, it is often necessary to build the code that was just checked out. There are two primary methods of doing this. - -1. The best way to develop *pandas* is to build the C extensions in-place by running: - - python setup.py build_ext --inplace - - If you startup the Python interpreter in the *pandas* source directory you will call the built C extensions - -2. Another very common option is to do a `develop` install of *pandas*: - - python setup.py develop - - This makes a symbolic link that tells the Python interpreter to import *pandas* from your development directory. Thus, you can always be using the development version on your system without being inside the clone directory. - -Contributing to the documentation ---------------------------------- - -If you're not the developer type, contributing to the documentation is still of huge value. You don't even have to be an expert on *pandas* to do so! Something as simple as rewriting small passages for clarity as you reference the docs is a simple but effective way to contribute. The next person to read that passage will be in your debt! - -In fact, there are sections of the docs that are worse off after being written by experts. If something in the docs doesn't make sense to you, updating the relevant section after you figure it out is a simple way to ensure it will help the next person. - -### About the *pandas* documentation - -The documentation is written in **reStructuredText**, which is almost like writing in plain English, and built using [Sphinx](http://sphinx.pocoo.org/). The Sphinx Documentation has an excellent [introduction to reST](http://sphinx.pocoo.org/rest.html). Review the Sphinx docs to perform more complex changes to the documentation as well. 
- -Some other important things to know about the docs: - -- The *pandas* documentation consists of two parts: the docstrings in the code itself and the docs in this folder `pandas/doc/`. - - The docstrings provide a clear explanation of the usage of the individual functions, while the documentation in this folder consists of tutorial-like overviews per topic together with some other information (what's new, installation, etc). - -- The docstrings follow the **Numpy Docstring Standard**, which is used widely in the Scientific Python community. This standard specifies the format of the different sections of the docstring. See [this document](https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt) for a detailed explanation, or look at some of the existing functions to extend it in a similar manner. -- The tutorials make heavy use of the [ipython directive](http://matplotlib.org/sampledoc/ipython_directive.html) sphinx extension. This directive lets you put code in the documentation which will be run during the doc build. For example: - - .. ipython:: python - - x = 2 - x**3 - - will be rendered as: - - In [1]: x = 2 - - In [2]: x**3 - Out[2]: 8 - - Almost all code examples in the docs are run (and the output saved) during the doc build. This approach means that code examples will always be up to date, but it does make the doc building a bit more complex. - -> **note** -> -> The `.rst` files are used to automatically generate Markdown and HTML versions of the docs. For this reason, please do not edit `CONTRIBUTING.md` directly, but instead make any changes to `doc/source/contributing.rst`. Then, to generate `CONTRIBUTING.md`, use [pandoc](http://johnmacfarlane.net/pandoc/) with the following command: -> -> pandoc doc/source/contributing.rst -t markdown_github > CONTRIBUTING.md - -The utility script `scripts/api_rst_coverage.py` can be used to compare the list of methods documented in `doc/source/api.rst` (which is used to generate the [API Reference](http://pandas.pydata.org/pandas-docs/stable/api.html) page) and the actual public methods. This will identify methods documented in `doc/source/api.rst` that are not actually class methods, and existing methods that are not documented in `doc/source/api.rst`. - -### How to build the *pandas* documentation - -#### Requirements - -To build the *pandas* docs there are some extra requirements: you will need to have `sphinx` and `ipython` installed. [numpydoc](https://github.com/numpy/numpydoc) is used to parse the docstrings that follow the Numpy Docstring Standard (see above), but you don't need to install this because a local copy of numpydoc is included in the *pandas* source code. - -It is easiest to create a development environment <contributing.dev\_env>, then install: - - conda install -n pandas_dev sphinx ipython - -Furthermore, it is recommended to have all [optional dependencies](http://pandas.pydata.org/pandas-docs/dev/install.html#optional-dependencies) installed. This is not strictly necessary, but be aware that you will see some error messages when building the docs. This happens because all the code in the documentation is executed during the doc build, and so code examples using optional dependencies will generate errors. Run `pd.show_versions()` to get an overview of the installed version of all dependencies. - -> **warning** -> -> You need to have `sphinx` version 1.2.2 or newer, but older than version 1.3. Versions before 1.1.3 should also work. - -#### Building the documentation - -So how do you build the docs?
Navigate to your local `pandas/doc/` directory in the console and run: - - python make.py html - -Then you can find the HTML output in the folder `pandas/doc/build/html/`. - -The first time you build the docs, it will take quite a while because it has to run all the code examples and build all the generated docstring pages. In subsequent invocations, sphinx will try to only build the pages that have been modified. - -If you want to do a full clean build, do: - - python make.py clean - python make.py build - -Starting with *pandas* 0.13.1 you can tell `make.py` to compile only a single section of the docs, greatly reducing the turn-around time for checking your changes. You will be prompted to delete `.rst` files that aren't required. This is okay because the prior versions of these files can be checked out from git. However, you must make sure not to commit the file deletions to your Git repository! - - #omit autosummary and API section - python make.py clean - python make.py --no-api - - # compile the docs with only a single - # section, that which is in indexing.rst - python make.py clean - python make.py --single indexing - -For comparison, a full documentation build may take 10 minutes, a `--no-api` build may take 3 minutes and a single section may take 15 seconds. Subsequent builds, which only process portions you have changed, will be faster. Open the following file in a web browser to see the full documentation you just built: - - pandas/doc/build/html/index.html +Whether you are a novice or experienced software developer, all contributions and suggestions are welcome! -And you'll have the satisfaction of seeing your new and improved documentation! +Our main contribution docs can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst), but if you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant places in the docs for further information. -#### Building master branch documentation - -When pull requests are merged into the *pandas* `master` branch, the main parts of the documentation are also built by Travis-CI. These docs are then hosted [here](http://pandas-docs.github.io/pandas-docs-travis). - -Contributing to the code base ------------------------------ - -### Code standards - -*pandas* uses the [PEP8](http://www.python.org/dev/peps/pep-0008/) standard. There are several tools to ensure you abide by this standard. - -We've written a tool to check that your commits are PEP8 great, [pip install pep8radius](https://github.com/hayd/pep8radius). Look at PEP8 fixes in your branch vs master with: - - pep8radius master --diff - -and make these changes with: - - pep8radius master --diff --in-place - -Alternatively, use the [flake8](http://pypi.python.org/pypi/flake8) tool for checking the style of your code. Additional standards are outlined on the [code style wiki page](https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions). - -Please try to maintain backward compatibility. *pandas* has lots of users with lots of existing code, so don't break it if at all possible. If you think breakage is required, clearly state why as part of the pull request. Also, be careful when changing method signatures and add deprecation warnings where needed. - -### Test-driven development/code writing - -*pandas* is serious about testing and strongly encourages contributors to embrace [test-driven development (TDD)](http://en.wikipedia.org/wiki/Test-driven_development).
This development process "relies on the repetition of a very short development cycle: first the developer writes an (initially failing) automated test case that defines a desired improvement or new function, then produces the minimum amount of code to pass that test." So, before actually writing any code, you should write your tests. Often the test can be taken from the original GitHub issue. However, it is always worth considering additional use cases and writing corresponding tests. - -Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue. - -Like many packages, *pandas* uses the [Nose testing system](https://nose.readthedocs.io/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html). - -#### Writing tests - -All tests should go into the `tests` subdirectory of the specific package. This folder contains many current examples of tests, and we suggest looking to these for inspiration. If your test requires working with files or network connectivity, there is more information on the [testing page](https://github.com/pandas-dev/pandas/wiki/Testing) of the wiki. - -The `pandas.util.testing` module has many special `assert` functions that make it easier to make statements about whether Series or DataFrame objects are equivalent. The easiest way to verify that your code is correct is to explicitly construct the result you expect, then compare the actual result to the expected correct result: - - def test_pivot(self): - data = { - 'index' : ['A', 'B', 'C', 'C', 'B', 'A'], - 'columns' : ['One', 'One', 'One', 'Two', 'Two', 'Two'], - 'values' : [1., 2., 3., 3., 2., 1.] - } - - frame = DataFrame(data) - pivoted = frame.pivot(index='index', columns='columns', values='values') - - expected = DataFrame({ - 'One' : {'A' : 1., 'B' : 2., 'C' : 3.}, - 'Two' : {'A' : 1., 'B' : 2., 'C' : 3.} - }) - - assert_frame_equal(pivoted, expected) - -#### Running the test suite - -The tests can then be run directly inside your Git clone (without having to install *pandas*) by typing: - - nosetests pandas - -The test suite is exhaustive and takes around 20 minutes to run. Often it is worth running only a subset of tests first around your changes before running the entire suite. This is done using one of the following constructs: - - nosetests pandas/tests/[test-module].py - nosetests pandas/tests/[test-module].py:[TestClass] - nosetests pandas/tests/[test-module].py:[TestClass].[test_method] - -#### Running the performance test suite - -Performance matters and it is worth considering whether your code has introduced performance regressions. *pandas* is in the process of migrating to the [asv library](https://github.com/spacetelescope/asv) to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the `pandas/asv_bench` directory. asv supports both python2 and python3. - -> **note** -> -> The asv benchmark suite was translated from the previous framework, vbench, so many stylistic issues are likely a result of automated transformation of the code. - -To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](https://asv.readthedocs.io/en/latest/installing.html).
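To make the benchmark discussion above more concrete, here is a minimal sketch of what an asv benchmark can look like. The file and class names are hypothetical (they are not taken from the existing `asv_bench/benchmarks/` modules); asv collects classes from that directory, runs `setup` before measuring, and times every method whose name starts with `time_`.

``` python
# Hypothetical file asv_bench/benchmarks/example_groupby.py (illustration only).
import numpy as np
import pandas as pd


class ExampleGroupBy(object):

    def setup(self):
        # setup() runs before the benchmark is timed, so building the
        # sample data is not included in the measurement.
        n = 10 ** 5
        self.df = pd.DataFrame({'key': np.random.randint(0, 500, size=n),
                                'value': np.random.randn(n)})

    def time_groupby_sum(self):
        # asv reports the wall-clock time of this call
        self.df.groupby('key')['value'].sum()
```

With such a file in place, the `-b` flag described below (for example `asv continuous master -b example_groupby`) would restrict a run to just this benchmark.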
- -To install asv: - - pip install git+https://github.com/spacetelescope/asv - -If you need to run a benchmark, change your directory to `/asv_bench/` and run the following if you have been developing on `master`: - - asv continuous master - -If you are working on another branch, either of the following can be used: - - asv continuous master HEAD - asv continuous master your_branch - -This will check out the master revision and run the suite on both master and your commit. Running the full test suite can take up to one hour and use up to 3GB of RAM. Usually it is sufficient to paste only a subset of the results into the pull request to show that the committed changes do not cause unexpected performance regressions. - -You can run specific benchmarks using the `-b` flag, which takes a regular expression. For example, this will only run tests from a `pandas/asv_bench/benchmarks/groupby.py` file: - - asv continuous master -b groupby - -If you want to only run a specific group of tests from a file, you can do it using `.` as a separator. For example: - - asv continuous master -b groupby.groupby_agg_builtins1 - -will only run a `groupby_agg_builtins1` test defined in a `groupby` file. - -It can also be useful to run tests in your current environment. You can simply do it by: - - asv dev - -This command is equivalent to: - - asv run --quick --show-stderr --python=same - -This will launch every test only once, display stderr from the benchmarks, and use your local `python` that comes from your `$PATH`. - -Information on how to write a benchmark can be found in the [asv documentation](https://asv.readthedocs.io/en/latest/writing_benchmarks.html). - -#### Running the vbench performance test suite (phasing out) - -Historically, *pandas* used [vbench library](https://github.com/pydata/vbench) to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the `pandas/vb_suite` directory. vbench currently only works on python2. - -To install vbench: - - pip install git+https://github.com/pydata/vbench - -Vbench also requires `sqlalchemy`, `gitpython`, and `psutil`, which can all be installed using pip. If you need to run a benchmark, change your directory to the *pandas* root and run: - - ./test_perf.sh -b master -t HEAD - -This will check out the master revision and run the suite on both master and your commit. Running the full test suite can take up to one hour and use up to 3GB of RAM. Usually it is sufficient to paste a subset of the results into the Pull Request to show that the committed changes do not cause unexpected performance regressions. - -You can run specific benchmarks using the `-r` flag, which takes a regular expression. - -See the [performance testing wiki](https://github.com/pandas-dev/pandas/wiki/Performance-Testing) for information on how to write a benchmark. - -### Documenting your code - -Changes should be reflected in the release notes located in `doc/source/whatsnew/vx.y.z.txt`. This file contains an ongoing change log for each release. Add an entry to this file to document your fix, enhancement or (unavoidable) breaking change. Make sure to include the GitHub issue number when adding your entry (using `` :issue:`1234` `` where 1234 is the issue/pull request number). - -If your code is an enhancement, it is most likely necessary to add usage examples to the existing documentation. This can be done following the section regarding documentation above <contributing.documentation>. 
Further, to let users know when this feature was added, the `versionadded` directive is used. The sphinx syntax for that is: - -``` sourceCode -.. versionadded:: 0.17.0 -``` - -This will put the text *New in version 0.17.0* wherever you put the sphinx directive. This should also be put in the docstring when adding a new function or method ([example](https://github.com/pandas-dev/pandas/blob/v0.16.2/pandas/core/generic.py#L1959)) or a new keyword argument ([example](https://github.com/pandas-dev/pandas/blob/v0.16.2/pandas/core/frame.py#L1171)). - -Contributing your changes to *pandas* -------------------------------------- - -### Committing your code - -Keep style fixes to a separate commit to make your pull request more readable. - -Once you've made changes, you can see them by typing: - - git status - -If you have created a new file, it is not being tracked by git. Add it by typing: - - git add path/to/file-to-be-added.py - -Doing 'git status' again should give something like: - - # On branch shiny-new-feature - # - # modified: /relative/path/to/file-you-added.py - # - -Finally, commit your changes to your local repository with an explanatory message. *Pandas* uses a convention for commit message prefixes and layout. Here are some common prefixes along with general guidelines for when to use them: - -> - ENH: Enhancement, new functionality -> - BUG: Bug fix -> - DOC: Additions/updates to documentation -> - TST: Additions/updates to tests -> - BLD: Updates to the build process/scripts -> - PERF: Performance improvement -> - CLN: Code cleanup - -The following defines how a commit message should be structured. Please reference the relevant GitHub issues in your commit message using GH1234 or \#1234. Either style is fine, but the former is generally preferred: - -> - a subject line with < 80 chars. -> - One blank line. -> - Optionally, a commit message body. - -Now you can commit your changes in your local repository: - - git commit -m - -### Combining commits - -If you have multiple commits, you may want to combine them into one commit, often referred to as "squashing" or "rebasing". This is a common request by package maintainers when submitting a pull request as it maintains a more compact commit history. To rebase your commits: - - git rebase -i HEAD~# - -Where \# is the number of commits you want to combine. Then you can pick the relevant commit message and discard others. - -To squash to the master branch do: - - git rebase -i master - -Use the `s` option on a commit to `squash`, meaning to keep the commit messages, or `f` to `fixup`, meaning to merge the commit messages. - -Then you will need to push the branch (see below) forcefully to replace the current commits with the new ones: - - git push origin shiny-new-feature -f - -### Pushing your changes - -When you want your changes to appear publicly on your GitHub page, push your forked feature branch's commits: - - git push origin shiny-new-feature - -Here `origin` is the default name given to your remote repository on GitHub. You can see the remote repositories: - - git remote -v - -If you added the upstream repository as described above you will see something like: - - origin git@github.com:yourname/pandas.git (fetch) - origin git@github.com:yourname/pandas.git (push) - upstream git://github.com/pandas-dev/pandas.git (fetch) - upstream git://github.com/pandas-dev/pandas.git (push) - -Now your code is on GitHub, but it is not yet a part of the *pandas* project. For that to happen, a pull request needs to be submitted on GitHub. 
- -### Review your code - -When you're ready to ask for a code review, file a pull request. Before you do, once again make sure that you have followed all the guidelines outlined in this document regarding code style, tests, performance tests, and documentation. You should also double check your branch changes against the branch it was based on: - -1. Navigate to your repository on GitHub -- -2. Click on `Branches` -3. Click on the `Compare` button for your feature branch -4. Select the `base` and `compare` branches, if necessary. This will be `master` and `shiny-new-feature`, respectively. - -### Finally, make the pull request - -If everything looks good, you are ready to make a pull request. A pull request is how code from a local repository becomes available to the GitHub community and can be looked at and eventually merged into the master version. This pull request and its associated changes will eventually be committed to the master branch and available in the next release. To submit a pull request: - -1. Navigate to your repository on GitHub -2. Click on the `Pull Request` button -3. You can then click on `Commits` and `Files Changed` to make sure everything looks okay one last time -4. Write a description of your changes in the `Preview Discussion` tab -5. Click `Send Pull Request`. - -This request then goes to the repository maintainers, and they will review the code. If you need to make more changes, you can make them in your branch, push them to GitHub, and the pull request will be automatically updated. Pushing them to GitHub again is done by: - - git push -f origin shiny-new-feature - -This will automatically update your pull request with the latest code and restart the Travis-CI tests. - -### Delete your merged branch (optional) - -Once your feature branch is accepted into upstream, you'll probably want to get rid of the branch. First, merge upstream master into your branch so git knows it is safe to delete your branch: - - git fetch upstream - git checkout master - git merge upstream/master +Getting Started +--------------- +If you are looking to contribute to the *pandas* codebase, the best place to start is the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues). This is also a great place for filing bug reports and making suggestions for ways in which we can improve the code and documentation. -Then you can just do: +If you have additional questions, feel free to ask them on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). Further information can also be found in our [Getting Started](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#where-to-start) section of our main contribution doc. - git branch -d shiny-new-feature +Filing Issues +------------- +If you notice a bug in the code or in docs or have suggestions for how we can improve either, feel free to create an issue on the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) using [GitHub's "issue" form](https://github.com/pandas-dev/pandas/issues/new). The form contains some questions that will help us best address your issue. For more information regarding how to file issues against *pandas*, please refer to the [Bug reports and enhancement requests](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#bug-reports-and-enhancement-requests) section of our main contribution doc. 
-Make sure you use a lower-case `-d`, or else git won't warn you if your feature branch has not actually been merged. +Contributing to the Codebase +---------------------------- +The code is hosted on [GitHub](https://www.github.com/pandas-dev/pandas), so you will need to use [Git](http://git-scm.com/) to clone the project and make changes to the codebase. Once you have obtained a copy of the code, you should create a development environment that is separate from your existing Python environment so that you can make and test changes without compromising your own work environment. For more information, please refer to our [Working with the code](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#working-with-the-code) section of our main contribution docs. -The branch will still exist on GitHub, so to delete it there do: +Before submitting your changes for review, make sure to check that your changes do not break any tests. More information about our test suites can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#test-driven-development-code-writing). We also have guidelines regarding coding style that will be enforced during testing. Details about coding style can be found [here](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#code-standards). - git push origin --delete shiny-new-feature +Once your changes are ready to be submitted, make sure to push your changes to GitHub before creating a pull request. Details about how to do that can be found in the [Contributing your changes to pandas](https://github.com/pandas-dev/pandas/blob/master/doc/source/contributing.rst#contributing-your-changes-to-pandas) section of our main contribution docs. We will review your changes, and you will most likely be asked to make additional changes before your pull request is finally ready to merge. However, once it's ready, we will merge it, and you will have successfully contributed to the codebase!
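As a companion to the documentation guidelines in the patch above, the Numpy Docstring Standard and the `versionadded` directive are easiest to absorb from a concrete case. The function below is invented purely for illustration (it is not part of pandas), and the version number is arbitrary; only the docstring layout matters.

``` python
import numpy as np


def clip_values(values, lower=None, upper=None):
    """
    Limit the values in an array-like to the interval [lower, upper].

    .. versionadded:: 0.20.0

    Parameters
    ----------
    values : array-like
        Input values to clip.
    lower : scalar, optional
        Minimum allowed value; smaller values are replaced by ``lower``.
    upper : scalar, optional
        Maximum allowed value; larger values are replaced by ``upper``.

    Returns
    -------
    clipped : numpy.ndarray
        Array with the same shape as ``values``.

    Examples
    --------
    >>> clip_values([1, 5, 10], lower=2, upper=8)
    array([2, 5, 8])
    """
    result = np.asarray(values)
    if lower is not None:
        result = np.maximum(result, lower)
    if upper is not None:
        result = np.minimum(result, upper)
    return result
```

A docstring in this shape is what numpydoc expects when the docs are built, and the `versionadded` line renders as *New in version 0.20.0*, exactly as described for that directive above.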
From 4136c0c75359705a565a414e60b94dfdfb571a6d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 10:01:48 -0500 Subject: [PATCH 088/353] TST: remove 4 builds from travis that are on circleci (#15465) --- .travis.yml | 109 ---------------------------------------------------- circle.yml | 5 ++- 2 files changed, 4 insertions(+), 110 deletions(-) diff --git a/.travis.yml b/.travis.yml index bb96ab210c088..97bf881f3b6fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -91,21 +91,6 @@ matrix: packages: - libatlas-base-dev - gfortran -# In allow_failures - - python: 2.7 - env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow_nnet_COMPAT" - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="it_IT.UTF-8" - - INSTALL_TEST=true - - JOB_TAG=_COMPAT - - CACHE_NAME="27_nslow_nnet_COMPAT" - - USE_CACHE=true - addons: - apt: - packages: - - language-pack-it # In allow_failures - python: 2.7 env: @@ -127,37 +112,6 @@ matrix: - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" - USE_CACHE=true -# In allow_failures - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_nslow" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_nslow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel - - language-pack-zh-hans -# In allow_failures - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_slow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel # In allow_failures - python: 3.5 env: @@ -173,16 +127,6 @@ matrix: packages: - libatlas-base-dev - gfortran -# In allow_failures - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_ascii" - - JOB_TAG=_ASCII - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="C" - - CACHE_NAME="35_ascii" - - USE_CACHE=true # In allow_failures - python: 3.5 env: @@ -203,20 +147,6 @@ matrix: - FULL_DEPS=true - CACHE_NAME="27_slow" - USE_CACHE=true - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_slow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel - python: 2.7 env: - PYTHON_VERSION=2.7 @@ -227,21 +157,6 @@ matrix: - BUILD_TEST=true - CACHE_NAME="27_build_test_conda" - USE_CACHE=true - - python: 3.4 - env: - - PYTHON_VERSION=3.4 - - JOB_NAME: "34_nslow" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - - CLIPBOARD=xsel - - CACHE_NAME="34_nslow" - - USE_CACHE=true - addons: - apt: - packages: - - xsel - - language-pack-zh-hans - python: 3.5 env: - PYTHON_VERSION=3.5 @@ -256,29 +171,6 @@ matrix: packages: - libatlas-base-dev - gfortran - - python: 2.7 - env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow_nnet_COMPAT" - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="it_IT.UTF-8" - - INSTALL_TEST=true - - JOB_TAG=_COMPAT - - CACHE_NAME="27_nslow_nnet_COMPAT" - - USE_CACHE=true - addons: - apt: - packages: - - language-pack-it - - python: 3.5 - env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_ascii" - - JOB_TAG=_ASCII - - TEST_ARGS="--skip-slow --skip-network" - - LOCALE_OVERRIDE="C" - - CACHE_NAME="35_ascii" - - USE_CACHE=true - python: 3.5 env: - PYTHON_VERSION=3.5 @@ -299,7 +191,6 @@ before_install: - pwd - uname -a - python -V -# git info & get tags - git --version - git tag - ci/before_install_travis.sh diff --git a/circle.yml b/circle.yml 
index 97136d368ae6f..046af6e9e1389 100644 --- a/circle.yml +++ b/circle.yml @@ -3,16 +3,18 @@ machine: # these are globally set MINICONDA_DIR: /home/ubuntu/miniconda3 + database: override: - ./ci/install_db_circle.sh + checkout: post: # since circleci does a shallow fetch # we need to populate our tags - git fetch --depth=1000 - - git fetch --tags + dependencies: override: @@ -29,6 +31,7 @@ dependencies: esac - ./ci/show_circle.sh + test: override: - case $CIRCLE_NODE_INDEX in 0) ./ci/run_circle.sh --skip-slow --skip-network ;; 1) ./ci/run_circle.sh --only-slow --skip-network ;; 2) ./ci/run_circle.sh --skip-slow --skip-network ;; 3) ./ci/run_circle.sh --skip-slow --skip-network ;; esac: From f9d774263af3e0cafbdec5ff82d086ff97e41bd6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 10:40:47 -0500 Subject: [PATCH 089/353] update README.md for badges (circleci and fix anaconda cloud pointer) --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4293d7294d5e0..195b76f64b37f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ - + @@ -30,6 +30,15 @@ + + + + + From b94186d4c58ee055656a84f55618be537db0095a Mon Sep 17 00:00:00 2001 From: Peter Csizsek Date: Thu, 23 Feb 2017 08:23:11 -0500 Subject: [PATCH 097/353] BUG: The roll_quantile function now throws an exception instead of causing a segfault when quantile is out of range closes #15463 Author: Peter Csizsek Closes #15476 from csizsek/fix-rolling-quantile-segfault and squashes the following commits: e31e5be [Peter Csizsek] Correctly catching exception in the test for Rolling.quantile. 4eea34a [Peter Csizsek] Refactored and moved exception throwing test to a new function for Rolling.quantile(). 8b1e020 [Peter Csizsek] Added a note about the Rolling.quantile bug fix to the changelog. f39b122 [Peter Csizsek] Added a new test case to roll_quantile_test to trigger a TypeError when called with a string. 
f736ca2 [Peter Csizsek] The roll_quantile function in window.pyx now raises a ValueError when the quantile value is not in [0.0, 1.0] --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/test_window.py | 14 +++++++++++++- pandas/window.pyx | 7 +++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e65276fe51fe8..fa24c973a7549 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -539,7 +539,7 @@ Bug Fixes - Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) - +- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 452e8999ab13f..3f2973a9834ca 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1063,7 +1063,7 @@ def test_rolling_max(self): window=3, min_periods=5) def test_rolling_quantile(self): - qs = [.1, .5, .9] + qs = [0.0, .1, .5, .9, 1.0] def scoreatpercentile(a, per): values = np.sort(a, axis=0) @@ -1084,6 +1084,18 @@ def alt(x): self._check_moment_func(f, alt, name='quantile', quantile=q) + def test_rolling_quantile_param(self): + ser = Series([0.0, .1, .5, .9, 1.0]) + + with self.assertRaises(ValueError): + ser.rolling(3).quantile(-0.1) + + with self.assertRaises(ValueError): + ser.rolling(3).quantile(10.0) + + with self.assertRaises(TypeError): + ser.rolling(3).quantile('foo') + def test_rolling_apply(self): # suppress warnings about empty slices, as we are deliberately testing # with a 0-length Series diff --git a/pandas/window.pyx b/pandas/window.pyx index 8235d68e2a88b..005d42c9f68be 100644 --- a/pandas/window.pyx +++ b/pandas/window.pyx @@ -134,8 +134,8 @@ cdef class WindowIndexer: bint is_variable def get_data(self): - return (self.start, self.end, self.N, - self.win, self.minp, + return (self.start, self.end, self.N, + self.win, self.minp, self.is_variable) @@ -1285,6 +1285,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, ndarray[int64_t] start, end ndarray[double_t] output + if quantile < 0.0 or quantile > 1.0: + raise ValueError("quantile value {0} not in [0, 1]".format(quantile)) + # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs start, end, N, win, minp, is_variable = get_window_indexer( From 2819478d3e199e8760684b30642fe41bee547173 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 24 Feb 2017 07:05:11 -0500 Subject: [PATCH 098/353] TST: add pytest to asv conf --- asv_bench/asv.conf.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 155deb5bdbd1f..4fc6f9f634426 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -50,7 +50,8 @@ "openpyxl": [], "xlsxwriter": [], "xlrd": [], - "xlwt": [] + "xlwt": [], + "pytest": [], }, // Combinations of libraries/python versions can be excluded/included From 81c57e20da278494dfebc2f1043f5ff361a234f3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 24 Feb 2017 
07:13:46 -0500 Subject: [PATCH 099/353] CLN: split off frozen (immutable) data structures into pandas/indexes/frozen.py should make it a bit easier to work with these; and no reason to be in pandas/core/base.py Author: Jeff Reback Closes #15477 from jreback/frozen and squashes the following commits: 2a64a4f [Jeff Reback] CLN: split off frozen (immutable) data structures into pandas/indexes/frozen.py --- pandas/compat/pickle_compat.py | 34 +++++++- pandas/core/base.py | 105 ----------------------- pandas/indexes/base.py | 13 +-- pandas/indexes/frozen.py | 126 ++++++++++++++++++++++++++++ pandas/indexes/multi.py | 6 +- pandas/tests/indexes/test_frozen.py | 68 +++++++++++++++ pandas/tests/test_base.py | 68 +-------------- 7 files changed, 231 insertions(+), 189 deletions(-) create mode 100644 pandas/indexes/frozen.py create mode 100644 pandas/tests/indexes/test_frozen.py diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 1cdf8afd563c6..240baa848adbc 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -52,12 +52,40 @@ def load_reduce(self): stack[-1] = value + +# if classes are moved, provide compat here +_class_locations_map = { + + # 15477 + ('pandas.core.base', 'FrozenNDArray'): ('pandas.indexes.frozen', 'FrozenNDArray'), + ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList') + } + + +# our Unpickler sub-class to override methods and some dispatcher +# functions for compat + if compat.PY3: class Unpickler(pkl._Unpickler): - pass + + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + return super(Unpickler, self).find_class(module, name) + else: + class Unpickler(pkl.Unpickler): - pass + + def find_class(self, module, name): + # override superclass + key = (module, name) + module, name = _class_locations_map.get(key, key) + __import__(module) + mod = sys.modules[module] + klass = getattr(mod, name) + return klass Unpickler.dispatch = copy.copy(Unpickler.dispatch) Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce @@ -76,8 +104,6 @@ def load_newobj(self): self.stack[-1] = obj Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj -# py3 compat - def load_newobj_ex(self): kwargs = self.stack.pop() diff --git a/pandas/core/base.py b/pandas/core/base.py index 92ec6bb3d73e6..55149198b0dbf 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -17,7 +17,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError -from pandas.formats.printing import pprint_thing _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -694,110 +693,6 @@ def _gotitem(self, key, ndim, subset=None): return self -class FrozenList(PandasObject, list): - - """ - Container that doesn't allow setting item *but* - because it's technically non-hashable, will be used - for lookups, appropriately, etc. 
- """ - # Sidenote: This has to be of type list, otherwise it messes up PyTables - # typechecks - - def __add__(self, other): - if isinstance(other, tuple): - other = list(other) - return self.__class__(super(FrozenList, self).__add__(other)) - - __iadd__ = __add__ - - # Python 2 compat - def __getslice__(self, i, j): - return self.__class__(super(FrozenList, self).__getslice__(i, j)) - - def __getitem__(self, n): - # Python 3 compat - if isinstance(n, slice): - return self.__class__(super(FrozenList, self).__getitem__(n)) - return super(FrozenList, self).__getitem__(n) - - def __radd__(self, other): - if isinstance(other, tuple): - other = list(other) - return self.__class__(other + list(self)) - - def __eq__(self, other): - if isinstance(other, (tuple, FrozenList)): - other = list(other) - return super(FrozenList, self).__eq__(other) - - __req__ = __eq__ - - def __mul__(self, other): - return self.__class__(super(FrozenList, self).__mul__(other)) - - __imul__ = __mul__ - - def __reduce__(self): - return self.__class__, (list(self),) - - def __hash__(self): - return hash(tuple(self)) - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError("'%s' does not support mutable operations." % - self.__class__.__name__) - - def __unicode__(self): - return pprint_thing(self, quote_strings=True, - escape_chars=('\t', '\r', '\n')) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, - str(self)) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - pop = append = extend = remove = sort = insert = _disabled - - -class FrozenNDArray(PandasObject, np.ndarray): - - # no __array_finalize__ for now because no metadata - def __new__(cls, data, dtype=None, copy=False): - if copy is None: - copy = not isinstance(data, FrozenNDArray) - res = np.array(data, dtype=dtype, copy=copy).view(cls) - return res - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError("'%s' does not support mutable operations." % - self.__class__) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - put = itemset = fill = _disabled - - def _shallow_copy(self): - return self.view() - - def values(self): - """returns *copy* of underlying array""" - arr = self.view(np.ndarray).copy() - return arr - - def __unicode__(self): - """ - Return a string representation for this object. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. 
- """ - prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'), - quote_strings=True) - return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) - - class IndexOpsMixin(object): """ common ops mixin to support a unified inteface / docs for Series / Index diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index f1f37622b2a74..4837fc0d7438c 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -35,16 +35,15 @@ needs_i8_conversion, is_iterator, is_list_like, is_scalar) -from pandas.types.cast import _coerce_indexer_dtype from pandas.core.common import (is_bool_indexer, _values_from_object, _asarray_tuplesafe) -from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, - IndexOpsMixin) +from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate, deprecate_kwarg) +from pandas.indexes.frozen import FrozenList import pandas.core.common as com import pandas.types.concat as _concat import pandas.core.missing as missing @@ -3844,14 +3843,6 @@ def _get_na_value(dtype): np.timedelta64: tslib.NaT}.get(dtype, np.nan) -def _ensure_frozen(array_like, categories, copy=False): - array_like = _coerce_indexer_dtype(array_like, categories) - array_like = array_like.view(FrozenNDArray) - if copy: - array_like = array_like.copy() - return array_like - - def _ensure_has_len(seq): """If seq is an iterator, put its values into a list.""" try: diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py new file mode 100644 index 0000000000000..e043ba64bbad7 --- /dev/null +++ b/pandas/indexes/frozen.py @@ -0,0 +1,126 @@ +""" +frozen (immutable) data structures to support MultiIndexing + +These are used for: + +- .names (FrozenList) +- .levels & .labels (FrozenNDArray) + +""" + +import numpy as np +from pandas.core.base import PandasObject +from pandas.types.cast import _coerce_indexer_dtype +from pandas.formats.printing import pprint_thing + + +class FrozenList(PandasObject, list): + + """ + Container that doesn't allow setting item *but* + because it's technically non-hashable, will be used + for lookups, appropriately, etc. + """ + # Sidenote: This has to be of type list, otherwise it messes up PyTables + # typechecks + + def __add__(self, other): + if isinstance(other, tuple): + other = list(other) + return self.__class__(super(FrozenList, self).__add__(other)) + + __iadd__ = __add__ + + # Python 2 compat + def __getslice__(self, i, j): + return self.__class__(super(FrozenList, self).__getslice__(i, j)) + + def __getitem__(self, n): + # Python 3 compat + if isinstance(n, slice): + return self.__class__(super(FrozenList, self).__getitem__(n)) + return super(FrozenList, self).__getitem__(n) + + def __radd__(self, other): + if isinstance(other, tuple): + other = list(other) + return self.__class__(other + list(self)) + + def __eq__(self, other): + if isinstance(other, (tuple, FrozenList)): + other = list(other) + return super(FrozenList, self).__eq__(other) + + __req__ = __eq__ + + def __mul__(self, other): + return self.__class__(super(FrozenList, self).__mul__(other)) + + __imul__ = __mul__ + + def __reduce__(self): + return self.__class__, (list(self),) + + def __hash__(self): + return hash(tuple(self)) + + def _disabled(self, *args, **kwargs): + """This method will not function because object is immutable.""" + raise TypeError("'%s' does not support mutable operations." 
% + self.__class__.__name__) + + def __unicode__(self): + return pprint_thing(self, quote_strings=True, + escape_chars=('\t', '\r', '\n')) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, + str(self)) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + pop = append = extend = remove = sort = insert = _disabled + + +class FrozenNDArray(PandasObject, np.ndarray): + + # no __array_finalize__ for now because no metadata + def __new__(cls, data, dtype=None, copy=False): + if copy is None: + copy = not isinstance(data, FrozenNDArray) + res = np.array(data, dtype=dtype, copy=copy).view(cls) + return res + + def _disabled(self, *args, **kwargs): + """This method will not function because object is immutable.""" + raise TypeError("'%s' does not support mutable operations." % + self.__class__) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + put = itemset = fill = _disabled + + def _shallow_copy(self): + return self.view() + + def values(self): + """returns *copy* of underlying array""" + arr = self.view(np.ndarray).copy() + return arr + + def __unicode__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'), + quote_strings=True) + return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) + + +def _ensure_frozen(array_like, categories, copy=False): + array_like = _coerce_indexer_dtype(array_like, categories) + array_like = array_like.view(FrozenNDArray) + if copy: + array_like = array_like.copy() + return array_like diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 18e1da7303d6d..ec30d2c44efd7 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -28,7 +28,6 @@ UnsortedIndexError) -from pandas.core.base import FrozenList import pandas.core.base as base from pandas.util.decorators import (Appender, cache_readonly, deprecate, deprecate_kwarg) @@ -39,9 +38,10 @@ from pandas.core.config import get_option -from pandas.indexes.base import (Index, _ensure_index, _ensure_frozen, +from pandas.indexes.base import (Index, _ensure_index, _get_na_value, InvalidIndexError, _index_shared_docs) +from pandas.indexes.frozen import FrozenNDArray, FrozenList, _ensure_frozen import pandas.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -1276,7 +1276,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, for new_label in taken: label_values = new_label.values() label_values[mask] = na_value - masked.append(base.FrozenNDArray(label_values)) + masked.append(FrozenNDArray(label_values)) taken = masked else: taken = [lab.take(indices) for lab in self.labels] diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py new file mode 100644 index 0000000000000..a82409fbf9513 --- /dev/null +++ b/pandas/tests/indexes/test_frozen.py @@ -0,0 +1,68 @@ +import numpy as np +from pandas.util import testing as tm +from pandas.tests.test_base import CheckImmutable, CheckStringMixin +from pandas.indexes.frozen import FrozenList, FrozenNDArray +from pandas.compat import u + + +class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): + mutable_methods = ('extend', 'pop', 'remove', 'insert') + unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) + + def setUp(self): + self.lst = [1, 2, 3, 4, 5] + self.container = FrozenList(self.lst) + self.klass = 
FrozenList + + def test_add(self): + result = self.container + (1, 2, 3) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) + self.check_result(result, expected) + + def test_inplace(self): + q = r = self.container + q += [5] + self.check_result(q, self.lst + [5]) + # other shouldn't be mutated + self.check_result(r, self.lst) + + +class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): + mutable_methods = ('put', 'itemset', 'fill') + unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) + + def setUp(self): + self.lst = [3, 5, 7, -2] + self.container = FrozenNDArray(self.lst) + self.klass = FrozenNDArray + + def test_shallow_copying(self): + original = self.container.copy() + self.assertIsInstance(self.container.view(), FrozenNDArray) + self.assertFalse(isinstance( + self.container.view(np.ndarray), FrozenNDArray)) + self.assertIsNot(self.container.view(), self.container) + self.assert_numpy_array_equal(self.container, original) + # shallow copy should be the same too + self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) + + # setting should not be allowed + def testit(container): + container[0] = 16 + + self.check_mutable_error(testit, self.container) + + def test_values(self): + original = self.container.view(np.ndarray).copy() + n = original[0] + 15 + vals = self.container.values() + self.assert_numpy_array_equal(original, vals) + self.assertIsNot(original, vals) + vals[0] = n + self.assertIsInstance(self.container, FrozenNDArray) + self.assert_numpy_array_equal(self.container.values(), original) + self.assertEqual(vals[0], n) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 473f1d81c9532..8264ad33950f9 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -14,10 +14,9 @@ import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta) -from pandas.compat import u, StringIO +from pandas.compat import StringIO from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate, - NoNewAttributesMixin) +from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.base import DatetimeIndexOpsMixin @@ -83,69 +82,6 @@ def check_result(self, result, expected, klass=None): self.assertEqual(result, expected) -class TestFrozenList(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('extend', 'pop', 'remove', 'insert') - unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [1, 2, 3, 4, 5] - self.container = FrozenList(self.lst) - self.klass = FrozenList - - def test_add(self): - result = self.container + (1, 2, 3) - expected = FrozenList(self.lst + [1, 2, 3]) - self.check_result(result, expected) - - result = (1, 2, 3) + self.container - expected = FrozenList([1, 2, 3] + self.lst) - self.check_result(result, expected) - - def test_inplace(self): - q = r = self.container - q += [5] - self.check_result(q, self.lst + [5]) - # other shouldn't be mutated - self.check_result(r, self.lst) - - -class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): - mutable_methods = ('put', 'itemset', 'fill') - unicode_container = FrozenNDArray([u("\u05d0"), u("\u05d1"), "c"]) - - def setUp(self): - self.lst = [3, 5, 7, -2] - self.container = FrozenNDArray(self.lst) - self.klass = 
FrozenNDArray - - def test_shallow_copying(self): - original = self.container.copy() - self.assertIsInstance(self.container.view(), FrozenNDArray) - self.assertFalse(isinstance( - self.container.view(np.ndarray), FrozenNDArray)) - self.assertIsNot(self.container.view(), self.container) - self.assert_numpy_array_equal(self.container, original) - # shallow copy should be the same too - self.assertIsInstance(self.container._shallow_copy(), FrozenNDArray) - - # setting should not be allowed - def testit(container): - container[0] = 16 - - self.check_mutable_error(testit, self.container) - - def test_values(self): - original = self.container.view(np.ndarray).copy() - n = original[0] + 15 - vals = self.container.values() - self.assert_numpy_array_equal(original, vals) - self.assertIsNot(original, vals) - vals[0] = n - self.assertIsInstance(self.container, pd.core.base.FrozenNDArray) - self.assert_numpy_array_equal(self.container.values(), original) - self.assertEqual(vals[0], n) - - class TestPandasDelegate(tm.TestCase): class Delegator(object): From 924c16667ee3db5d025c0963f99a778de8aad398 Mon Sep 17 00:00:00 2001 From: Fumito Hamamura Date: Sat, 25 Feb 2017 04:53:43 +0900 Subject: [PATCH 100/353] DOC: Fix to docstrings of is_type_factory and is_instance_factory (#15499) Closes #15485 --- pandas/core/config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index 1c0eb60b8ec2f..39ed2f9545266 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -747,8 +747,8 @@ def is_type_factory(_type): Returns ------- - validator - a function of a single argument x , which returns the - True if type(x) is equal to `_type` + validator - a function of a single argument x , which raises + ValueError if type(x) is not equal to `_type` """ @@ -768,8 +768,8 @@ def is_instance_factory(_type): Returns ------- - validator - a function of a single argument x , which returns the - True if x is an instance of `_type` + validator - a function of a single argument x , which raises + ValueError if x is not an instance of `_type` """ if isinstance(_type, (tuple, list)): From 3fe85afef47e9e079a0fa24f826bb6faaa2341d5 Mon Sep 17 00:00:00 2001 From: Prasanjit Prakash Date: Fri, 24 Feb 2017 14:56:09 -0500 Subject: [PATCH 101/353] BUG: incorrect ranking in an ordered categorical check for categorical, and then pass the underlying integer codes. 
closes #15420 Author: Prasanjit Prakash Closes #15422 from ikilledthecat/rank_categorical and squashes the following commits: a7e573b [Prasanjit Prakash] moved test for categorical, in rank, to top 3ba4e3a [Prasanjit Prakash] corrections after rebasing c43a029 [Prasanjit Prakash] using if/else construct to pick sorting function for categoricals f8ec019 [Prasanjit Prakash] ask Categorical for ranking function 40d88c1 [Prasanjit Prakash] return values for rank from categorical object 049c0fc [Prasanjit Prakash] GH#15420 added support for na_option when ranking categorical 5e5bbeb [Prasanjit Prakash] BUG: GH#15420 rank for categoricals ef999c3 [Prasanjit Prakash] merged with upstream master fbaba1b [Prasanjit Prakash] return values for rank from categorical object fa0b4c2 [Prasanjit Prakash] BUG: GH15420 - _rank private method on Categorical 9a6b5cd [Prasanjit Prakash] BUG: GH15420 - _rank private method on Categorical 4220e56 [Prasanjit Prakash] BUG: GH15420 - _rank private method on Categorical 6b70921 [Prasanjit Prakash] GH#15420 move rank inside categoricals bf4e36c [Prasanjit Prakash] GH#15420 added support for na_option when ranking categorical ce90207 [Prasanjit Prakash] BUG: GH#15420 rank for categoricals 85b267a [Prasanjit Prakash] Added support for categorical datatype in rank - issue#15420 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 5 +- pandas/core/categorical.py | 22 ++++++++ pandas/tests/series/test_analytics.py | 78 +++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fa24c973a7549..0b501adba5039 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -578,6 +578,7 @@ Bug Fixes +- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4ae46fe33a5cc..b11927a80fb2e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -973,6 +973,10 @@ def _hashtable_algo(f, values, return_dtype=None): def _get_data_algo(values, func_map): f = None + + if is_categorical_dtype(values): + values = values._values_for_rank() + if is_float_dtype(values): f = func_map['float64'] values = _ensure_float64(values) @@ -988,7 +992,6 @@ def _get_data_algo(values, func_map): elif is_unsigned_integer_dtype(values): f = func_map['uint64'] values = _ensure_uint64(values) - else: values = _ensure_object(values) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index b6898f11ffa74..b88a6b171b316 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1404,6 +1404,28 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'): return self._constructor(values=codes, categories=self.categories, ordered=self.ordered, fastpath=True) + def _values_for_rank(self): + """ + For correctly ranking ordered categorical data. See GH#15420 + + Ordered categorical data should be ranked on the basis of + codes with -1 translated to NaN. + + Returns + ------- + numpy array + + """ + if self.ordered: + values = self.codes + mask = values == -1 + if mask.any(): + values = values.astype('float64') + values[mask] = np.nan + else: + values = np.array(self) + return values + def order(self, inplace=False, ascending=True, na_position='last'): """ DEPRECATED: use :meth:`Categorical.sort_values`. 
That function diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 222165e9d3633..b092e4f084767 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1057,6 +1057,84 @@ def test_rank(self): iranks = iseries.rank() assert_series_equal(iranks, exp) + def test_rank_categorical(self): + # GH issue #15420 rank incorrectly orders ordered categories + + # Test ascending/descending ranking for ordered categoricals + exp = pd.Series([1., 2., 3., 4., 5., 6.]) + exp_desc = pd.Series([6., 5., 4., 3., 2., 1.]) + ordered = pd.Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] + ).astype('category', ).cat.set_categories( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ordered=True + ) + assert_series_equal(ordered.rank(), exp) + assert_series_equal(ordered.rank(ascending=False), exp_desc) + + # Unordered categoricals should be ranked as objects + unordered = pd.Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype('category').cat.set_categories( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ordered=False + ) + exp_unordered = pd.Series([2., 4., 6., 3., 1., 5.]) + res = unordered.rank() + assert_series_equal(res, exp_unordered) + + # Test na_option for rank data + na_ser = pd.Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] + ).astype('category', ).cat.set_categories( + [ + 'first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh' + ], + ordered=True + ) + + exp_top = pd.Series([2., 3., 4., 5., 6., 7., 1.]) + exp_bot = pd.Series([1., 2., 3., 4., 5., 6., 7.]) + exp_keep = pd.Series([1., 2., 3., 4., 5., 6., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top'), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep'), exp_keep) + + # Test na_option for rank data with ascending False + exp_top = pd.Series([7., 6., 5., 4., 3., 2., 1.]) + exp_bot = pd.Series([6., 5., 4., 3., 2., 1., 7.]) + exp_keep = pd.Series([6., 5., 4., 3., 2., 1., np.NaN]) + + assert_series_equal( + na_ser.rank(na_option='top', ascending=False), + exp_top + ) + assert_series_equal( + na_ser.rank(na_option='bottom', ascending=False), + exp_bot + ) + assert_series_equal( + na_ser.rank(na_option='keep', ascending=False), + exp_keep + ) + + # Test with pct=True + na_ser = pd.Series( + ['first', 'second', 'third', 'fourth', np.NaN], + ).astype('category').cat.set_categories( + ['first', 'second', 'third', 'fourth'], + ordered=True + ) + exp_top = pd.Series([0.4, 0.6, 0.8, 1., 0.2]) + exp_bot = pd.Series([0.2, 0.4, 0.6, 0.8, 1.]) + exp_keep = pd.Series([0.25, 0.5, 0.75, 1., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep) + def test_rank_signature(self): s = Series([0, 1]) s.rank(method='average') From 7e0a71b02d77a8efbadf2e8c804dbff59639061e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 24 Feb 2017 14:59:04 -0500 Subject: [PATCH 102/353] BUG: Accept Generic Array-Like for .where Author: gfyoung Closes #15414 from gfyoung/generic-where-gen-array and squashes the following commits: 5037932 [gfyoung] BUG: Accept generic array-like in .where --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 42 +++++----- pandas/indexes/base.py | 2 +- pandas/tests/frame/test_indexing.py | 89 
++++++++++++++++++++++ pandas/tests/indexes/common.py | 12 +++ pandas/tests/indexes/period/test_period.py | 11 ++- pandas/tests/indexes/test_category.py | 12 ++- pandas/tests/indexes/test_multi.py | 9 +++ pandas/tests/series/test_indexing.py | 54 +++++++++++++ 9 files changed, 211 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0b501adba5039..4b3a65780f939 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -548,6 +548,7 @@ Bug Fixes +- Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 76fbb9884753d..921fa2fb1bd48 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4726,25 +4726,37 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, """ inplace = validate_bool_kwarg(inplace, 'inplace') + # align the cond to same shape as myself cond = com._apply_if_callable(cond, self) - if isinstance(cond, NDFrame): cond, _ = cond.align(self, join='right', broadcast_axis=1) else: if not hasattr(cond, 'shape'): - raise ValueError('where requires an ndarray like object for ' - 'its condition') + cond = np.asanyarray(cond) if cond.shape != self.shape: raise ValueError('Array conditional must be same shape as ' 'self') cond = self._constructor(cond, **self._construct_axes_dict()) - if inplace: - cond = -(cond.fillna(True).astype(bool)) + # make sure we are boolean + fill_value = True if inplace else False + cond = cond.fillna(fill_value) + + msg = "Boolean array expected for the condition, not {dtype}" + + if not isinstance(cond, pd.DataFrame): + # This is a single-dimensional object. + if not is_bool_dtype(cond): + raise ValueError(msg.format(dtype=cond.dtype)) else: - cond = cond.fillna(False).astype(bool) + for dt in cond.dtypes: + if not is_bool_dtype(dt): + raise ValueError(msg.format(dtype=dt)) - # try to align + cond = cond.astype(bool, copy=False) + cond = -cond if inplace else cond + + # try to align with other try_quick = True if hasattr(other, 'align'): @@ -4891,26 +4903,20 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, Parameters ---------- - cond : boolean %(klass)s, array or callable + cond : boolean %(klass)s, array-like, or callable If cond is callable, it is computed on the %(klass)s and - should return boolean %(klass)s or array. - The callable must not change input %(klass)s - (though pandas doesn't check it). + should return boolean %(klass)s or array. The callable must + not change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 - A callable can be used as cond. - other : scalar, %(klass)s, or callable If other is callable, it is computed on the %(klass)s and - should return scalar or %(klass)s. - The callable must not change input %(klass)s - (though pandas doesn't check it). + should return scalar or %(klass)s. The callable must not + change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 - A callable can be used as other. 
- inplace : boolean, default False Whether to perform the operation in place on the data axis : alignment axis if needed, default None diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 4837fc0d7438c..dcbcccdfcd610 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -573,7 +573,7 @@ def repeat(self, repeats, *args, **kwargs): Parameters ---------- - cond : boolean same length as self + cond : boolean array-like with the same length as self other : scalar, or array-like """ diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index c06faa75ed346..18fb17b98570a 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2479,6 +2479,95 @@ def _check_set(df, cond, check_dtypes=True): expected = df[df['a'] == 1].reindex(df.index) assert_frame_equal(result, expected) + def test_where_array_like(self): + # see gh-15414 + klasses = [list, tuple, np.array] + + df = DataFrame({'a': [1, 2, 3]}) + cond = [[False], [True], [True]] + expected = DataFrame({'a': [np.nan, 2, 3]}) + + for klass in klasses: + result = df.where(klass(cond)) + assert_frame_equal(result, expected) + + df['b'] = 2 + expected['b'] = [2, np.nan, 2] + cond = [[False, True], [True, False], [True, True]] + + for klass in klasses: + result = df.where(klass(cond)) + assert_frame_equal(result, expected) + + def test_where_invalid_input(self): + # see gh-15414: only boolean arrays accepted + df = DataFrame({'a': [1, 2, 3]}) + msg = "Boolean array expected for the condition" + + conds = [ + [[1], [0], [1]], + Series([[2], [5], [7]]), + DataFrame({'a': [2, 5, 7]}), + [["True"], ["False"], ["True"]], + [[Timestamp("2017-01-01")], + [pd.NaT], [Timestamp("2017-01-02")]] + ] + + for cond in conds: + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + df['b'] = 2 + conds = [ + [[0, 1], [1, 0], [1, 1]], + Series([[0, 2], [5, 0], [4, 7]]), + [["False", "True"], ["True", "False"], + ["True", "True"]], + DataFrame({'a': [2, 5, 7], 'b': [4, 8, 9]}), + [[pd.NaT, Timestamp("2017-01-01")], + [Timestamp("2017-01-02"), pd.NaT], + [Timestamp("2017-01-03"), Timestamp("2017-01-03")]] + ] + + for cond in conds: + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + def test_where_dataframe_col_match(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + cond = DataFrame([[True, False, True], [False, False, True]]) + + out = df.where(cond) + expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]]) + tm.assert_frame_equal(out, expected) + + cond.columns = ["a", "b", "c"] # Columns no longer match. 
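# [Editorial example, not part of this patch] A minimal sketch of the behaviour
# these tests exercise: plain lists/tuples are now accepted as the condition
# for .where(), while non-boolean conditionals raise ValueError. Assumes a
# pandas build with this patch applied.
import pandas as pd

s = pd.Series([1, 2, 3])
print(s.where([False, True, True]).tolist())  # [nan, 2.0, 3.0]
# s.where([1, 0, 1])  # would raise ValueError: Boolean array expected ...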
+ msg = "Boolean array expected for the condition" + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + + cond = [True] + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + cond = np.array([False, True, False, True]) + with tm.assertRaisesRegexp(ValueError, msg): + df.where(cond) + + expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + def test_where_bug(self): # GH 2793 diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 81ad0524807f3..7b39a33266ffa 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -497,6 +497,18 @@ def test_where(self): result = i.where(cond) tm.assert_index_equal(result, expected) + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_setops_errorcases(self): for name, idx in compat.iteritems(self.indices): # # non-iterable input diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6a8128bb8985f..b80ab6feeeb23 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -89,13 +89,22 @@ def test_where(self): expected = i tm.assert_index_equal(result, expected) - i2 = i.copy() i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') result = i.where(notnull(i2)) expected = i2 tm.assert_index_equal(result, expected) + def test_where_array_like(self): + i = self.create_index() + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, Series] + expected = pd.PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D') + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_where_other(self): i = self.create_index() diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 6b6885c082533..64a0e71bd5ace 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -240,13 +240,23 @@ def test_where(self): expected = i tm.assert_index_equal(result, expected) - i2 = i.copy() i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(), categories=i.categories) result = i.where(notnull(i2)) expected = i2 tm.assert_index_equal(result, expected) + def test_where_array_like(self): + i = self.create_index() + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.CategoricalIndex([np.nan] + i[1:].tolist(), + categories=i.categories) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_append(self): ci = self.create_index() diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 5611492b4af1b..80ff67ab3d043 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -88,6 +88,15 @@ def f(): self.assertRaises(NotImplementedError, f) + def 
test_where_array_like(self): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + klasses = [list, tuple, np.array, pd.Series] + cond = [False, True] + + for klass in klasses: + f = lambda: i.where(klass(cond)) + self.assertRaises(NotImplementedError, f) + def test_repeat(self): reps = 2 numbers = [1, 2, 3] diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index a20cb8324d2a3..8a2cc53b42938 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1193,6 +1193,60 @@ def f(): expected = Series(np.nan, index=[9]) assert_series_equal(result, expected) + def test_where_array_like(self): + # see gh-15414 + s = Series([1, 2, 3]) + cond = [False, True, True] + expected = Series([np.nan, 2, 3]) + klasses = [list, tuple, np.array, Series] + + for klass in klasses: + result = s.where(klass(cond)) + assert_series_equal(result, expected) + + def test_where_invalid_input(self): + # see gh-15414: only boolean arrays accepted + s = Series([1, 2, 3]) + msg = "Boolean array expected for the condition" + + conds = [ + [1, 0, 1], + Series([2, 5, 7]), + ["True", "False", "True"], + [Timestamp("2017-01-01"), + pd.NaT, Timestamp("2017-01-02")] + ] + + for cond in conds: + with tm.assertRaisesRegexp(ValueError, msg): + s.where(cond) + + msg = "Array conditional must be same shape as self" + with tm.assertRaisesRegexp(ValueError, msg): + s.where([True]) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + s = Series([1, 2, 3]) + + cond = [True] + with tm.assertRaisesRegexp(ValueError, msg): + s.where(cond) + + expected = Series([1, np.nan, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + cond = np.array([False, True, False, True]) + with tm.assertRaisesRegexp(ValueError, msg): + s.where(cond) + + expected = Series([np.nan, 2, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + def test_where_setitem_invalid(self): # GH 2702 From 595580464a256fb883e8baa5b6e62f2013f0cf1a Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Fri, 24 Feb 2017 15:07:25 -0500 Subject: [PATCH 103/353] BUG: GH #12223, GH #15262. Allow ints for names in MultiIndex closes #12223 closes #15262 Author: Dr-Irv Closes #15478 from Dr-Irv/Issue15262 and squashes the following commits: 15d8433 [Dr-Irv] Address jreback comments 10667a3 [Dr-Irv] Fix types for test 8935068 [Dr-Irv] resolve conflicts 385ca3e [Dr-Irv] BUG: GH #12223, GH #15262. 
Allow ints for names in MultiIndex --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 6 +++--- pandas/core/groupby.py | 6 +++--- pandas/core/reshape.py | 3 ++- pandas/formats/format.py | 2 +- pandas/indexes/base.py | 10 ++++++---- pandas/indexes/multi.py | 14 ++++++++------ pandas/io/sql.py | 2 +- pandas/tests/frame/test_combine_concat.py | 18 ++++++++++++++++++ pandas/util/doctools.py | 6 +++--- 10 files changed, 46 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4b3a65780f939..7426b5ca2a69d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -550,6 +550,7 @@ Bug Fixes - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) +- Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) - Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bfef2cfbd0d51..ce3481fc17c5b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2876,7 +2876,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, names = [x for x in self.index.names] if isinstance(self.index, MultiIndex): for i in range(self.index.nlevels): - arrays.append(self.index.get_level_values(i)) + arrays.append(self.index._get_level_values(i)) else: arrays.append(self.index) @@ -2886,9 +2886,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # append all but the last column so we don't have to modify # the end of this loop for n in range(col.nlevels - 1): - arrays.append(col.get_level_values(n)) + arrays.append(col._get_level_values(n)) - level = col.get_level_values(col.nlevels - 1) + level = col._get_level_values(col.nlevels - 1) names.extend(col.names) elif isinstance(col, Series): level = col._values diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 0b3fcba1c1ba5..831ca3886773e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -291,8 +291,8 @@ def _set_grouper(self, obj, sort=False): # equivalent to the axis name if isinstance(ax, MultiIndex): level = ax._get_level_number(level) - ax = Index(ax.get_level_values( - level), name=ax.names[level]) + ax = Index(ax._get_level_values(level), + name=ax.names[level]) else: if level not in (0, ax.name): @@ -761,7 +761,7 @@ def _index_with_as_index(self, b): gp = self.grouper levels = chain((gp.levels[i][gp.labels[i][b]] for i in range(len(gp.groupings))), - (original.get_level_values(i)[b] + (original._get_level_values(i)[b] for i in range(original.nlevels))) new = MultiIndex.from_arrays(list(levels)) new.names = gp.names + original.names diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 5fc0d590a6885..87cb088c2e91e 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -811,7 +811,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, mdata[value_name] = frame.values.ravel('F') for i, col in enumerate(var_name): # asanyarray will keep the columns as an Index - mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N) + mdata[col] = np.asanyarray(frame.columns + ._get_level_values(i)).repeat(N) return DataFrame(mdata, columns=mcolumns) diff --git a/pandas/formats/format.py 
b/pandas/formats/format.py index 6b235b5e1bc33..4c081770e0125 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1566,7 +1566,7 @@ def _save_header(self): if isinstance(index_label, list) and len(index_label) > 1: col_line.extend([''] * (len(index_label) - 1)) - col_line.extend(columns.get_level_values(i)) + col_line.extend(columns._get_level_values(i)) writer.writerow(col_line) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index dcbcccdfcd610..5d43d2d32af67 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2334,9 +2334,9 @@ def set_value(self, arr, key, value): self._engine.set_value(_values_from_object(arr), _values_from_object(key), value) - def get_level_values(self, level): + def _get_level_values(self, level): """ - Return vector of label values for requested level, equal to the length + Return an Index of values for requested level, equal to the length of the index Parameters @@ -2345,12 +2345,14 @@ def get_level_values(self, level): Returns ------- - values : ndarray + values : Index """ - # checks that level number is actually just 1 + self._validate_index_level(level) return self + get_level_values = _get_level_values + _index_shared_docs['get_indexer'] = """ Compute indexer and mask for new index given the current index. The indexer should be then used as an input to ndarray.take to align the diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index ec30d2c44efd7..23a42265a149b 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -684,7 +684,7 @@ def is_monotonic_increasing(self): """ # reversed() because lexsort() wants the most significant key last. - values = [self._get_level_values(i) + values = [self._get_level_values(i).values for i in reversed(range(len(self.levels)))] try: sort_order = np.lexsort(values) @@ -866,7 +866,8 @@ def _get_level_values(self, level): labels = self.labels[level] filled = algos.take_1d(unique._values, labels, fill_value=unique._na_value) - return filled + values = unique._shallow_copy(filled) + return values def get_level_values(self, level): """ @@ -883,7 +884,7 @@ def get_level_values(self, level): """ level = self._get_level_number(level) values = self._get_level_values(level) - return self.levels[level]._shallow_copy(values) + return values def format(self, space=2, sparsify=None, adjoin=True, names=False, na_rep=None, formatter=None): @@ -966,7 +967,8 @@ def to_frame(self, index=True): """ from pandas import DataFrame - result = DataFrame({(name or level): self.get_level_values(level) + result = DataFrame({(name or level): + self._get_level_values(level) for name, level in zip(self.names, range(len(self.levels)))}, copy=False) @@ -1301,8 +1303,8 @@ def append(self, other): for o in other): arrays = [] for i in range(self.nlevels): - label = self.get_level_values(i) - appended = [o.get_level_values(i) for o in other] + label = self._get_level_values(i) + appended = [o._get_level_values(i) for o in other] arrays.append(label.append(appended)) return MultiIndex.from_arrays(arrays, names=self.names) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index bace43e785dff..2ab642b3af0c7 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -749,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper): if self.index is not None: for i, idx_label in enumerate(self.index): idx_type = dtype_mapper( - self.frame.index.get_level_values(i)) + self.frame.index._get_level_values(i)) column_names_and_types.append((text_type(idx_label), idx_type, True)) diff --git 
a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index eed4d6261d6e8..6f06a55ad065e 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -422,6 +422,24 @@ def test_concat_axis_parameter(self): with assertRaisesRegexp(ValueError, 'No axis named'): pd.concat([series1, series2], axis='something') + def test_concat_numerical_names(self): + # #15262 # #12223 + df = pd.DataFrame({'col': range(9)}, + dtype='int32', + index=(pd.MultiIndex + .from_product([['A0', 'A1', 'A2'], + ['B0', 'B1', 'B2']], + names=[1, 2]))) + result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :])) + expected = pd.DataFrame({'col': [0, 1, 7, 8]}, + dtype='int32', + index=pd.MultiIndex.from_tuples([('A0', 'B0'), + ('A0', 'B1'), + ('A2', 'B1'), + ('A2', 'B2')], + names=[1, 2])) + tm.assert_frame_equal(result, expected) + class TestDataFrameCombineFirst(tm.TestCase, TestData): diff --git a/pandas/util/doctools.py b/pandas/util/doctools.py index 62dcba1405581..6df6444aeafab 100644 --- a/pandas/util/doctools.py +++ b/pandas/util/doctools.py @@ -113,12 +113,12 @@ def _insert_index(self, data): else: for i in range(idx_nlevels): data.insert(i, 'Index{0}'.format(i), - data.index.get_level_values(i)) + data.index._get_level_values(i)) col_nlevels = data.columns.nlevels if col_nlevels > 1: - col = data.columns.get_level_values(0) - values = [data.columns.get_level_values(i).values + col = data.columns._get_level_values(0) + values = [data.columns._get_level_values(i).values for i in range(1, col_nlevels)] col_df = pd.DataFrame(values) data.columns = col_df.columns From d80275dfaa6a8ad50bc49dbaef9eacd5509008dc Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Fri, 24 Feb 2017 15:37:18 -0500 Subject: [PATCH 104/353] BUG: msgpack supports CategoricalIndex closes #15487 Author: Arco Bast Closes #15493 from abast/CategoricalIndex_msgpack and squashes the following commits: c1c68e4 [Arco Bast] corrections 3c1f2e7 [Arco Bast] whatsnew 215c2aa [Arco Bast] improve tests cd9354f [Arco Bast] improve tests 7895c16 [Arco Bast] flake8 f3f492a [Arco Bast] fix test 91d85cb [Arco Bast] msgpack supports CategoricalIndex --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/io/packers.py | 2 +- pandas/tests/io/test_packers.py | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7426b5ca2a69d..c94429b469641 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -615,6 +615,7 @@ Bug Fixes - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) +- Bug in ``pd.read_msgpack`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) @@ -630,3 +631,4 @@ Bug Fixes - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) +- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) diff --git 
a/pandas/io/packers.py b/pandas/io/packers.py index 3f4be6ad459d8..7afe8a06b6af1 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -54,7 +54,7 @@ from pandas import (Timestamp, Period, Series, DataFrame, # noqa Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, - Categorical) + Categorical, CategoricalIndex) from pandas.tslib import NaTType from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 097c03937ca68..251c6ae8b4dec 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -311,6 +311,7 @@ def setUp(self): 'period': Index(period_range('2012-1-1', freq='M', periods=3)), 'date2': Index(date_range('2013-01-1', periods=10)), 'bdate': Index(bdate_range('2013-01-02', periods=10)), + 'cat': tm.makeCategoricalIndex(100) } self.mi = { @@ -349,6 +350,13 @@ def test_unicode(self): i_rec = self.encode_decode(i) self.assert_index_equal(i, i_rec) + def categorical_index(self): + # GH15487 + df = DataFrame(np.random.randn(10, 2)) + df = df.astype({0: 'category'}).set_index(0) + result = self.encode_decode(df) + tm.assert_frame_equal(result, df) + class TestSeries(TestPackers): From 303541eba0797f30c6f10084acbd522220cbc56a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 25 Feb 2017 22:38:56 +0100 Subject: [PATCH 105/353] DOC: fix doc build warnings (#15505) --- doc/source/advanced.rst | 1 + doc/source/basics.rst | 2 +- doc/source/contributing.rst | 6 +++--- doc/source/install.rst | 2 +- doc/source/io.rst | 8 ++++---- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/generic.py | 1 + pandas/io/html.py | 2 +- pandas/io/json/normalize.py | 9 ++++----- pandas/io/parsers.py | 10 +++++----- 10 files changed, 22 insertions(+), 21 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index b6f015c15606d..f380070ddac79 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -965,6 +965,7 @@ The different indexing operation can potentially change the dtype of a ``Series` res .. ipython:: python + series2 = pd.Series([True]) series2.dtype res = series2.reindex_like(series1) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index f5f7c73223595..f649b3fd8a9a3 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1889,7 +1889,7 @@ gotchas Performing selection operations on ``integer`` type data can easily upcast the data to ``floating``. The dtype of the input data will be preserved in cases where ``nans`` are not introduced (starting in 0.11.0) -See also :ref:`Support for integer ``NA`` ` +See also :ref:`Support for integer NA ` .. ipython:: python diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 5c2bb9b73d618..2f838a3ab2386 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -461,7 +461,7 @@ C (cpplint) *pandas* uses the `Google `_ standard. Google provides an open source style checker called ``cpplint``, but we -use a fork of it that can be found `here `_. +use a fork of it that can be found `here `__. Here are *some* of the more common ``cpplint`` issues: - we restrict line-length to 80 characters to promote readability @@ -479,7 +479,7 @@ You can also run this command on an entire directory if necessary:: To make your commits compliant with this standard, you can install the `ClangFormat `_ tool, which can be -downloaded `here `_. 
To configure, in your home directory, +downloaded `here `__. To configure, in your home directory, run the following command:: clang-format style=google -dump-config > .clang-format @@ -611,7 +611,7 @@ Or with one of the following constructs:: pytest pandas/tests/[test-module].py::[TestClass] pytest pandas/tests/[test-module].py::[TestClass]::[test_method] -For more, see the `pytest`_ documentation. +For more, see the `pytest `_ documentation. .. versionadded:: 0.20.0 diff --git a/doc/source/install.rst b/doc/source/install.rst index 80a5d7e7d375b..8b0fec6a3dac3 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -282,7 +282,7 @@ Optional Dependencies okay.) * `BeautifulSoup4`_ and `lxml`_ * `BeautifulSoup4`_ and `html5lib`_ and `lxml`_ - * Only `lxml`_, although see :ref:`HTML Table Parsing ` + * Only `lxml`_, although see :ref:`HTML Table Parsing ` for reasons as to why you should probably **not** take this approach. .. warning:: diff --git a/doc/source/io.rst b/doc/source/io.rst index 55ef2c09d43e4..35e8b77782183 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2043,7 +2043,7 @@ Reading HTML Content .. warning:: - We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas` + We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas ` below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. .. versionadded:: 0.12.0 @@ -4681,7 +4681,7 @@ The key functions are: Supported Data Types -++++++++++++++++++++ +'''''''''''''''''''' Pandas supports all these `BigQuery data types `__: ``STRING``, ``INTEGER`` (64bit), ``FLOAT`` (64 bit), ``BOOLEAN`` and @@ -4689,7 +4689,7 @@ Pandas supports all these `BigQuery data types `. + HTML parsing libraries `. Expect to do some cleanup after you call this function. For example, you might need to manually assign column names if the column names are diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index d684441c5974d..f29472155da17 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -106,11 +106,10 @@ def json_normalize(data, record_path=None, meta=None, path to records is ['foo', 'bar'] meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' - - * ignore : will ignore KeyError if keys listed in meta are not - always present - * raise : will raise KeyError if keys listed in meta are not - always present + * 'ignore' : will ignore KeyError if keys listed in meta are not + always present + * 'raise' : will raise KeyError if keys listed in meta are not + always present .. versionadded:: 0.20.0 diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 88d0c6c12c04f..78c5247818970 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -181,7 +181,7 @@ If True and parse_dates is enabled, pandas will attempt to infer the format of the datetime strings in the columns, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the - parsing speed by ~5-10x. + parsing speed by 5-10x. keep_date_col : boolean, default False If True and parse_dates specifies combining multiple columns then keep the original columns. @@ -200,10 +200,10 @@ Return TextFileReader object for iteration or getting chunks with ``get_chunk()``. chunksize : int, default None - Return TextFileReader object for iteration. `See IO Tools docs for more - information - `_ on - ``iterator`` and ``chunksize``. + Return TextFileReader object for iteration. 
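# [Editorial example, not part of this patch] Sketch of the iterator/chunksize
# behaviour this docstring describes: read_csv returns a TextFileReader that
# yields DataFrame chunks of the requested size.
import pandas as pd
from pandas.compat import StringIO

reader = pd.read_csv(StringIO("a,b\n1,2\n3,4\n5,6\n"), chunksize=2)
for chunk in reader:
    print(chunk.shape)  # (2, 2) then (1, 2)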
+ See the `IO Tools docs + `_ + for more information on ``iterator`` and ``chunksize``. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' For on-the-fly decompression of on-disk data. If 'infer', then use gzip, bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2', From b3ae4c7698de4623e1279d579b46192ef79250d1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 25 Feb 2017 17:26:06 -0500 Subject: [PATCH 106/353] DOC: Fix versionadded for cond in .where (#15509) [ci skip] --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85c7130ca2827..cdc37e00f70e0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4910,6 +4910,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, not change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 + A callable can be used as cond. other : scalar, %(klass)s, or callable If other is callable, it is computed on the %(klass)s and @@ -4917,6 +4918,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, change input %(klass)s (though pandas doesn't check it). .. versionadded:: 0.18.1 + A callable can be used as other. inplace : boolean, default False Whether to perform the operation in place on the data From fb7dc7dcbde1d81dea28b1b83e1c3bd171a7e73d Mon Sep 17 00:00:00 2001 From: Stephen Rauch Date: Mon, 27 Feb 2017 09:39:27 -0500 Subject: [PATCH 107/353] BUG: Parse two date columns broken in read_csv with multiple headers In `io/parsers/_try_convert_dates()` when selecting columns based on a column index from a set of columns with multi- level names, the column `name` was converted to a string. This appears to be a bug since the `name` was a tuple before the conversion. 
This causes problems downstream when there is an attempt to use this name to lookup a column, and that lookup fails because the desired column is keyed from the tuple, not its string representation closes #15376 Author: Stephen Rauch Closes #15378 from stephenrauch/fix_read_csv_merge_datetime and squashes the following commits: 030f5ec [Stephen Rauch] BUG: Parse two date columns broken in read_csv with multiple headers --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 2 +- pandas/tests/io/parser/parse_dates.py | 19 +++++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 123fc346441cb..be487e165c602 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -625,6 +625,7 @@ Bug Fixes +- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 78c5247818970..811844ec35deb 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2858,7 +2858,7 @@ def _try_convert_dates(parser, colspec, data_dict, columns): if c in colset: colnames.append(c) elif isinstance(c, int) and c not in columns: - colnames.append(str(columns[c])) + colnames.append(columns[c]) else: colnames.append(c) diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 6197d07d4eafa..b1960159bb41d 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -18,6 +18,7 @@ import pandas.tseries.tools as tools import pandas.util.testing as tm +import pandas.io.date_converters as conv from pandas import DataFrame, Series, Index, DatetimeIndex from pandas import compat from pandas.compat import parse_date, StringIO, lrange @@ -491,3 +492,21 @@ def test_parse_dates_noconvert_thousands(self): result = self.read_csv(StringIO(data), index_col=[0, 1], parse_dates=True, thousands='.') tm.assert_frame_equal(result, expected) + + def test_parse_date_time_multi_level_column_name(self): + data = """\ +D,T,A,B +date, time,a,b +2001-01-05, 09:00:00, 0.0, 10. +2001-01-06, 00:00:00, 1.0, 11. 
+""" + datecols = {'date_time': [0, 1]} + result = self.read_csv(StringIO(data), sep=',', header=[0, 1], + parse_dates=datecols, + date_parser=conv.parse_date_time) + + expected_data = [[datetime(2001, 1, 5, 9, 0, 0), 0., 10.], + [datetime(2001, 1, 6, 0, 0, 0), 1., 11.]] + expected = DataFrame(expected_data, + columns=['date_time', ('A', 'a'), ('B', 'b')]) + tm.assert_frame_equal(result, expected) From 6c17f67aafd7de8af96032aa415fc798fa3b73ca Mon Sep 17 00:00:00 2001 From: Stephen Rauch Date: Mon, 27 Feb 2017 10:41:56 -0500 Subject: [PATCH 108/353] BUG: GH15426 timezone lost in groupby-agg with cython functions closes #15426 Author: Stephen Rauch Closes #15433 from stephenrauch/tz-lost-in-groupby-agg and squashes the following commits: 64a84ca [Stephen Rauch] BUG: GH15426 timezone lost in groupby-agg with cython functions --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/groupby/test_aggregate.py | 31 +++++++++++++++++++++++++- pandas/tests/types/test_cast.py | 12 +++++++++- pandas/types/cast.py | 3 ++- 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index be487e165c602..f337d4404abfc 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -622,6 +622,7 @@ Bug Fixes - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index a1fc97eb8d780..cb739546a2312 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -6,7 +6,7 @@ """ from __future__ import print_function -from datetime import datetime +from datetime import datetime, timedelta from functools import partial import numpy as np @@ -738,3 +738,32 @@ def test_agg_over_numpy_arrays(self): columns=expected_column) assert_frame_equal(result, expected) + + def test_agg_timezone_round_trip(self): + # GH 15426 + ts = pd.Timestamp("2016-01-01 12:00:00", tz='US/Pacific') + df = pd.DataFrame({'a': 1, 'b': [ts + timedelta(minutes=nn) + for nn in range(10)]}) + + result1 = df.groupby('a')['b'].agg(np.min).iloc[0] + result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0] + result3 = df.groupby('a')['b'].min().iloc[0] + + assert result1 == ts + assert result2 == ts + assert result3 == ts + + dates = [pd.Timestamp("2016-01-0%d 12:00:00" % i, tz='US/Pacific') + for i in range(1, 5)] + df = pd.DataFrame({'A': ['a', 'b'] * 2, 'B': dates}) + grouped = df.groupby('A') + + ts = df['B'].iloc[0] + assert ts == grouped.nth(0)['B'].iloc[0] + assert ts == grouped.head(1)['B'].iloc[0] + assert ts == grouped.first()['B'].iloc[0] + assert ts == grouped.apply(lambda x: x.iloc[0])[0] + + ts = df['B'].iloc[2] + assert ts == grouped.last()['B'].iloc[0] + assert ts == grouped.apply(lambda x: x.iloc[-1])[0] diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 497130b117289..70f69cc7d5701 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -8,7 +8,7 @@ from datetime import datetime import numpy as np -from pandas import Timedelta, Timestamp +from pandas import Timedelta, Timestamp, DatetimeIndex from pandas.types.cast 
import (_possibly_downcast_to_dtype, _possibly_convert_objects, _infer_dtype_from_scalar, @@ -71,6 +71,16 @@ def test_datetimelikes_nan(self): res = _possibly_downcast_to_dtype(arr, 'timedelta64[ns]') tm.assert_numpy_array_equal(res, exp) + def test_datetime_with_timezone(self): + # GH 15426 + ts = Timestamp("2016-01-01 12:00:00", tz='US/Pacific') + exp = DatetimeIndex([ts, ts]) + res = _possibly_downcast_to_dtype(exp, exp.dtype) + tm.assert_index_equal(res, exp) + + res = _possibly_downcast_to_dtype(exp.asi8, exp.dtype) + tm.assert_index_equal(res, exp) + class TestInferDtype(tm.TestCase): diff --git a/pandas/types/cast.py b/pandas/types/cast.py index b1a17df64aecf..8cc3fe41f73c8 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -133,7 +133,8 @@ def trans(x): # noqa if dtype.tz: # convert to datetime and change timezone from pandas import to_datetime - result = to_datetime(result).tz_localize(dtype.tz) + result = to_datetime(result).tz_localize('utc') + result = result.tz_convert(dtype.tz) except: pass From 25dcff597162a12dbe419da2ae23d9b0d6322bee Mon Sep 17 00:00:00 2001 From: Alexis Mignon Date: Thu, 16 Jun 2016 15:11:46 +0200 Subject: [PATCH 109/353] BUG: Fix a bug occuring when using DataFrame.to_records with unicode column names in python 2. closes #11879 closes #13462 --- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/core/frame.py | 12 ++++++++---- pandas/tests/frame/test_convert_to.py | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f337d4404abfc..947a114f1ce95 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -615,7 +615,8 @@ Bug Fixes - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) -- Bug in ``pd.read_msgpack`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce3481fc17c5b..adf397e63984f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1105,13 +1105,17 @@ def to_records(self, index=True, convert_datetime64=True): count += 1 elif index_names[0] is None: index_names = ['index'] - names = lmap(str, index_names) + lmap(str, self.columns) + names = (lmap(compat.text_type, index_names) + + lmap(compat.text_type, self.columns)) else: arrays = [self[c].get_values() for c in self.columns] - names = lmap(str, self.columns) + names = lmap(compat.text_type, self.columns) - dtype = np.dtype([(x, v.dtype) for x, v in zip(names, arrays)]) - return np.rec.fromarrays(arrays, dtype=dtype, names=names) + formats = [v.dtype for v in arrays] + return np.rec.fromarrays( + arrays, + dtype={'names': names, 'formats': formats} + ) @classmethod def from_items(cls, items, columns=None, orient='columns'): diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 1bc8313726d0c..0dde113dd5147 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -177,3 +177,18 @@ def 
test_to_records_with_unicode_index(self): .to_records() expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')]) tm.assert_almost_equal(result, expected) + + def test_to_records_with_unicode_column_names(self): + # xref issue: https://github.com/numpy/numpy/issues/2407 + # Issue #11879. to_records used to raise an exception when used + # with column names containing non ascii caracters in Python 2 + result = DataFrame(data={u"accented_name_é": [1.0]}).to_records() + + # Note that numpy allows for unicode field names but dtypes need + # to be specified using dictionnary intsead of list of tuples. + expected = np.rec.array( + [(0, 1.0)], + dtype={"names": ["index", u"accented_name_é"], + "formats": [' Date: Mon, 27 Feb 2017 14:26:07 -0500 Subject: [PATCH 110/353] BUG: reindex_like after shape comparison in assert_frame_equal, if check_like, the former code reindex_like before shape comparison. for example: if left.shape=(2,2), right.shpae=(2.0), after reindex_like, left.shape=(2,0),right.shape=(2,0),then the shape comparison will not find out that the two dataframes are different. For that, the assert_frame_equal will not raise assertion errors. But in fact it should raise. Author: jojomdt Closes #15496 from jojomdt/master and squashes the following commits: 7b3437b [jojomdt] fix test_frame_equal_message error 0340b5c [jojomdt] change check_like description c03e0af [jojomdt] add test for TestAssertFrameEqual 470dbaa [jojomdt] combine row and column shape comparison ce7bd74 [jojomdt] reindex_like after shape comparison --- pandas/tests/test_testing.py | 32 +++++++++++++++++--------------- pandas/util/testing.py | 25 ++++++++----------------- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 07bfdc8fc9078..2fb58ef70e3cb 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -13,8 +13,6 @@ RNGContext) from pandas.compat import is_platform_windows -# let's get meta. 
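# [Editorial example, not part of this patch] Sketch of what this change means
# for users of assert_frame_equal: with check_like=True row/column order is
# still ignored, but a shape mismatch now fails *before* any reindexing.
import pandas as pd
import pandas.util.testing as tm

df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c'])
df2 = pd.DataFrame({'A': [3, 2, 1], 'B': [6, 5, 4]}, index=['c', 'b', 'a'])
tm.assert_frame_equal(df1, df2, check_like=True)  # passes: same data, reordered
# tm.assert_frame_equal(df1, df2)  # would raise: rows not in the same order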
- class TestAssertAlmostEqual(tm.TestCase): @@ -594,6 +592,20 @@ def _assert_not_equal(self, a, b, **kwargs): self.assertRaises(AssertionError, assert_frame_equal, a, b, **kwargs) self.assertRaises(AssertionError, assert_frame_equal, b, a, **kwargs) + def test_equal_with_different_row_order(self): + # check_like=True ignores row-column orderings + df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + index=['a', 'b', 'c']) + df2 = pd.DataFrame({'A': [3, 2, 1], 'B': [6, 5, 4]}, + index=['c', 'b', 'a']) + + self._assert_equal(df1, df2, check_like=True) + self._assert_not_equal(df1, df2) + + def test_not_equal_with_different_shape(self): + self._assert_not_equal(pd.DataFrame({'A': [1, 2, 3]}), + pd.DataFrame({'A': [1, 2, 3, 4]})) + def test_index_dtype(self): df1 = DataFrame.from_records( {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a']) @@ -621,19 +633,9 @@ def test_frame_equal_message(self): expected = """DataFrame are different -DataFrame shape \\(number of rows\\) are different -\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) -\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - - with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}), - pd.DataFrame({'A': [1, 2, 3, 4]})) - - expected = """DataFrame are different - -DataFrame shape \\(number of columns\\) are different -\\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) -\\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" +DataFrame shape mismatch +\\[left\\]: \\(3, 2\\) +\\[right\\]: \\(3, 1\\)""" with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1bd539469dbe3..e4b10488c69b2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1254,7 +1254,7 @@ def assert_frame_equal(left, right, check_dtype=True, check_categorical : bool, default True Whether to compare internal Categorical exactly. check_like : bool, default False - If true, then reindex_like operands + If true, ignore the order of rows & columns obj : str, default 'DataFrame' Specify object name being compared, internally used to show appropriate assertion message @@ -1270,25 +1270,16 @@ def assert_frame_equal(left, right, check_dtype=True, assertIsInstance(left, type(right)) # assert_class_equal(left, right, obj=obj) + # shape comparison + if left.shape != right.shape: + raise_assert_detail(obj, + 'DataFrame shape mismatch', + '({0}, {1})'.format(*left.shape), + '({0}, {1})'.format(*right.shape)) + if check_like: left, right = left.reindex_like(right), right - # shape comparison (row) - if left.shape[0] != right.shape[0]: - raise_assert_detail(obj, - 'DataFrame shape (number of rows) are different', - '{0}, {1}'.format(left.shape[0], left.index), - '{0}, {1}'.format(right.shape[0], right.index)) - # shape comparison (columns) - if left.shape[1] != right.shape[1]: - raise_assert_detail(obj, - 'DataFrame shape (number of columns) ' - 'are different', - '{0}, {1}'.format(left.shape[1], - left.columns), - '{0}, {1}'.format(right.shape[1], - right.columns)) - # index comparison assert_index_equal(left.index, right.index, exact=check_index_type, check_names=check_names, From fed1827afaabb2ed2988643aba2d2be627634cf9 Mon Sep 17 00:00:00 2001 From: Aleksey Bilogur Date: Mon, 27 Feb 2017 15:26:00 -0500 Subject: [PATCH 111/353] TST: DataFrame.hist() does not get along with matplotlib.pyplot.tight_layout() (#15515) * Add unit test for #9351 * Tweaks. 
* add _check_plot_works; rm aux method * Add whatsnew entry. --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/plotting/test_hist_method.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 947a114f1ce95..f13b584a4ee13 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -629,7 +629,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - +- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 0.2.0``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 4f64f66bd3c4d..22de7055e3cea 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -238,6 +238,16 @@ def test_hist_layout(self): with tm.assertRaises(ValueError): df.hist(layout=(-1, -1)) + @slow + # GH 9351 + def test_tight_layout(self): + if self.mpl_ge_2_0_0: + df = DataFrame(randn(100, 2)) + _check_plot_works(df.hist) + self.plt.tight_layout() + + tm.close() + @tm.mplskip class TestDataFrameGroupByPlots(TestPlotBase): From e15de4d484dbff8f941c9d5cc31869d503d9c020 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 15:47:10 -0500 Subject: [PATCH 112/353] CLN: remove pandas/io/gbq.py and tests and replace with pandas-gbq closes #15347 Author: Jeff Reback Closes #15484 from jreback/gbq and squashes the following commits: 0fd8d06 [Jeff Reback] wip 3222de1 [Jeff Reback] CLN: remove pandas/io/gbq.py and tests and replace with pandas-gbq --- ci/requirements-2.7.pip | 5 +- ci/requirements-3.4.pip | 3 - ci/requirements-3.4_SLOW.pip | 3 - ci/requirements-3.5.pip | 1 + doc/source/io.rst | 289 +------ doc/source/whatsnew/v0.20.0.txt | 9 + pandas/core/frame.py | 8 +- pandas/io/gbq.py | 1192 +---------------------------- pandas/tests/io/test_gbq.py | 1242 +------------------------------ pandas/util/decorators.py | 38 +- pandas/util/print_versions.py | 3 +- 11 files changed, 116 insertions(+), 2677 deletions(-) delete mode 100644 ci/requirements-3.4_SLOW.pip diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index d16b932c8be4f..08240184f2934 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,8 +1,5 @@ blosc -httplib2 -google-api-python-client==1.2 -python-gflags==2.0 -oauth2client==1.5.0 +pandas-gbq pathlib backports.lzma py diff --git a/ci/requirements-3.4.pip b/ci/requirements-3.4.pip index 55986a0220bf0..4e5fe52d56cf1 100644 --- a/ci/requirements-3.4.pip +++ b/ci/requirements-3.4.pip @@ -1,5 +1,2 @@ python-dateutil==2.2 blosc -httplib2 -google-api-python-client -oauth2client diff --git a/ci/requirements-3.4_SLOW.pip b/ci/requirements-3.4_SLOW.pip deleted file mode 100644 index 05c938abcbab6..0000000000000 --- a/ci/requirements-3.4_SLOW.pip +++ /dev/null @@ -1,3 +0,0 @@ -httplib2 -google-api-python-client -oauth2client diff --git a/ci/requirements-3.5.pip b/ci/requirements-3.5.pip index 0d9e44cf39fa4..6e4f7b65f9728 100644 --- a/ci/requirements-3.5.pip +++ b/ci/requirements-3.5.pip @@ -1 +1,2 @@ xarray==0.9.1 
+pandas-gbq diff --git a/doc/source/io.rst b/doc/source/io.rst index 35e8b77782183..b36ae8c2ed450 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4652,293 +4652,18 @@ And then issue the following queries: Google BigQuery --------------- -.. versionadded:: 0.13.0 - -The :mod:`pandas.io.gbq` module provides a wrapper for Google's BigQuery -analytics web service to simplify retrieving results from BigQuery tables -using SQL-like queries. Result sets are parsed into a pandas -DataFrame with a shape and data types derived from the source table. -Additionally, DataFrames can be inserted into new BigQuery tables or appended -to existing tables. - -.. warning:: - - To use this module, you will need a valid BigQuery account. Refer to the - `BigQuery Documentation `__ - for details on the service itself. - -The key functions are: - -.. currentmodule:: pandas.io.gbq - -.. autosummary:: - :toctree: generated/ - - read_gbq - to_gbq - -.. currentmodule:: pandas - - -Supported Data Types -'''''''''''''''''''' - -Pandas supports all these `BigQuery data types `__: -``STRING``, ``INTEGER`` (64bit), ``FLOAT`` (64 bit), ``BOOLEAN`` and -``TIMESTAMP`` (microsecond precision). Data types ``BYTES`` and ``RECORD`` -are not supported. - -Integer and boolean ``NA`` handling -''''''''''''''''''''''''''''''''''' - -.. versionadded:: 0.20 - -Since all columns in BigQuery queries are nullable, and NumPy lacks of ``NA`` -support for integer and boolean types, this module will store ``INTEGER`` or -``BOOLEAN`` columns with at least one ``NULL`` value as ``dtype=object``. -Otherwise those columns will be stored as ``dtype=int64`` or ``dtype=bool`` -respectively. - -This is opposite to default pandas behaviour which will promote integer -type to float in order to store NAs. See the :ref:`gotchas` -for detailed explaination. - -While this trade-off works well for most cases, it breaks down for storing -values greater than 2**53. Such values in BigQuery can represent identifiers -and unnoticed precision lost for identifier is what we want to avoid. - -.. _io.bigquery_deps: - -Dependencies -'''''''''''' - -This module requires following additional dependencies: - -- `httplib2 `__: HTTP client -- `google-api-python-client `__: Google's API client -- `oauth2client `__: authentication and authorization for Google's API - -.. _io.bigquery_authentication: - -Authentication -'''''''''''''' - -.. versionadded:: 0.18.0 - -Authentication to the Google ``BigQuery`` service is via ``OAuth 2.0``. -Is possible to authenticate with either user account credentials or service account credentials. - -Authenticating with user account credentials is as simple as following the prompts in a browser window -which will be automatically opened for you. You will be authenticated to the specified -``BigQuery`` account using the product name ``pandas GBQ``. It is only possible on local host. -The remote authentication using user account credentials is not currently supported in pandas. -Additional information on the authentication mechanism can be found -`here `__. - -Authentication with service account credentials is possible via the `'private_key'` parameter. This method -is particularly useful when working on remote servers (eg. jupyter iPython notebook on remote host). -Additional information on service accounts can be found -`here `__. - -Authentication via ``application default credentials`` is also possible. This is only valid -if the parameter ``private_key`` is not provided. 
This method also requires that -the credentials can be fetched from the environment the code is running in. -Otherwise, the OAuth2 client-side authentication is used. -Additional information on -`application default credentials `__. - -.. versionadded:: 0.19.0 - -.. note:: - - The `'private_key'` parameter can be set to either the file path of the service account key - in JSON format, or key contents of the service account key in JSON format. - -.. note:: - - A private key can be obtained from the Google developers console by clicking - `here `__. Use JSON key type. - -.. _io.bigquery_reader: - -Querying -'''''''' - -Suppose you want to load all data from an existing BigQuery table : `test_dataset.test_table` -into a DataFrame using the :func:`~pandas.io.gbq.read_gbq` function. - -.. code-block:: python - - # Insert your BigQuery Project ID Here - # Can be found in the Google web console - projectid = "xxxxxxxx" - - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', projectid) - - -You can define which column from BigQuery to use as an index in the -destination DataFrame as well as a preferred column order as follows: - -.. code-block:: python - - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - index_col='index_column_name', - col_order=['col1', 'col2', 'col3'], projectid) - - -Starting with 0.20.0, you can specify the query config as parameter to use additional options of your job. -For more information about query configuration parameters see -`here `__. - -.. code-block:: python - - configuration = { - 'query': { - "useQueryCache": False - } - } - data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table', - configuration=configuration, projectid) - - -.. note:: - - You can find your project id in the `Google developers console `__. - - -.. note:: - - You can toggle the verbose output via the ``verbose`` flag which defaults to ``True``. - -.. note:: - - The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL - or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``. For more information - on BigQuery's standard SQL, see `BigQuery SQL Reference - `__ - -.. _io.bigquery_writer: - -Writing DataFrames -'''''''''''''''''' - -Assume we want to write a DataFrame ``df`` into a BigQuery table using :func:`~pandas.DataFrame.to_gbq`. - -.. ipython:: python - - df = pd.DataFrame({'my_string': list('abc'), - 'my_int64': list(range(1, 4)), - 'my_float64': np.arange(4.0, 7.0), - 'my_bool1': [True, False, True], - 'my_bool2': [False, True, False], - 'my_dates': pd.date_range('now', periods=3)}) - - df - df.dtypes - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid) - -.. note:: - - The destination table and destination dataset will automatically be created if they do not already exist. - -The ``if_exists`` argument can be used to dictate whether to ``'fail'``, ``'replace'`` -or ``'append'`` if the destination table already exists. The default value is ``'fail'``. - -For example, assume that ``if_exists`` is set to ``'fail'``. The following snippet will raise -a ``TableCreationError`` if the destination table already exists. - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid, if_exists='fail') - -.. note:: - - If the ``if_exists`` argument is set to ``'append'``, the destination dataframe will - be written to the table using the defined table schema and column types. The - dataframe must match the destination table in structure and data types. 
- If the ``if_exists`` argument is set to ``'replace'``, and the existing table has a - different schema, a delay of 2 minutes will be forced to ensure that the new schema - has propagated in the Google environment. See - `Google BigQuery issue 191 `__. - -Writing large DataFrames can result in errors due to size limitations being exceeded. -This can be avoided by setting the ``chunksize`` argument when calling :func:`~pandas.DataFrame.to_gbq`. -For example, the following writes ``df`` to a BigQuery table in batches of 10000 rows at a time: - -.. code-block:: python - - df.to_gbq('my_dataset.my_table', projectid, chunksize=10000) - -You can also see the progress of your post via the ``verbose`` flag which defaults to ``True``. -For example: - -.. code-block:: python - - In [8]: df.to_gbq('my_dataset.my_table', projectid, chunksize=10000, verbose=True) - - Streaming Insert is 10% Complete - Streaming Insert is 20% Complete - Streaming Insert is 30% Complete - Streaming Insert is 40% Complete - Streaming Insert is 50% Complete - Streaming Insert is 60% Complete - Streaming Insert is 70% Complete - Streaming Insert is 80% Complete - Streaming Insert is 90% Complete - Streaming Insert is 100% Complete - -.. note:: - - If an error occurs while streaming data to BigQuery, see - `Troubleshooting BigQuery Errors `__. - -.. note:: - - The BigQuery SQL query language has some oddities, see the - `BigQuery Query Reference Documentation `__. - -.. note:: - - While BigQuery uses SQL-like syntax, it has some important differences from traditional - databases both in functionality, API limitations (size and quantity of queries or uploads), - and how Google charges for use of the service. You should refer to `Google BigQuery documentation `__ - often as the service seems to be changing and evolving. BiqQuery is best for analyzing large - sets of data quickly, but it is not a direct replacement for a transactional database. - -.. _io.bigquery_create_tables: - -Creating BigQuery Tables -'''''''''''''''''''''''' - .. warning:: - As of 0.17, the function :func:`~pandas.io.gbq.generate_bq_schema` has been deprecated and will be - removed in a future version. - -As of 0.15.2, the gbq module has a function :func:`~pandas.io.gbq.generate_bq_schema` which will -produce the dictionary representation schema of the specified pandas DataFrame. - -.. code-block:: ipython - - In [10]: gbq.generate_bq_schema(df, default_type='STRING') + Starting in 0.20.0, pandas has split off Google BigQuery support into the + separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. - Out[10]: {'fields': [{'name': 'my_bool1', 'type': 'BOOLEAN'}, - {'name': 'my_bool2', 'type': 'BOOLEAN'}, - {'name': 'my_dates', 'type': 'TIMESTAMP'}, - {'name': 'my_float64', 'type': 'FLOAT'}, - {'name': 'my_int64', 'type': 'INTEGER'}, - {'name': 'my_string', 'type': 'STRING'}]} - -.. note:: +The ``pandas-gbq`` package provides functionality to read/write from Google BigQuery. - If you delete and re-create a BigQuery table with the same name, but different table schema, - you must wait 2 minutes before streaming data into the table. As a workaround, consider creating - the new table with a different name. Refer to - `Google BigQuery issue 191 `__. +pandas integrates with this external package. If ``pandas-gbq`` is installed, you can +use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the +respective functions from ``pandas-gbq``. +Full documentation can be found `here `__
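As a quick illustration of the new integration (a minimal sketch: it assumes ``pandas-gbq`` is installed and that ``"my-project"`` is replaced with a valid BigQuery project id), the pandas entry points are called exactly as before and simply forward to ``pandas_gbq``:

.. code-block:: python

    import pandas as pd

    projectid = "my-project"  # placeholder: your BigQuery project id

    # pd.read_gbq forwards the call to pandas_gbq.read_gbq
    df = pd.read_gbq('SELECT 1 AS x', project_id=projectid)

    # DataFrame.to_gbq forwards the call to pandas_gbq.to_gbq
    df.to_gbq('my_dataset.my_table', projectid, if_exists='fail')

..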
_io.stata: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f13b584a4ee13..f0e4176472861 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -360,6 +360,15 @@ New Behavior: In [5]: df['a']['2011-12-31 23:59:59'] Out[5]: 1 +.. _whatsnew_0200.api_breaking.gbq: + +Pandas Google BigQuery support has moved +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. +The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.2``. (:issue:`15347`) +Documentation is now hosted `here `__ + .. _whatsnew_0200.api_breaking.memory_usage: Memory Usage for Index is more Accurate diff --git a/pandas/core/frame.py b/pandas/core/frame.py index adf397e63984f..7b02926ea8837 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,7 +77,8 @@ OrderedDict, raise_with_traceback) from pandas import compat from pandas.compat.numpy import function as nv -from pandas.util.decorators import deprecate_kwarg, Appender, Substitution +from pandas.util.decorators import (deprecate_kwarg, Appender, + Substitution, docstring_wrapper) from pandas.util.validators import validate_bool_kwarg from pandas.tseries.period import PeriodIndex @@ -941,6 +942,11 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, chunksize=chunksize, verbose=verbose, reauth=reauth, if_exists=if_exists, private_key=private_key) + def _f(): + from pandas.io.gbq import _try_import + return _try_import().to_gbq.__doc__ + to_gbq = docstring_wrapper(to_gbq, _f) + @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None): diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index a5558866937cf..3407f51af5e83 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,1180 +1,52 @@ -import warnings -from datetime import datetime -import json -import logging -from time import sleep -import uuid -import time -import sys +""" Google BigQuery support """ -import numpy as np +from pandas.util.decorators import docstring_wrapper -from distutils.version import StrictVersion -from pandas import compat, DataFrame, concat -from pandas.core.common import PandasError -from pandas.compat import lzip, bytes_to_str - - -def _check_google_client_version(): +def _try_import(): + # since pandas is a dependency of pandas-gbq + # we need to import on first use try: - import pkg_resources - + import pandas_gbq except ImportError: - raise ImportError('Could not import pkg_resources (setuptools).') - - if compat.PY3: - google_api_minimum_version = '1.4.1' - else: - google_api_minimum_version = '1.2.0' - - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( - 'google-api-python-client').version - - if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) < - StrictVersion(google_api_minimum_version)): - raise ImportError("pandas requires google-api-python-client >= {0} " - "for Google BigQuery support, " - "current version {1}" - .format(google_api_minimum_version, - _GOOGLE_API_CLIENT_VERSION)) - - -def _test_google_api_imports(): - - try: - import httplib2 # noqa - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - from oauth2client.client import AccessTokenRefreshError # noqa - from 
oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow, argparser # noqa - except ImportError as e: - raise ImportError("Missing module required for Google BigQuery " - "support: {0}".format(str(e))) - - -logger = logging.getLogger('pandas.io.gbq') -logger.setLevel(logging.ERROR) - - -class InvalidPrivateKeyFormat(PandasError, ValueError): - """ - Raised when provided private key has invalid format. - """ - pass - - -class AccessDenied(PandasError, ValueError): - """ - Raised when invalid credentials are provided, or tokens have expired. - """ - pass - - -class DatasetCreationError(PandasError, ValueError): - """ - Raised when the create dataset method fails - """ - pass - - -class GenericGBQException(PandasError, ValueError): - """ - Raised when an unrecognized Google API Error occurs. - """ - pass - - -class InvalidColumnOrder(PandasError, ValueError): - """ - Raised when the provided column order for output - results DataFrame does not match the schema - returned by BigQuery. - """ - pass - - -class InvalidPageToken(PandasError, ValueError): - """ - Raised when Google BigQuery fails to return, - or returns a duplicate page token. - """ - pass - - -class InvalidSchema(PandasError, ValueError): - """ - Raised when the provided DataFrame does - not match the schema of the destination - table in BigQuery. - """ - pass - - -class NotFoundException(PandasError, ValueError): - """ - Raised when the project_id, table or dataset provided in the query could - not be found. - """ - pass - - -class StreamingInsertError(PandasError, ValueError): - """ - Raised when BigQuery reports a streaming insert error. - For more information see `Streaming Data Into BigQuery - `__ - """ - - -class TableCreationError(PandasError, ValueError): - """ - Raised when the create table method fails - """ - pass - - -class GbqConnector(object): - scope = 'https://www.googleapis.com/auth/bigquery' - - def __init__(self, project_id, reauth=False, verbose=False, - private_key=None, dialect='legacy'): - _check_google_client_version() - _test_google_api_imports() - self.project_id = project_id - self.reauth = reauth - self.verbose = verbose - self.private_key = private_key - self.dialect = dialect - self.credentials = self.get_credentials() - self.service = self.get_service() - - def get_credentials(self): - if self.private_key: - return self.get_service_account_credentials() - else: - # Try to retrieve Application Default Credentials - credentials = self.get_application_default_credentials() - if not credentials: - credentials = self.get_user_account_credentials() - return credentials - - def get_application_default_credentials(self): - """ - This method tries to retrieve the "default application credentials". - This could be useful for running code on Google Cloud Platform. - - .. versionadded:: 0.19.0 - - Parameters - ---------- - None - - Returns - ------- - - GoogleCredentials, - If the default application credentials can be retrieved - from the environment. The retrieved credentials should also - have access to the project (self.project_id) on BigQuery. - - OR None, - If default application credentials can not be retrieved - from the environment. Or, the retrieved credentials do not - have access to the project (self.project_id) on BigQuery. 
- """ - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build - try: - from oauth2client.client import GoogleCredentials - except ImportError: - return None - - try: - credentials = GoogleCredentials.get_application_default() - except: - return None - - http = httplib2.Http() - try: - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - # Check if the application has rights to the BigQuery project - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=self.project_id, body=job_data).execute() - return credentials - except: - return None - - def get_user_account_credentials(self): - from oauth2client.client import OAuth2WebServerFlow - from oauth2client.file import Storage - from oauth2client.tools import run_flow, argparser - - flow = OAuth2WebServerFlow( - client_id=('495642085510-k0tmvj2m941jhre2nbqka17vqpjfddtd' - '.apps.googleusercontent.com'), - client_secret='kOc9wMptUtxkcIFbtZCcrEAc', - scope=self.scope, - redirect_uri='urn:ietf:wg:oauth:2.0:oob') - - storage = Storage('bigquery_credentials.dat') - credentials = storage.get() - - if credentials is None or credentials.invalid or self.reauth: - credentials = run_flow(flow, storage, argparser.parse_args([])) - - return credentials - - def get_service_account_credentials(self): - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True - - try: - from oauth2client.client import SignedJwtAssertionCredentials - except ImportError: - oauth2client_v1 = False - - try: - from oauth2client.service_account import ServiceAccountCredentials - except ImportError: - oauth2client_v2 = False - - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") - - from os.path import isfile - - try: - if isfile(self.private_key): - with open(self.private_key) as f: - json_key = json.loads(f.read()) - else: - # ugly hack: 'private_key' field has new lines inside, - # they break json parser, but we need to preserve them - json_key = json.loads(self.private_key.replace('\n', ' ')) - json_key['private_key'] = json_key['private_key'].replace( - ' ', '\n') - - if compat.PY3: - json_key['private_key'] = bytes( - json_key['private_key'], 'UTF-8') - - if oauth2client_v1: - return SignedJwtAssertionCredentials( - json_key['client_email'], - json_key['private_key'], - self.scope, - ) - else: - return ServiceAccountCredentials.from_json_keyfile_dict( - json_key, - self.scope) - except (KeyError, ValueError, TypeError, AttributeError): - raise InvalidPrivateKeyFormat( - "Private key is missing or invalid. It should be service " - "account private key JSON (file path or string contents) " - "with at least two keys: 'client_email' and 'private_key'. " - "Can be obtained from: https://console.developers.google." 
- "com/permissions/serviceaccounts") - - def _print(self, msg, end='\n'): - if self.verbose: - sys.stdout.write(msg + end) - sys.stdout.flush() - - def _start_timer(self): - self.start = time.time() - - def get_elapsed_seconds(self): - return round(time.time() - self.start, 2) - - def print_elapsed_seconds(self, prefix='Elapsed', postfix='s.', - overlong=7): - sec = self.get_elapsed_seconds() - if sec > overlong: - self._print('{} {} {}'.format(prefix, sec, postfix)) - - # http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size - @staticmethod - def sizeof_fmt(num, suffix='b'): - fmt = "%3.1f %s%s" - for unit in ['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z']: - if abs(num) < 1024.0: - return fmt % (num, unit, suffix) - num /= 1024.0 - return fmt % (num, 'Y', suffix) - - def get_service(self): - import httplib2 - try: - from googleapiclient.discovery import build - except: - from apiclient.discovery import build - - http = httplib2.Http() - http = self.credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - - return bigquery_service - - @staticmethod - def process_http_error(ex): - # See `BigQuery Troubleshooting Errors - # `__ - - status = json.loads(bytes_to_str(ex.content))['error'] - errors = status.get('errors', None) - - if errors: - for error in errors: - reason = error['reason'] - message = error['message'] - - raise GenericGBQException( - "Reason: {0}, Message: {1}".format(reason, message)) - - raise GenericGBQException(errors) - - def process_insert_errors(self, insert_errors): - for insert_error in insert_errors: - row = insert_error['index'] - errors = insert_error.get('errors', None) - for error in errors: - reason = error['reason'] - message = error['message'] - location = error['location'] - error_message = ('Error at Row: {0}, Reason: {1}, ' - 'Location: {2}, Message: {3}' - .format(row, reason, location, message)) - - # Report all error messages if verbose is set - if self.verbose: - self._print(error_message) - else: - raise StreamingInsertError(error_message + - '\nEnable verbose logging to ' - 'see all errors') - - raise StreamingInsertError - - def run_query(self, query, **kwargs): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - from oauth2client.client import AccessTokenRefreshError - - _check_google_client_version() - - job_collection = self.service.jobs() - - job_config = { - 'query': { - 'query': query, - 'useLegacySql': self.dialect == 'legacy' - # 'allowLargeResults', 'createDisposition', - # 'preserveNulls', destinationTable, useQueryCache - } - } - config = kwargs.get('configuration') - if config is not None: - if len(config) != 1: - raise ValueError("Only one job type must be specified, but " - "given {}".format(','.join(config.keys()))) - if 'query' in config: - if 'query' in config['query'] and query is not None: - raise ValueError("Query statement can't be specified " - "inside config while it is specified " - "as parameter") - - job_config['query'].update(config['query']) - else: - raise ValueError("Only 'query' job type is supported") - - job_data = { - 'configuration': job_config - } - - self._start_timer() - try: - self._print('Requesting query... 
', end="") - query_reply = job_collection.insert( - projectId=self.project_id, body=job_data).execute() - self._print('ok.\nQuery running...') - except (AccessTokenRefreshError, ValueError): - if self.private_key: - raise AccessDenied( - "The service account credentials are not valid") - else: - raise AccessDenied( - "The credentials have been revoked or expired, " - "please re-run the application to re-authorize") - except HttpError as ex: - self.process_http_error(ex) - - job_reference = query_reply['jobReference'] - - while not query_reply.get('jobComplete', False): - self.print_elapsed_seconds(' Elapsed', 's. Waiting...') - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_reference['jobId']).execute() - except HttpError as ex: - self.process_http_error(ex) - - if self.verbose: - if query_reply['cacheHit']: - self._print('Query done.\nCache hit.\n') - else: - bytes_processed = int(query_reply.get( - 'totalBytesProcessed', '0')) - self._print('Query done.\nProcessed: {}\n'.format( - self.sizeof_fmt(bytes_processed))) - - self._print('Retrieving results...') - - total_rows = int(query_reply['totalRows']) - result_pages = list() - seen_page_tokens = list() - current_row = 0 - # Only read schema on first page - schema = query_reply['schema'] - - # Loop through each page of data - while 'rows' in query_reply and current_row < total_rows: - page = query_reply['rows'] - result_pages.append(page) - current_row += len(page) - - self.print_elapsed_seconds( - ' Got page: {}; {}% done. Elapsed'.format( - len(result_pages), - round(100.0 * current_row / total_rows))) - - if current_row == total_rows: - break - - page_token = query_reply.get('pageToken', None) - - if not page_token and current_row < total_rows: - raise InvalidPageToken("Required pageToken was missing. 
" - "Received {0} of {1} rows" - .format(current_row, total_rows)) - - elif page_token in seen_page_tokens: - raise InvalidPageToken("A duplicate pageToken was returned") - - seen_page_tokens.append(page_token) - - try: - query_reply = job_collection.getQueryResults( - projectId=job_reference['projectId'], - jobId=job_reference['jobId'], - pageToken=page_token).execute() - except HttpError as ex: - self.process_http_error(ex) - - if current_row < total_rows: - raise InvalidPageToken() - - # print basic query stats - self._print('Got {} rows.\n'.format(total_rows)) - - return schema, result_pages - - def load_data(self, dataframe, dataset_id, table_id, chunksize): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - - job_id = uuid.uuid4().hex - rows = [] - remaining_rows = len(dataframe) - - total_rows = remaining_rows - self._print("\n\n") - - for index, row in dataframe.reset_index(drop=True).iterrows(): - row_dict = dict() - row_dict['json'] = json.loads(row.to_json(force_ascii=False, - date_unit='s', - date_format='iso')) - row_dict['insertId'] = job_id + str(index) - rows.append(row_dict) - remaining_rows -= 1 - - if (len(rows) % chunksize == 0) or (remaining_rows == 0): - self._print("\rStreaming Insert is {0}% Complete".format( - ((total_rows - remaining_rows) * 100) / total_rows)) - - body = {'rows': rows} - - try: - response = self.service.tabledata().insertAll( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id, - body=body).execute() - except HttpError as ex: - self.process_http_error(ex) - - # For streaming inserts, even if you receive a success HTTP - # response code, you'll need to check the insertErrors property - # of the response to determine if the row insertions were - # successful, because it's possible that BigQuery was only - # partially successful at inserting the rows. See the `Success - # HTTP Response Codes - # `__ - # section - - insert_errors = response.get('insertErrors', None) - if insert_errors: - self.process_insert_errors(insert_errors) - - sleep(1) # Maintains the inserts "per second" rate per API - rows = [] - - self._print("\n") - - def verify_schema(self, dataset_id, table_id, schema): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - - try: - remote_schema = self.service.tables().get( - projectId=self.project_id, - datasetId=dataset_id, - tableId=table_id).execute()['schema'] - - fields_remote = set([json.dumps(field_remote) - for field_remote in remote_schema['fields']]) - fields_local = set(json.dumps(field_local) - for field_local in schema['fields']) - - return fields_remote == fields_local - except HttpError as ex: - self.process_http_error(ex) - - def delete_and_recreate_table(self, dataset_id, table_id, table_schema): - delay = 0 - - # Changes to table schema may take up to 2 minutes as of May 2015 See - # `Issue 191 - # `__ - # Compare previous schema with new schema to determine if there should - # be a 120 second delay - - if not self.verify_schema(dataset_id, table_id, table_schema): - self._print('The existing table has a different schema. Please ' - 'wait 2 minutes. 
See Google BigQuery issue #191') - delay = 120 - - table = _Table(self.project_id, dataset_id, - private_key=self.private_key) - table.delete(table_id) - table.create(table_id, table_schema) - sleep(delay) - - -def _parse_data(schema, rows): - # see: - # http://pandas.pydata.org/pandas-docs/dev/missing_data.html - # #missing-data-casting-rules-and-indexing - dtype_map = {'FLOAT': np.dtype(float), - 'TIMESTAMP': 'M8[ns]'} - - fields = schema['fields'] - col_types = [field['type'] for field in fields] - col_names = [str(field['name']) for field in fields] - col_dtypes = [dtype_map.get(field['type'], object) for field in fields] - page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes)) - for row_num, raw_row in enumerate(rows): - entries = raw_row.get('f', []) - for col_num, field_type in enumerate(col_types): - field_value = _parse_entry(entries[col_num].get('v', ''), - field_type) - page_array[row_num][col_num] = field_value - - return DataFrame(page_array, columns=col_names) + # give a nice error message + raise ImportError("Load data from Google BigQuery\n" + "\n" + "the pandas-gbq package is not installed\n" + "see the docs: https://pandas-gbq.readthedocs.io\n" + "\n" + "you can install via:\n" + "pip install pandas-gbq\n") -def _parse_entry(field_value, field_type): - if field_value is None or field_value == 'null': - return None - if field_type == 'INTEGER': - return int(field_value) - elif field_type == 'FLOAT': - return float(field_value) - elif field_type == 'TIMESTAMP': - timestamp = datetime.utcfromtimestamp(float(field_value)) - return np.datetime64(timestamp) - elif field_type == 'BOOLEAN': - return field_value == 'true' - return field_value + return pandas_gbq def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, dialect='legacy', **kwargs): - r"""Load data from Google BigQuery. + pandas_gbq = _try_import() + return pandas_gbq.read_gbq( + query, project_id=project_id, + index_col=index_col, col_order=col_order, + reauth=reauth, verbose=verbose, + private_key=private_key, + dialect=dialect, + **kwargs) - THIS IS AN EXPERIMENTAL LIBRARY - The main method a user calls to execute a Query in Google BigQuery - and read results into a pandas DataFrame. - - Google BigQuery API Client Library v2 for Python is used. - Documentation is available at - https://developers.google.com/api-client-library/python/apis/bigquery/v2 - - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - .. versionadded:: 0.19.0 - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. - - Parameters - ---------- - query : str - SQL-Like Query to return data values - project_id : str - Google BigQuery Account project ID. - index_col : str (optional) - Name of result column to use for index in results DataFrame - col_order : list(str) (optional) - List of BigQuery column names in the desired order for results - DataFrame - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - verbose : boolean (default True) - Verbose output - private_key : str (optional) - Service account private key in JSON format. 
Can be file path - or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) - - .. versionadded:: 0.18.1 - - dialect : {'legacy', 'standard'}, default 'legacy' - 'legacy' : Use BigQuery's legacy SQL dialect. - 'standard' : Use BigQuery's standard SQL (beta), which is - compliant with the SQL 2011 standard. For more information - see `BigQuery SQL Reference - `__ - - .. versionadded:: 0.19.0 - - **kwargs : Arbitrary keyword arguments - configuration (dict): query config parameters for job processing. - For example: - - configuration = {'query': {'useQueryCache': False}} - - For more information see `BigQuery SQL Reference - ` - - .. versionadded:: 0.20.0 - - Returns - ------- - df: DataFrame - DataFrame representing results of query - - """ - - if not project_id: - raise TypeError("Missing required parameter: project_id") - - if dialect not in ('legacy', 'standard'): - raise ValueError("'{0}' is not valid for dialect".format(dialect)) - - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key, - dialect=dialect) - schema, pages = connector.run_query(query, **kwargs) - dataframe_list = [] - while len(pages) > 0: - page = pages.pop() - dataframe_list.append(_parse_data(schema, page)) - - if len(dataframe_list) > 0: - final_df = concat(dataframe_list, ignore_index=True) - else: - final_df = _parse_data(schema, []) - - # Reindex the DataFrame on the provided column - if index_col is not None: - if index_col in final_df.columns: - final_df.set_index(index_col, inplace=True) - else: - raise InvalidColumnOrder( - 'Index column "{0}" does not exist in DataFrame.' - .format(index_col) - ) - - # Change the order of columns in the DataFrame based on provided list - if col_order is not None: - if sorted(col_order) == sorted(final_df.columns): - final_df = final_df[col_order] - else: - raise InvalidColumnOrder( - 'Column order does not match this DataFrame.' - ) - - # cast BOOLEAN and INTEGER columns from object to bool/int - # if they dont have any nulls - type_map = {'BOOLEAN': bool, 'INTEGER': int} - for field in schema['fields']: - if field['type'] in type_map and \ - final_df[field['name']].notnull().all(): - final_df[field['name']] = \ - final_df[field['name']].astype(type_map[field['type']]) - - connector.print_elapsed_seconds( - 'Total time taken', - datetime.now().strftime('s.\nFinished at %Y-%m-%d %H:%M:%S.'), - 0 - ) - - return final_df +read_gbq = docstring_wrapper(read_gbq, + lambda: _try_import().read_gbq.__doc__) def to_gbq(dataframe, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None): - """Write a DataFrame to a Google BigQuery table. - - THIS IS AN EXPERIMENTAL LIBRARY - - The main method a user calls to export pandas DataFrame contents to - Google BigQuery table. - - Google BigQuery API Client Library v2 for Python is used. - Documentation is available at - https://developers.google.com/api-client-library/python/apis/bigquery/v2 - - Authentication to the Google BigQuery service is via OAuth 2.0. - - - If "private_key" is not provided: - - By default "application default credentials" are used. - - .. versionadded:: 0.19.0 - - If default application credentials are not found or are restrictive, - user account credentials are used. In this case, you will be asked to - grant permissions for product name 'pandas GBQ'. - - - If "private_key" is provided: - - Service account credentials will be used to authenticate. 
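# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the delegation pattern the new
# wrappers above use. pandas_gbq is imported lazily on first use, so pandas
# keeps no hard dependency on it, and the thin wrapper simply forwards its
# arguments. The name ``_example_read_gbq`` is hypothetical; the real wrappers
# additionally refresh their docstrings lazily via
# pandas.util.decorators.docstring_wrapper, as shown in the diff above.
def _example_read_gbq(query, project_id=None, **kwargs):
    try:
        import pandas_gbq
    except ImportError:
        raise ImportError("the pandas-gbq package is required: "
                          "pip install pandas-gbq")
    # forward everything to the external implementation
    return pandas_gbq.read_gbq(query, project_id=project_id, **kwargs)
# ---------------------------------------------------------------------------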
- - Parameters - ---------- - dataframe : DataFrame - DataFrame to be written - destination_table : string - Name of table to be written, in the form 'dataset.tablename' - project_id : str - Google BigQuery Account project ID. - chunksize : int (default 10000) - Number of rows to be inserted in each chunk from the dataframe. - verbose : boolean (default True) - Show percentage complete - reauth : boolean (default False) - Force Google BigQuery to reauthenticate the user. This is useful - if multiple accounts are used. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - 'fail': If table exists, do nothing. - 'replace': If table exists, drop it, recreate it, and insert data. - 'append': If table exists, insert data. Create if does not exist. - private_key : str (optional) - Service account private key in JSON format. Can be file path - or string contents. This is useful for remote server - authentication (eg. jupyter iPython notebook on remote host) - """ - - if if_exists not in ('fail', 'replace', 'append'): - raise ValueError("'{0}' is not valid for if_exists".format(if_exists)) - - if '.' not in destination_table: - raise NotFoundException( - "Invalid Table Name. Should be of the form 'datasetId.tableId' ") - - connector = GbqConnector(project_id, reauth=reauth, verbose=verbose, - private_key=private_key) - dataset_id, table_id = destination_table.rsplit('.', 1) - - table = _Table(project_id, dataset_id, reauth=reauth, - private_key=private_key) - - table_schema = _generate_bq_schema(dataframe) - - # If table exists, check if_exists parameter - if table.exists(table_id): - if if_exists == 'fail': - raise TableCreationError("Could not create the table because it " - "already exists. " - "Change the if_exists parameter to " - "append or replace data.") - elif if_exists == 'replace': - connector.delete_and_recreate_table( - dataset_id, table_id, table_schema) - elif if_exists == 'append': - if not connector.verify_schema(dataset_id, table_id, table_schema): - raise InvalidSchema("Please verify that the structure and " - "data types in the DataFrame match the " - "schema of the destination table.") - else: - table.create(table_id, table_schema) - - connector.load_data(dataframe, dataset_id, table_id, chunksize) - - -def generate_bq_schema(df, default_type='STRING'): - # deprecation TimeSeries, #11121 - warnings.warn("generate_bq_schema is deprecated and will be removed in " - "a future version", FutureWarning, stacklevel=2) - - return _generate_bq_schema(df, default_type=default_type) - - -def _generate_bq_schema(df, default_type='STRING'): - """ Given a passed df, generate the associated Google BigQuery schema. - - Parameters - ---------- - df : DataFrame - default_type : string - The default big query type in case the type of the column - does not exist in the schema. 
- """ - - type_mapping = { - 'i': 'INTEGER', - 'b': 'BOOLEAN', - 'f': 'FLOAT', - 'O': 'STRING', - 'S': 'STRING', - 'U': 'STRING', - 'M': 'TIMESTAMP' - } - - fields = [] - for column_name, dtype in df.dtypes.iteritems(): - fields.append({'name': column_name, - 'type': type_mapping.get(dtype.kind, default_type)}) - - return {'fields': fields} - - -class _Table(GbqConnector): - - def __init__(self, project_id, dataset_id, reauth=False, verbose=False, - private_key=None): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - self.http_error = HttpError - self.dataset_id = dataset_id - super(_Table, self).__init__(project_id, reauth, verbose, private_key) - - def exists(self, table_id): - """ Check if a table exists in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be verified - - Returns - ------- - boolean - true if table exists, otherwise false - """ - - try: - self.service.tables().get( - projectId=self.project_id, - datasetId=self.dataset_id, - tableId=table_id).execute() - return True - except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) - - def create(self, table_id, schema): - """ Create a table in Google BigQuery given a table and schema - - .. versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be written - schema : str - Use the generate_bq_schema to generate your table schema from a - dataframe. - """ - - if self.exists(table_id): - raise TableCreationError( - "The table could not be created because it already exists") - - if not _Dataset(self.project_id, - private_key=self.private_key).exists(self.dataset_id): - _Dataset(self.project_id, - private_key=self.private_key).create(self.dataset_id) - - body = { - 'schema': schema, - 'tableReference': { - 'tableId': table_id, - 'projectId': self.project_id, - 'datasetId': self.dataset_id - } - } - - try: - self.service.tables().insert( - projectId=self.project_id, - datasetId=self.dataset_id, - body=body).execute() - except self.http_error as ex: - self.process_http_error(ex) - - def delete(self, table_id): - """ Delete a table in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - table : str - Name of table to be deleted - """ - - if not self.exists(table_id): - raise NotFoundException("Table does not exist") - - try: - self.service.tables().delete( - datasetId=self.dataset_id, - projectId=self.project_id, - tableId=table_id).execute() - except self.http_error as ex: - self.process_http_error(ex) - - -class _Dataset(GbqConnector): - - def __init__(self, project_id, reauth=False, verbose=False, - private_key=None): - try: - from googleapiclient.errors import HttpError - except: - from apiclient.errors import HttpError - self.http_error = HttpError - super(_Dataset, self).__init__(project_id, reauth, verbose, - private_key) - - def exists(self, dataset_id): - """ Check if a dataset exists in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset_id : str - Name of dataset to be verified - - Returns - ------- - boolean - true if dataset exists, otherwise false - """ - - try: - self.service.datasets().get( - projectId=self.project_id, - datasetId=dataset_id).execute() - return True - except self.http_error as ex: - if ex.resp.status == 404: - return False - else: - self.process_http_error(ex) - - def datasets(self): - """ Return a list of datasets in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - None - - Returns - ------- - list - List of datasets under the specific project - """ - - dataset_list = [] - next_page_token = None - first_query = True - - while first_query or next_page_token: - first_query = False - - try: - list_dataset_response = self.service.datasets().list( - projectId=self.project_id, - pageToken=next_page_token).execute() - - dataset_response = list_dataset_response.get('datasets') - next_page_token = list_dataset_response.get('nextPageToken') - - if not dataset_response: - return dataset_list - - for row_num, raw_row in enumerate(dataset_response): - dataset_list.append( - raw_row['datasetReference']['datasetId']) - - except self.http_error as ex: - self.process_http_error(ex) - - return dataset_list - - def create(self, dataset_id): - """ Create a dataset in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to be written - """ - - if self.exists(dataset_id): - raise DatasetCreationError( - "The dataset could not be created because it already exists") - - body = { - 'datasetReference': { - 'projectId': self.project_id, - 'datasetId': dataset_id - } - } - - try: - self.service.datasets().insert( - projectId=self.project_id, - body=body).execute() - except self.http_error as ex: - self.process_http_error(ex) - - def delete(self, dataset_id): - """ Delete a dataset in Google BigQuery - - .. versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to be deleted - """ - - if not self.exists(dataset_id): - raise NotFoundException( - "Dataset {0} does not exist".format(dataset_id)) - - try: - self.service.datasets().delete( - datasetId=dataset_id, - projectId=self.project_id).execute() - - except self.http_error as ex: - self.process_http_error(ex) - - def tables(self, dataset_id): - """ List tables in the specific dataset in Google BigQuery - - .. 
versionadded:: 0.17.0 - - Parameters - ---------- - dataset : str - Name of dataset to list tables for - - Returns - ------- - list - List of tables under the specific dataset - """ - - table_list = [] - next_page_token = None - first_query = True - - while first_query or next_page_token: - first_query = False - - try: - list_table_response = self.service.tables().list( - projectId=self.project_id, - datasetId=dataset_id, - pageToken=next_page_token).execute() - - table_response = list_table_response.get('tables') - next_page_token = list_table_response.get('nextPageToken') - - if not table_response: - return table_list - - for row_num, raw_row in enumerate(table_response): - table_list.append(raw_row['tableReference']['tableId']) + pandas_gbq = _try_import() + pandas_gbq.to_gbq(dataframe, destination_table, project_id, + chunksize=chunksize, + verbose=verbose, reauth=reauth, + if_exists=if_exists, private_key=private_key) - except self.http_error as ex: - self.process_http_error(ex) - return table_list +to_gbq = docstring_wrapper(to_gbq, + lambda: _try_import().to_gbq.__doc__) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 0a76267054ee6..13529e7b54714 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -1,23 +1,18 @@ -import re -from datetime import datetime import pytest +from datetime import datetime import pytz import platform from time import sleep import os -import logging import numpy as np +import pandas as pd +from pandas import compat, DataFrame -from distutils.version import StrictVersion -from pandas import compat - -from pandas import NaT -from pandas.compat import u, range -from pandas.core.frame import DataFrame -import pandas.io.gbq as gbq +from pandas.compat import range import pandas.util.testing as tm -from pandas.compat.numpy import np_datetime64_compat + +pandas_gbq = pytest.importorskip('pandas_gbq') PROJECT_ID = None PRIVATE_KEY_JSON_PATH = None @@ -33,12 +28,6 @@ VERSION = platform.python_version() -_IMPORTS = False -_GOOGLE_API_CLIENT_INSTALLED = False -_GOOGLE_API_CLIENT_VALID_VERSION = False -_HTTPLIB2_INSTALLED = False -_SETUPTOOLS_INSTALLED = False - def _skip_if_no_project_id(): if not _get_project_id(): @@ -46,23 +35,12 @@ def _skip_if_no_project_id(): "Cannot run integration tests without a project id") -def _skip_local_auth_if_in_travis_env(): - if _in_travis_environment(): - pytest.skip("Cannot run local auth in travis environment") - - def _skip_if_no_private_key_path(): if not _get_private_key_path(): pytest.skip("Cannot run integration tests without a " "private key json file path") -def _skip_if_no_private_key_contents(): - if not _get_private_key_contents(): - pytest.skip("Cannot run integration tests without a " - "private key json contents") - - def _in_travis_environment(): return 'TRAVIS_BUILD_DIR' in os.environ and \ 'GBQ_PROJECT_ID' in os.environ @@ -83,146 +61,15 @@ def _get_private_key_path(): return PRIVATE_KEY_JSON_PATH -def _get_private_key_contents(): - if _in_travis_environment(): - with open(os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci', - 'travis_gbq.json'])) as f: - return f.read() - else: - return PRIVATE_KEY_JSON_CONTENTS - - -def _test_imports(): - global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ - _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED - - try: - import pkg_resources - _SETUPTOOLS_INSTALLED = True - except ImportError: - _SETUPTOOLS_INSTALLED = False - - if compat.PY3: - google_api_minimum_version = '1.4.1' - else: - 
google_api_minimum_version = '1.2.0' - - if _SETUPTOOLS_INSTALLED: - try: - try: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - except: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - - from oauth2client.client import OAuth2WebServerFlow # noqa - from oauth2client.client import AccessTokenRefreshError # noqa - - from oauth2client.file import Storage # noqa - from oauth2client.tools import run_flow # noqa - _GOOGLE_API_CLIENT_INSTALLED = True - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution( - 'google-api-python-client').version - - if (StrictVersion(_GOOGLE_API_CLIENT_VERSION) >= - StrictVersion(google_api_minimum_version)): - _GOOGLE_API_CLIENT_VALID_VERSION = True - - except ImportError: - _GOOGLE_API_CLIENT_INSTALLED = False - - try: - import httplib2 # noqa - _HTTPLIB2_INSTALLED = True - except ImportError: - _HTTPLIB2_INSTALLED = False - - if not _SETUPTOOLS_INSTALLED: - raise ImportError('Could not import pkg_resources (setuptools).') - - if not _GOOGLE_API_CLIENT_INSTALLED: - raise ImportError('Could not import Google API Client.') - - if not _GOOGLE_API_CLIENT_VALID_VERSION: - raise ImportError("pandas requires google-api-python-client >= {0} " - "for Google BigQuery support, " - "current version {1}" - .format(google_api_minimum_version, - _GOOGLE_API_CLIENT_VERSION)) - - if not _HTTPLIB2_INSTALLED: - raise ImportError( - "pandas requires httplib2 for Google BigQuery support") - - # Bug fix for https://github.com/pandas-dev/pandas/issues/12572 - # We need to know that a supported version of oauth2client is installed - # Test that either of the following is installed: - # - SignedJwtAssertionCredentials from oauth2client.client - # - ServiceAccountCredentials from oauth2client.service_account - # SignedJwtAssertionCredentials is available in oauthclient < 2.0.0 - # ServiceAccountCredentials is available in oauthclient >= 2.0.0 - oauth2client_v1 = True - oauth2client_v2 = True - - try: - from oauth2client.client import SignedJwtAssertionCredentials # noqa - except ImportError: - oauth2client_v1 = False - - try: - from oauth2client.service_account import ServiceAccountCredentials # noqa - except ImportError: - oauth2client_v2 = False - - if not oauth2client_v1 and not oauth2client_v2: - raise ImportError("Missing oauth2client required for BigQuery " - "service account support") - - -def _setup_common(): - try: - _test_imports() - except (ImportError, NotImplementedError) as import_exception: - pytest.skip(import_exception) - - if _in_travis_environment(): - logging.getLogger('oauth2client').setLevel(logging.ERROR) - logging.getLogger('apiclient').setLevel(logging.ERROR) - - -def _check_if_can_get_correct_default_credentials(): - # Checks if "Application Default Credentials" can be fetched - # from the environment the tests are running in. 
- # See Issue #13577 - - import httplib2 - try: - from googleapiclient.discovery import build - except ImportError: - from apiclient.discovery import build - try: - from oauth2client.client import GoogleCredentials - credentials = GoogleCredentials.get_application_default() - http = httplib2.Http() - http = credentials.authorize(http) - bigquery_service = build('bigquery', 'v2', http=http) - jobs = bigquery_service.jobs() - job_data = {'configuration': {'query': {'query': 'SELECT 1'}}} - jobs.insert(projectId=_get_project_id(), body=job_data).execute() - return True - except: - return False - - def clean_gbq_environment(private_key=None): - dataset = gbq._Dataset(_get_project_id(), private_key=private_key) + dataset = pandas_gbq.gbq._Dataset(_get_project_id(), + private_key=private_key) for i in range(1, 10): if DATASET_ID + str(i) in dataset.datasets(): dataset_id = DATASET_ID + str(i) - table = gbq._Table(_get_project_id(), dataset_id, - private_key=private_key) + table = pandas_gbq.gbq._Table(_get_project_id(), dataset_id, + private_key=private_key) for j in range(1, 20): if TABLE_ID + str(j) in dataset.tables(dataset_id): table.delete(TABLE_ID + str(j)) @@ -246,673 +93,8 @@ def make_mixed_dataframe_v2(test_size): index=range(test_size)) -def test_generate_bq_schema_deprecated(): - # 11121 Deprecation of generate_bq_schema - with tm.assert_produces_warning(FutureWarning): - df = make_mixed_dataframe_v2(10) - gbq.generate_bq_schema(df) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase): - - def setUp(self): - _setup_common() - _skip_if_no_project_id() - _skip_local_auth_if_in_travis_env() - - self.sut = gbq.GbqConnector(_get_project_id()) - - def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - def test_get_application_default_credentials_does_not_throw_error(self): - if _check_if_can_get_correct_default_credentials(): - pytest.skip("Can get default_credentials " - "from the environment!") - credentials = self.sut.get_application_default_credentials() - self.assertIsNone(credentials) - - def test_get_application_default_credentials_returns_credentials(self): - if not _check_if_can_get_correct_default_credentials(): - pytest.skip("Cannot get default_credentials " - "from the environment!") - from oauth2client.client import GoogleCredentials - credentials = self.sut.get_application_default_credentials() - self.assertTrue(isinstance(credentials, GoogleCredentials)) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase): - def setUp(self): - _setup_common() - - _skip_if_no_project_id() - _skip_if_no_private_key_path() - - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) - - 
def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase): - def setUp(self): - _setup_common() - - _skip_if_no_project_id() - _skip_if_no_private_key_contents() - - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_contents()) - - def test_should_be_able_to_make_a_connector(self): - self.assertTrue(self.sut is not None, - 'Could not create a GbqConnector') - - def test_should_be_able_to_get_valid_credentials(self): - credentials = self.sut.get_credentials() - self.assertFalse(credentials.invalid, 'Returned credentials invalid') - - def test_should_be_able_to_get_a_bigquery_service(self): - bigquery_service = self.sut.get_service() - self.assertTrue(bigquery_service is not None, 'No service returned') - - def test_should_be_able_to_get_schema_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(schema is not None) - - def test_should_be_able_to_get_results_from_query(self): - schema, pages = self.sut.run_query('SELECT 1') - self.assertTrue(pages is not None) - - -class GBQUnitTests(tm.TestCase): - - def setUp(self): - _setup_common() - - def test_import_google_api_python_client(self): - if compat.PY2: - with tm.assertRaises(ImportError): - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa - else: - from googleapiclient.discovery import build # noqa - from googleapiclient.errors import HttpError # noqa - - def test_should_return_bigquery_integers_as_python_ints(self): - result = gbq._parse_entry(1, 'INTEGER') - tm.assert_equal(result, int(1)) - - def test_should_return_bigquery_floats_as_python_floats(self): - result = gbq._parse_entry(1, 'FLOAT') - tm.assert_equal(result, float(1)) - - def test_should_return_bigquery_timestamps_as_numpy_datetime(self): - result = gbq._parse_entry('0e9', 'TIMESTAMP') - tm.assert_equal(result, np_datetime64_compat('1970-01-01T00:00:00Z')) - - def test_should_return_bigquery_booleans_as_python_booleans(self): - result = gbq._parse_entry('false', 'BOOLEAN') - tm.assert_equal(result, False) - - def test_should_return_bigquery_strings_as_python_strings(self): - result = gbq._parse_entry('STRING', 'STRING') - tm.assert_equal(result, 'STRING') - - def test_to_gbq_should_fail_if_invalid_table_name_passed(self): - with tm.assertRaises(gbq.NotFoundException): - gbq.to_gbq(DataFrame(), 'invalid_table_name', project_id="1234") - - def test_to_gbq_with_no_project_id_given_should_fail(self): - with tm.assertRaises(TypeError): - gbq.to_gbq(DataFrame(), 'dataset.tablename') - - def 
test_read_gbq_with_no_project_id_given_should_fail(self): - with tm.assertRaises(TypeError): - gbq.read_gbq('SELECT 1') - - def test_that_parse_data_works_properly(self): - test_schema = {'fields': [ - {'mode': 'NULLABLE', 'name': 'valid_string', 'type': 'STRING'}]} - test_page = [{'f': [{'v': 'PI'}]}] - - test_output = gbq._parse_data(test_schema, test_page) - correct_output = DataFrame({'valid_string': ['PI']}) - tm.assert_frame_equal(test_output, correct_output) - - def test_read_gbq_with_invalid_private_key_json_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', private_key='y') - - def test_read_gbq_with_empty_private_key_json_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', private_key='{}') - - def test_read_gbq_with_private_key_json_wrong_types_should_fail(self): - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq( - 'SELECT 1', project_id='x', - private_key='{ "client_email" : 1, "private_key" : True }') - - def test_read_gbq_with_empty_private_key_file_should_fail(self): - with tm.ensure_clean() as empty_file_path: - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq('SELECT 1', project_id='x', - private_key=empty_file_path) - - def test_read_gbq_with_corrupted_private_key_json_should_fail(self): - _skip_if_no_private_key_contents() - - with tm.assertRaises(gbq.InvalidPrivateKeyFormat): - gbq.read_gbq( - 'SELECT 1', project_id='x', - private_key=re.sub('[a-z]', '9', _get_private_key_contents())) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestReadGBQIntegration(tm.TestCase): - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - - _setup_common() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - pass - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. 
- pass - - def test_should_read_as_user_account(self): - _skip_local_auth_if_in_travis_env() - - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_read_as_service_account_with_key_path(self): - _skip_if_no_private_key_path() - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_read_as_service_account_with_key_contents(self): - _skip_if_no_private_key_contents() - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_contents()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _skip_if_no_project_id() - _skip_if_no_private_key_path() - - _setup_common() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - pass - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. - pass - - def test_should_properly_handle_valid_strings(self): - query = 'SELECT "PI" AS valid_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_should_properly_handle_empty_strings(self): - query = 'SELECT "" AS empty_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'empty_string': [""]})) - - def test_should_properly_handle_null_strings(self): - query = 'SELECT STRING(NULL) AS null_string' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_string': [None]})) - - def test_should_properly_handle_valid_integers(self): - query = 'SELECT INTEGER(3) AS valid_integer' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'valid_integer': [3]})) - - def test_should_properly_handle_nullable_integers(self): - query = '''SELECT * FROM - (SELECT 1 AS nullable_integer), - (SELECT NULL AS nullable_integer)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_integer': [1, None]}).astype(object)) - - def test_should_properly_handle_valid_longs(self): - query = 'SELECT 1 << 62 AS valid_long' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'valid_long': [1 << 62]})) - - def test_should_properly_handle_nullable_longs(self): - query = '''SELECT * FROM - (SELECT 1 << 62 AS nullable_long), - (SELECT NULL AS nullable_long)''' - df = 
gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_long': [1 << 62, None]}).astype(object)) - - def test_should_properly_handle_null_integers(self): - query = 'SELECT INTEGER(NULL) AS null_integer' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_integer': [None]})) - - def test_should_properly_handle_valid_floats(self): - from math import pi - query = 'SELECT PI() AS valid_float' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'valid_float': [pi]})) - - def test_should_properly_handle_nullable_floats(self): - from math import pi - query = '''SELECT * FROM - (SELECT PI() AS nullable_float), - (SELECT NULL AS nullable_float)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_float': [pi, None]})) - - def test_should_properly_handle_valid_doubles(self): - from math import pi - query = 'SELECT PI() * POW(10, 307) AS valid_double' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'valid_double': [pi * 10 ** 307]})) - - def test_should_properly_handle_nullable_doubles(self): - from math import pi - query = '''SELECT * FROM - (SELECT PI() * POW(10, 307) AS nullable_double), - (SELECT NULL AS nullable_double)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_double': [pi * 10 ** 307, None]})) - - def test_should_properly_handle_null_floats(self): - query = 'SELECT FLOAT(NULL) AS null_float' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_float': [np.nan]})) - - def test_should_properly_handle_timestamp_unix_epoch(self): - query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame( - {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) - - def test_should_properly_handle_arbitrary_timestamp(self): - query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({ - 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] - })) - - def test_should_properly_handle_null_timestamp(self): - query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_timestamp': [NaT]})) - - def test_should_properly_handle_true_boolean(self): - query = 'SELECT BOOLEAN(TRUE) AS true_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'true_boolean': [True]})) - - def test_should_properly_handle_false_boolean(self): - query = 'SELECT BOOLEAN(FALSE) AS false_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'false_boolean': [False]})) - - def 
test_should_properly_handle_null_boolean(self): - query = 'SELECT BOOLEAN(NULL) AS null_boolean' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'null_boolean': [None]})) - - def test_should_properly_handle_nullable_booleans(self): - query = '''SELECT * FROM - (SELECT BOOLEAN(TRUE) AS nullable_boolean), - (SELECT NULL AS nullable_boolean)''' - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal( - df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) - - def test_unicode_string_conversion_and_normalization(self): - correct_test_datatype = DataFrame( - {'unicode_string': [u("\xe9\xfc")]} - ) - - unicode_string = "\xc3\xa9\xc3\xbc" - - if compat.PY3: - unicode_string = unicode_string.encode('latin-1').decode('utf8') - - query = 'SELECT "{0}" AS unicode_string'.format(unicode_string) - - df = gbq.read_gbq(query, project_id=_get_project_id(), - private_key=_get_private_key_path()) - tm.assert_frame_equal(df, correct_test_datatype) - - def test_index_column(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2" - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col="string_1", - private_key=_get_private_key_path()) - correct_frame = DataFrame( - {'string_1': ['a'], 'string_2': ['b']}).set_index("string_1") - tm.assert_equal(result_frame.index.name, correct_frame.index.name) - - def test_column_order(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_1', 'string_2'] - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - col_order=col_order, - private_key=_get_private_key_path()) - correct_frame = DataFrame({'string_1': ['a'], 'string_2': [ - 'b'], 'string_3': ['c']})[col_order] - tm.assert_frame_equal(result_frame, correct_frame) - - def test_column_order_plus_index(self): - query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" - col_order = ['string_3', 'string_2'] - result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col='string_1', col_order=col_order, - private_key=_get_private_key_path()) - correct_frame = DataFrame( - {'string_1': ['a'], 'string_2': ['b'], 'string_3': ['c']}) - correct_frame.set_index('string_1', inplace=True) - correct_frame = correct_frame[col_order] - tm.assert_frame_equal(result_frame, correct_frame) - - def test_malformed_query(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELCET * FORM [publicdata:samples.shakespeare]", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - - def test_bad_project_id(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELECT 1", project_id='001', - private_key=_get_private_key_path()) - - def test_bad_table_name(self): - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq("SELECT * FROM [publicdata:samples.nope]", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - - def test_download_dataset_larger_than_200k_rows(self): - test_size = 200005 - # Test for known BigQuery bug in datasets larger than 100k rows - # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results - df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] " - "GROUP EACH BY id ORDER BY id ASC LIMIT {0}" - .format(test_size), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), test_size) - - def 
test_zero_rows(self): - # Bug fix for https://github.com/pandas-dev/pandas/issues/10273 - df = gbq.read_gbq("SELECT title, id, is_bot, " - "SEC_TO_TIMESTAMP(timestamp) ts " - "FROM [publicdata:samples.wikipedia] " - "WHERE timestamp=-9999999", - project_id=_get_project_id(), - private_key=_get_private_key_path()) - page_array = np.zeros( - (0,), dtype=[('title', object), ('id', np.dtype(int)), - ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) - expected_result = DataFrame( - page_array, columns=['title', 'id', 'is_bot', 'ts']) - self.assert_frame_equal(df, expected_result) - - def test_legacy_sql(self): - legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10" - - # Test that a legacy sql statement fails when - # setting dialect='standard' - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq(legacy_sql, project_id=_get_project_id(), - dialect='standard', - private_key=_get_private_key_path()) - - # Test that a legacy sql statement succeeds when - # setting dialect='legacy' - df = gbq.read_gbq(legacy_sql, project_id=_get_project_id(), - dialect='legacy', - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), 10) - - def test_standard_sql(self): - standard_sql = "SELECT DISTINCT id FROM " \ - "`publicdata.samples.wikipedia` LIMIT 10" - - # Test that a standard sql statement fails when using - # the legacy SQL dialect (default value) - with tm.assertRaises(gbq.GenericGBQException): - gbq.read_gbq(standard_sql, project_id=_get_project_id(), - private_key=_get_private_key_path()) - - # Test that a standard sql statement succeeds when - # setting dialect='standard' - df = gbq.read_gbq(standard_sql, project_id=_get_project_id(), - dialect='standard', - private_key=_get_private_key_path()) - self.assertEqual(len(df.drop_duplicates()), 10) - - def test_invalid_option_for_sql_dialect(self): - sql_statement = "SELECT DISTINCT id FROM " \ - "`publicdata.samples.wikipedia` LIMIT 10" - - # Test that an invalid option for `dialect` raises ValueError - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - dialect='invalid', - private_key=_get_private_key_path()) - - # Test that a correct option for dialect succeeds - # to make sure ValueError was due to invalid dialect - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - dialect='standard', private_key=_get_private_key_path()) - - def test_query_with_parameters(self): - sql_statement = "SELECT @param1 + @param2 AS valid_result" - config = { - 'query': { - "useLegacySql": False, - "parameterMode": "named", - "queryParameters": [ - { - "name": "param1", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 1 - } - }, - { - "name": "param2", - "parameterType": { - "type": "INTEGER" - }, - "parameterValue": { - "value": 2 - } - } - ] - } - } - # Test that a query that relies on parameters fails - # when parameters are not supplied via configuration - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path()) - - # Test that the query is successful because we have supplied - # the correct query parameters via the 'config' option - df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) - - def test_query_inside_configuration(self): - query_no_use = 'SELECT "PI_WRONG" AS valid_string' - query = 'SELECT "PI" AS valid_string' - 
config = { - 'query': { - "query": query, - "useQueryCache": False, - } - } - # Test that it can't pass query both - # inside config and as parameter - with tm.assertRaises(ValueError): - gbq.read_gbq(query_no_use, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - - df = gbq.read_gbq(None, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) - - def test_configuration_without_query(self): - sql_statement = 'SELECT 1' - config = { - 'copy': { - "sourceTable": { - "projectId": _get_project_id(), - "datasetId": "publicdata:samples", - "tableId": "wikipedia" - }, - "destinationTable": { - "projectId": _get_project_id(), - "datasetId": "publicdata:samples", - "tableId": "wikipedia_copied" - }, - } - } - # Test that only 'query' configurations are supported - # nor 'copy','load','extract' - with tm.assertRaises(ValueError): - gbq.read_gbq(sql_statement, project_id=_get_project_id(), - private_key=_get_private_key_path(), - configuration=config) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") +@pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added See `Issue 191 - # `__ @classmethod def setUpClass(cls): @@ -923,24 +105,10 @@ def setUpClass(cls): _skip_if_no_project_id() _skip_if_no_private_key_path() - _setup_common() clean_gbq_environment(_get_private_key_path()) - - gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path() - ).create(DATASET_ID + "1") - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test is - # executed. - - self.dataset = gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path()) - self.table = gbq._Table(_get_project_id(), DATASET_ID + "1", - private_key=_get_private_key_path()) - self.sut = gbq.GbqConnector(_get_project_id(), - private_key=_get_private_key_path()) + pandas_gbq.gbq._Dataset(_get_project_id(), + private_key=_get_private_key_path() + ).create(DATASET_ID + "1") @classmethod def tearDownClass(cls): @@ -950,387 +118,19 @@ def tearDownClass(cls): clean_gbq_environment(_get_private_key_path()) - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test is - # executed. - pass - - def test_upload_data(self): + def test_roundtrip(self): destination_table = DESTINATION_TABLE + "1" test_size = 20001 df = make_mixed_dataframe_v2(test_size) - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], test_size) - - def test_upload_data_if_table_exists_fail(self): - destination_table = DESTINATION_TABLE + "2" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - self.table.create(TABLE_ID + "2", gbq._generate_bq_schema(df)) - - # Test the default value of if_exists is 'fail' - with tm.assertRaises(gbq.TableCreationError): - gbq.to_gbq(df, destination_table, _get_project_id(), - private_key=_get_private_key_path()) - - # Test the if_exists parameter with value 'fail' - with tm.assertRaises(gbq.TableCreationError): - gbq.to_gbq(df, destination_table, _get_project_id(), - if_exists='fail', private_key=_get_private_key_path()) - - def test_upload_data_if_table_exists_append(self): - destination_table = DESTINATION_TABLE + "3" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - df_different_schema = tm.makeMixedDataFrame() - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - # Test the if_exists parameter with value 'append' - gbq.to_gbq(df, destination_table, _get_project_id(), - if_exists='append', private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! - - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], test_size * 2) - - # Try inserting with a different schema, confirm failure - with tm.assertRaises(gbq.InvalidSchema): - gbq.to_gbq(df_different_schema, destination_table, - _get_project_id(), if_exists='append', - private_key=_get_private_key_path()) - - def test_upload_data_if_table_exists_replace(self): - - destination_table = DESTINATION_TABLE + "4" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - df_different_schema = tm.makeMixedDataFrame() - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - # Test the if_exists parameter with the value 'replace'. - gbq.to_gbq(df_different_schema, destination_table, - _get_project_id(), if_exists='replace', - private_key=_get_private_key_path()) - - sleep(30) # <- Curses Google!!! 
- - result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" - .format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) - self.assertEqual(result['num_rows'][0], 5) - - @tm.slow - def test_google_upload_errors_should_raise_exception(self): - destination_table = DESTINATION_TABLE + "5" - - test_timestamp = datetime.now(pytz.timezone('US/Arizona')) - bad_df = DataFrame({'bools': [False, False], 'flts': [0.0, 1.0], - 'ints': [0, '1'], 'strs': ['a', 1], - 'times': [test_timestamp, test_timestamp]}, - index=range(2)) - - with tm.assertRaises(gbq.StreamingInsertError): - gbq.to_gbq(bad_df, destination_table, _get_project_id(), - verbose=True, private_key=_get_private_key_path()) - - def test_generate_schema(self): - df = tm.makeMixedDataFrame() - schema = gbq._generate_bq_schema(df) - - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.assertEqual(schema, test_schema) - - def test_create_table(self): - destination_table = TABLE_ID + "6" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.assertTrue(self.table.exists(destination_table), - 'Expected table to exist') - - def test_table_does_not_exist(self): - self.assertTrue(not self.table.exists(TABLE_ID + "7"), - 'Expected table not to exist') - - def test_delete_table(self): - destination_table = TABLE_ID + "8" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.table.delete(destination_table) - self.assertTrue(not self.table.exists( - destination_table), 'Expected table not to exist') - - def test_list_table(self): - destination_table = TABLE_ID + "9" - test_schema = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - self.table.create(destination_table, test_schema) - self.assertTrue( - destination_table in self.dataset.tables(DATASET_ID + "1"), - 'Expected table list to contain table {0}' - .format(destination_table)) - - def test_verify_schema_allows_flexible_column_order(self): - destination_table = TABLE_ID + "10" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - self.assertTrue(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected schema to match') - - def test_verify_schema_fails_different_data_type(self): - destination_table = TABLE_ID + "11" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'STRING'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - 
self.assertFalse(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected different schema') - - def test_verify_schema_fails_different_structure(self): - destination_table = TABLE_ID + "12" - test_schema_1 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - test_schema_2 = {'fields': [{'name': 'A', 'type': 'FLOAT'}, - {'name': 'B2', 'type': 'FLOAT'}, - {'name': 'C', 'type': 'STRING'}, - {'name': 'D', 'type': 'TIMESTAMP'}]} - - self.table.create(destination_table, test_schema_1) - self.assertFalse(self.sut.verify_schema( - DATASET_ID + "1", destination_table, test_schema_2), - 'Expected different schema') - - def test_upload_data_flexible_column_order(self): - destination_table = DESTINATION_TABLE + "13" - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - # Initialize table with sample data - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) - - df_columns_reversed = df[df.columns[::-1]] - - gbq.to_gbq(df_columns_reversed, destination_table, _get_project_id(), - if_exists='append', private_key=_get_private_key_path()) - - def test_list_dataset(self): - dataset_id = DATASET_ID + "1" - self.assertTrue(dataset_id in self.dataset.datasets(), - 'Expected dataset list to contain dataset {0}' - .format(dataset_id)) - - def test_list_table_zero_results(self): - dataset_id = DATASET_ID + "2" - self.dataset.create(dataset_id) - table_list = gbq._Dataset(_get_project_id(), - private_key=_get_private_key_path() - ).tables(dataset_id) - self.assertEqual(len(table_list), 0, - 'Expected gbq.list_table() to return 0') - - def test_create_dataset(self): - dataset_id = DATASET_ID + "3" - self.dataset.create(dataset_id) - self.assertTrue(dataset_id in self.dataset.datasets(), - 'Expected dataset to exist') - - def test_delete_dataset(self): - dataset_id = DATASET_ID + "4" - self.dataset.create(dataset_id) - self.dataset.delete(dataset_id) - self.assertTrue(dataset_id not in self.dataset.datasets(), - 'Expected dataset not to exist') - - def test_dataset_exists(self): - dataset_id = DATASET_ID + "5" - self.dataset.create(dataset_id) - self.assertTrue(self.dataset.exists(dataset_id), - 'Expected dataset to exist') - - def create_table_data_dataset_does_not_exist(self): - dataset_id = DATASET_ID + "6" - table_id = TABLE_ID + "1" - table_with_new_dataset = gbq._Table(_get_project_id(), dataset_id) - df = make_mixed_dataframe_v2(10) - table_with_new_dataset.create(table_id, gbq._generate_bq_schema(df)) - self.assertTrue(self.dataset.exists(dataset_id), - 'Expected dataset to exist') - self.assertTrue(table_with_new_dataset.exists( - table_id), 'Expected dataset to exist') - - def test_dataset_does_not_exist(self): - self.assertTrue(not self.dataset.exists( - DATASET_ID + "_not_found"), 'Expected dataset not to exist') - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added - # See `Issue 191 - # `__ - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. 
- - _skip_if_no_project_id() - _skip_local_auth_if_in_travis_env() - - _setup_common() - clean_gbq_environment() - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test - # is executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - - clean_gbq_environment() - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test - # is executed. - pass - - def test_upload_data(self): - destination_table = "{0}.{1}".format(DATASET_ID + "2", TABLE_ID + "1") - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000) - - sleep(30) # <- Curses Google!!! - - result = gbq.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), - project_id=_get_project_id()) - - self.assertEqual(result['num_rows'][0], test_size) - - -@pytest.mark.xfail(run=False, reason="intermittent failures") -class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): - # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 - # As a workaround to this issue, each test should use a unique table name. - # Make sure to modify the for loop range in the tearDownClass when a new - # test is added - # See `Issue 191 - # `__ - - @classmethod - def setUpClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - - _setup_common() - _skip_if_no_project_id() - _skip_if_no_private_key_contents() - - clean_gbq_environment(_get_private_key_contents()) - - def setUp(self): - # - PER-TEST FIXTURES - - # put here any instruction you want to be run *BEFORE* *EVERY* test - # is executed. - pass - - @classmethod - def tearDownClass(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - - clean_gbq_environment(_get_private_key_contents()) - - def tearDown(self): - # - PER-TEST FIXTURES - - # put here any instructions you want to be run *AFTER* *EVERY* test - # is executed. - pass - - def test_upload_data(self): - destination_table = "{0}.{1}".format(DATASET_ID + "3", TABLE_ID + "1") - - test_size = 10 - df = make_mixed_dataframe_v2(test_size) - - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_contents()) + df.to_gbq(destination_table, _get_project_id(), chunksize=10000, + private_key=_get_private_key_path()) sleep(30) # <- Curses Google!!! 
- result = gbq.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_contents()) + result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" + .format(destination_table), + project_id=_get_project_id(), + private_key=_get_private_key_path()) self.assertEqual(result['num_rows'][0], test_size) diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 1b501eb1d9bda..d966d6b7a1b32 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -3,7 +3,7 @@ import sys import warnings from textwrap import dedent -from functools import wraps +from functools import wraps, update_wrapper def deprecate(name, alternative, alt_name=None): @@ -233,3 +233,39 @@ def make_signature(func): if spec.keywords: args.append('**' + spec.keywords) return args, spec.args + + +class docstring_wrapper(object): + """ + decorator to wrap a function, + provide a dynamically evaluated doc-string + + Parameters + ---------- + func : callable + creator : callable + return the doc-string + default : str, optional + return this doc-string on error + """ + _attrs = ['__module__', '__name__', + '__qualname__', '__annotations__'] + + def __init__(self, func, creator, default=None): + self.func = func + self.creator = creator + self.default = default + update_wrapper( + self, func, [attr for attr in self._attrs + if hasattr(func, attr)]) + + def __call__(self, func, *args, **kwargs): + return self.func(*args, **kwargs) + + @property + def __doc__(self): + try: + return self.creator() + except Exception as exc: + msg = self.default or str(exc) + return msg diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index b0f5d3994ed64..ca75d4d02e927 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -88,13 +88,12 @@ def show_versions(as_json=False): ("lxml", lambda mod: mod.etree.__version__), ("bs4", lambda mod: mod.__version__), ("html5lib", lambda mod: mod.__version__), - ("httplib2", lambda mod: mod.__version__), - ("apiclient", lambda mod: mod.__version__), ("sqlalchemy", lambda mod: mod.__version__), ("pymysql", lambda mod: mod.__version__), ("psycopg2", lambda mod: mod.__version__), ("jinja2", lambda mod: mod.__version__), ("s3fs", lambda mod: mod.__version__), + ("pandas_gbq", lambda mod: mod.__version__), ("pandas_datareader", lambda mod: mod.__version__) ] From 251826f0861159160bd1d51eafadb6e0b4161f77 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 14:51:20 -0500 Subject: [PATCH 113/353] BUG: GH15429 transform result of timedelta from datetime The transform() operation needs to return a like-indexed. To facilitate this, transform starts with a copy of the original series. Then, after the computation for each group, sets the appropriate elements of the copied series equal to the result. At that point is does a type comparison, and discovers that the timedelta is not cast- able to a datetime. 
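As a rough illustration of the behavior this patch targets (distilled from the tests added below in test_transform.py; treat it as a sketch rather than the exact reproducer from the issue), a user-defined transform whose result has a different dtype than the grouped column should now keep that result dtype instead of being coerced back:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': pd.Timestamp('20130101'), 'B': np.arange(5)})

    # each group's max() - min() is a Timedelta; previously the result was
    # pushed back toward the original datetime64 dtype and the cast failed
    result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())
    # expected: a Series of Timedelta('0 days') aligned with df.index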
closes #10972 Author: Jeff Reback Author: Stephen Rauch Closes #15430 from stephenrauch/group-by-transform-timedelta-from-datetime and squashes the following commits: c3b0dd0 [Jeff Reback] PEP fix 2f48549 [Jeff Reback] fixup slow transforms cc43503 [Stephen Rauch] BUG: GH15429 transform result of timedelta from datetime --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/groupby.py | 34 +++++++++++----------- pandas/tests/groupby/test_filters.py | 1 + pandas/tests/groupby/test_transform.py | 39 +++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f0e4176472861..7b32cee7f7064 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -637,6 +637,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) +- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`) - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 0.2.0``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 831ca3886773e..2c61a73d6814e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2890,32 +2890,32 @@ def transform(self, func, *args, **kwargs): lambda: getattr(self, func)(*args, **kwargs)) # reg transform - dtype = self._selected_obj.dtype - result = self._selected_obj.values.copy() - + klass = self._selected_obj.__class__ + results = [] wrapper = lambda x: func(x, *args, **kwargs) - for i, (name, group) in enumerate(self): + for name, group in self: object.__setattr__(group, 'name', name) res = wrapper(group) if hasattr(res, 'values'): res = res.values - # may need to astype - try: - common_type = np.common_type(np.array(res), result) - if common_type != result.dtype: - result = result.astype(common_type) - except: - pass - indexer = self._get_index(name) - result[indexer] = res + s = klass(res, indexer) + results.append(s) - result = _possibly_downcast_to_dtype(result, dtype) - return self._selected_obj.__class__(result, - index=self._selected_obj.index, - name=self._selected_obj.name) + from pandas.tools.concat import concat + result = concat(results).sort_index() + + # we will only try to coerce the result type if + # we have a numeric dtype + dtype = self._selected_obj.dtype + if is_numeric_dtype(dtype): + result = _possibly_downcast_to_dtype(result, dtype) + + result.name = self._selected_obj.name + result.index = self._selected_obj.index + return result def _transform_fast(self, func): """ diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 46ddb5a5318fb..de6757786a363 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -216,6 +216,7 @@ def test_filter_against_workaround(self): grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 + old_way = s[grouped.transform(f).astype('bool')] new_way = grouped.filter(f) assert_series_equal(new_way.sort_values(), old_way.sort_values()) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index cf5e9eb26ff13..51920ec642705 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -3,7 +3,7 @@ import numpy as np 
import pandas as pd from pandas.util import testing as tm -from pandas import Series, DataFrame, Timestamp, MultiIndex, concat +from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range from pandas.types.common import _ensure_platform_int from .common import MixIn, assert_fp_equal @@ -190,6 +190,43 @@ def test_transform_bug(self): expected = Series(np.arange(5, 0, step=-1), name='B') assert_series_equal(result, expected) + def test_transform_datetime_to_timedelta(self): + # GH 15429 + # transforming a datetime to timedelta + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + expected = pd.Series([ + Timestamp('20130101') - Timestamp('20130101')] * 5, name='A') + + # this does date math without changing result type in transform + base_time = df['A'][0] + result = df.groupby('A')['A'].transform( + lambda x: x.max() - x.min() + base_time) - base_time + assert_series_equal(result, expected) + + # this does date math and causes the transform to return timedelta + result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min()) + assert_series_equal(result, expected) + + def test_transform_datetime_to_numeric(self): + # GH 10972 + # convert dt to float + df = DataFrame({ + 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) + result = df.groupby('a').b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.mean()) + + expected = Series([-0.5, 0.5], name='b') + assert_series_equal(result, expected) + + # convert dt to int + df = DataFrame({ + 'a': 1, 'b': date_range('2015-01-01', periods=2, freq='D')}) + result = df.groupby('a').b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.min()) + + expected = Series([0, 1], name='b') + assert_series_equal(result, expected) + def test_transform_multiple(self): grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) From 61fa8bed7c3aa620828923098c64af5610e0f9e3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 15:07:49 -0500 Subject: [PATCH 114/353] BUG: fix groupby.aggregate resulting dtype coercion, xref #11444, #13046 make sure .size includes the name of the grouped --- doc/source/whatsnew/v0.20.0.txt | 4 ++-- pandas/core/groupby.py | 23 ++++++++++++++------ pandas/tests/groupby/test_aggregate.py | 23 ++++++++++++++++++++ pandas/tests/groupby/test_transform.py | 29 +++++++++++++++++++++++++- pandas/tests/tseries/test_resample.py | 6 ++---- 5 files changed, 72 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7b32cee7f7064..9b4e6fbe3be10 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -632,12 +632,12 @@ Bug Fixes - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) -- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) -- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`) +- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) - Bug in 
``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 0.2.0``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2c61a73d6814e..3828e5dac5729 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -767,11 +767,14 @@ def _index_with_as_index(self, b): new.names = gp.names + original.names return new - def _try_cast(self, result, obj): + def _try_cast(self, result, obj, numeric_only=False): """ try to cast the result to our obj original type, we may have roundtripped thru object in the mean-time + if numeric_only is True, then only try to cast numerics + and not datetimelikes + """ if obj.ndim > 1: dtype = obj.values.dtype @@ -779,7 +782,8 @@ def _try_cast(self, result, obj): dtype = obj.dtype if not is_scalar(result): - result = _possibly_downcast_to_dtype(result, dtype) + if numeric_only and is_numeric_dtype(dtype) or not numeric_only: + result = _possibly_downcast_to_dtype(result, dtype) return result @@ -830,7 +834,7 @@ def _python_agg_general(self, func, *args, **kwargs): for name, obj in self._iterate_slices(): try: result, counts = self.grouper.agg_series(obj, f) - output[name] = self._try_cast(result, obj) + output[name] = self._try_cast(result, obj, numeric_only=True) except TypeError: continue @@ -1117,7 +1121,11 @@ def sem(self, ddof=1): @Appender(_doc_template) def size(self): """Compute group sizes""" - return self.grouper.size() + result = self.grouper.size() + + if isinstance(self.obj, Series): + result.name = getattr(self, 'name', None) + return result sum = _groupby_function('sum', 'add', np.sum) prod = _groupby_function('prod', 'prod', np.prod) @@ -1689,7 +1697,9 @@ def size(self): ids, _, ngroup = self.group_info ids = _ensure_platform_int(ids) out = np.bincount(ids[ids != -1], minlength=ngroup or None) - return Series(out, index=self.result_index, dtype='int64') + return Series(out, + index=self.result_index, + dtype='int64') @cache_readonly def _max_groupsize(self): @@ -2908,7 +2918,8 @@ def transform(self, func, *args, **kwargs): result = concat(results).sort_index() # we will only try to coerce the result type if - # we have a numeric dtype + # we have a numeric dtype, as these are *always* udfs + # the cython take a different path (and casting) dtype = self._selected_obj.dtype if is_numeric_dtype(dtype): result = _possibly_downcast_to_dtype(result, dtype) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index cb739546a2312..52b35048b6762 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -154,6 +154,29 @@ def test_agg_dict_parameter_cast_result_dtypes(self): assert_series_equal(grouped.time.last(), exp['time']) assert_series_equal(grouped.time.agg('last'), exp['time']) + # count + exp = pd.Series([2, 2, 2, 2], + index=Index(list('ABCD'), name='class'), + name='time') + assert_series_equal(grouped.time.agg(len), exp) + assert_series_equal(grouped.time.size(), exp) + + exp = pd.Series([0, 1, 1, 2], + index=Index(list('ABCD'), name='class'), + name='time') + assert_series_equal(grouped.time.count(), exp) + + def test_agg_cast_results_dtypes(self): + # similar to GH12821 + # xref #11444 + u = [datetime(2015, x + 1, 1) for x in range(12)] + v = list('aaabbbbbbccd') + df = pd.DataFrame({'X': v, 'Y': u}) + + result = df.groupby('X')['Y'].agg(len) + expected = 
df.groupby('X')['Y'].count() + assert_series_equal(result, expected) + def test_agg_must_agg(self): grouped = self.df.groupby('A')['C'] self.assertRaises(Exception, grouped.agg, lambda x: x.describe()) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 51920ec642705..2d21eab5822fe 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -4,7 +4,8 @@ import pandas as pd from pandas.util import testing as tm from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range -from pandas.types.common import _ensure_platform_int +from pandas.types.common import _ensure_platform_int, is_timedelta64_dtype +from pandas.compat import StringIO from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -227,6 +228,32 @@ def test_transform_datetime_to_numeric(self): expected = Series([0, 1], name='b') assert_series_equal(result, expected) + def test_transform_casting(self): + # 13046 + data = """ + idx A ID3 DATETIME + 0 B-028 b76cd912ff "2014-10-08 13:43:27" + 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" + 2 B-076 1a682034f8 "2014-10-08 14:29:01" + 3 B-023 b76cd912ff "2014-10-08 18:39:34" + 4 B-023 f88g8d7sds "2014-10-08 18:40:18" + 5 B-033 b76cd912ff "2014-10-08 18:44:30" + 6 B-032 b76cd912ff "2014-10-08 18:46:00" + 7 B-037 b76cd912ff "2014-10-08 18:52:15" + 8 B-046 db959faf02 "2014-10-08 18:59:59" + 9 B-053 b76cd912ff "2014-10-08 19:17:48" + 10 B-065 b76cd912ff "2014-10-08 19:21:38" + """ + df = pd.read_csv(StringIO(data), sep='\s+', + index_col=[0], parse_dates=['DATETIME']) + + result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.dtype) + + result = df[['ID3', 'DATETIME']].groupby('ID3').transform( + lambda x: x.diff()) + assert is_timedelta64_dtype(result.DATETIME.dtype) + def test_transform_multiple(self): grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 6e999c5b1d276..1535bd665fe8b 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -757,10 +757,8 @@ def test_resample_empty_series(self): freq in ['M', 'D']): # GH12871 - TODO: name should propagate, but currently # doesn't on lower / same frequency with PeriodIndex - assert_series_equal(result, expected, check_dtype=False, - check_names=False) - # this assert will break when fixed - self.assertTrue(result.name is None) + assert_series_equal(result, expected, check_dtype=False) + else: assert_series_equal(result, expected, check_dtype=False) From e0647ba059e57cd391cba1296d7cc039e7b2fc7e Mon Sep 17 00:00:00 2001 From: "Dr. 
Irv" Date: Mon, 27 Feb 2017 16:43:01 -0500 Subject: [PATCH 115/353] DOC: Update contributing for test_fast, fix doc Windows build (#15523) * DOC: Update contributing for test_fast, fix doc Windows build * add pip install for xdist --- doc/make.py | 4 ++-- doc/source/contributing.rst | 29 ++++++++++++++++++++--------- test_fast.bat | 3 +++ 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 test_fast.bat diff --git a/doc/make.py b/doc/make.py index d46be2611ce3d..8a6d4e5df24f0 100755 --- a/doc/make.py +++ b/doc/make.py @@ -202,8 +202,8 @@ def html(): raise SystemExit("Building HTML failed.") try: # remove stale file - os.system('rm source/html-styling.html') - os.system('cd build; rm -f html/pandas.zip;') + os.remove('source/html-styling.html') + os.remove('build/html/pandas.zip') except: pass diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 2f838a3ab2386..83f99b4f01b26 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -520,15 +520,6 @@ submitting code to run the check yourself on the diff:: git diff master | flake8 --diff -Furthermore, we've written a tool to check that your commits are PEP8 great, `pip install pep8radius -`_. Look at PEP8 fixes in your branch vs master with:: - - pep8radius master --diff - -and make these changes with:: - - pep8radius master --diff --in-place - Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ @@ -611,6 +602,26 @@ Or with one of the following constructs:: pytest pandas/tests/[test-module].py::[TestClass] pytest pandas/tests/[test-module].py::[TestClass]::[test_method] +Using `pytest-xdist `_, one can +speed up local testing on multicore machines. To use this feature, you will +need to install `pytest-xdist` via:: + + pip install pytest-xdist + +Two scripts are provided to assist with this. These scripts distribute +testing across 4 threads. + +On Unix variants, one can type:: + + test_fast.sh + +On Windows, one can type:: + + test_fast.bat + +This can significantly reduce the time it takes to locally run tests before +submitting a pull request. + For more, see the `pytest `_ documentation. .. 
versionadded:: 0.20.0 diff --git a/test_fast.bat b/test_fast.bat new file mode 100644 index 0000000000000..17dc54b580137 --- /dev/null +++ b/test_fast.bat @@ -0,0 +1,3 @@ +:: test on windows +set PYTHONHASHSEED=314159265 +pytest --skip-slow --skip-network -m "not single" -n 4 pandas From edd29390403baf9fc3de577871d6472c52a6ca80 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 20:18:21 -0500 Subject: [PATCH 116/353] BUG: fix to_gbq calling convention; now its a bound method of DataFrame xref #15484 --- pandas/core/frame.py | 16 +++++++++++----- pandas/util/decorators.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b02926ea8837..0963d14762ce5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -942,11 +942,6 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, chunksize=chunksize, verbose=verbose, reauth=reauth, if_exists=if_exists, private_key=private_key) - def _f(): - from pandas.io.gbq import _try_import - return _try_import().to_gbq.__doc__ - to_gbq = docstring_wrapper(to_gbq, _f) - @classmethod def from_records(cls, data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None): @@ -5430,6 +5425,17 @@ def combineMult(self, other): _EMPTY_SERIES = Series([]) +# patch in the doc-string for to_gbq +# and bind this method +def _f(): + from pandas.io.gbq import _try_import + return _try_import().to_gbq.__doc__ + + +DataFrame.to_gbq = types.MethodType(docstring_wrapper(DataFrame.to_gbq, _f), + DataFrame) + + def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index d966d6b7a1b32..d1ca480f7a568 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -259,7 +259,7 @@ def __init__(self, func, creator, default=None): self, func, [attr for attr in self._attrs if hasattr(func, attr)]) - def __call__(self, func, *args, **kwargs): + def __call__(self, *args, **kwargs): return self.func(*args, **kwargs) @property From 23889d3ec8396925269210d6d5782574e61769bd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Feb 2017 23:43:16 -0500 Subject: [PATCH 117/353] BUG: fix calling convention for to_gbq, take 2 --- pandas/core/frame.py | 3 +-- pandas/util/decorators.py | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0963d14762ce5..c47490bfbede4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5432,8 +5432,7 @@ def _f(): return _try_import().to_gbq.__doc__ -DataFrame.to_gbq = types.MethodType(docstring_wrapper(DataFrame.to_gbq, _f), - DataFrame) +DataFrame.to_gbq = docstring_wrapper(DataFrame.to_gbq, _f) def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index d1ca480f7a568..ee7e2f4302b10 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,5 +1,6 @@ from pandas.compat import StringIO, callable, signature from pandas.lib import cache_readonly # noqa +import types import sys import warnings from textwrap import dedent @@ -259,6 +260,10 @@ def __init__(self, func, creator, default=None): self, func, [attr for attr in self._attrs if hasattr(func, attr)]) + def __get__(self, instance, cls=None): + # we want to return the actual passed instance + return types.MethodType(self, instance) + def __call__(self, *args, **kwargs): 
return self.func(*args, **kwargs) From 7b84eb603d3a3d62f0a7cf9483acac5c168b7533 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Tue, 28 Feb 2017 02:43:22 -0800 Subject: [PATCH 118/353] DEPR: rename consolidate to _consolidate and create deprecation warning (#15501) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 14 +++++++++++--- pandas/core/groupby.py | 4 ++-- pandas/io/pytables.py | 6 +++--- pandas/tests/frame/test_block_internals.py | 11 ++++++++--- pandas/tests/frame/test_nonunique_indexes.py | 2 +- pandas/tests/io/test_pytables.py | 14 +++++++------- pandas/tests/test_generic.py | 2 +- pandas/tests/test_panel4d.py | 2 +- pandas/tools/concat.py | 2 +- pandas/tseries/resample.py | 2 +- 11 files changed, 37 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9b4e6fbe3be10..f91ffcdb81f9b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -493,6 +493,7 @@ Deprecations - ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) +- ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) .. _whatsnew_0200.prior_deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdc37e00f70e0..127aac970fbc1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2875,11 +2875,10 @@ def f(): self._protect_consolidate(f) - def consolidate(self, inplace=False): + def _consolidate(self, inplace=False): """ Compute NDFrame with "consolidated" internals (data of each dtype - grouped together in a single ndarray). Mainly an internal API function, - but available here to the savvy user + grouped together in a single ndarray). Parameters ---------- @@ -2898,6 +2897,15 @@ def consolidate(self, inplace=False): cons_data = self._protect_consolidate(f) return self._constructor(cons_data).__finalize__(self) + def consolidate(self, inplace=False): + """ + DEPRECATED: consolidate will be an internal implementation only. 
+ """ + # 15483 + warnings.warn("consolidate is deprecated and will be removed in a " + "future release.", FutureWarning, stacklevel=2) + return self._consolidate(inplace) + @property def _is_mixed_type(self): f = lambda: self._data.is_mixed_type diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3828e5dac5729..381a8edcb5192 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3904,7 +3904,7 @@ def _wrap_aggregated_output(self, output, names=None): if not self.as_index: result = DataFrame(output, columns=output_keys) self._insert_inaxis_grouper_inplace(result) - result = result.consolidate() + result = result._consolidate() else: index = self.grouper.result_index result = DataFrame(output, index=index, columns=output_keys) @@ -3924,7 +3924,7 @@ def _wrap_agged_blocks(self, items, blocks): result = DataFrame(mgr) self._insert_inaxis_grouper_inplace(result) - result = result.consolidate() + result = result._consolidate() else: index = self.grouper.result_index mgr = BlockManager(blocks, [items, index]) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 65ac4e5654dce..06154a86f95fa 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -835,7 +835,7 @@ def func(_start, _stop, _where): # concat and return return concat(objs, axis=axis, - verify_integrity=False).consolidate() + verify_integrity=False)._consolidate() # create the iterator it = TableIterator(self, s, func, where=where, nrows=nrows, @@ -3442,7 +3442,7 @@ def get_blk_items(mgr, blocks): return [mgr.items.take(blk.mgr_locs) for blk in blocks] # figure out data_columns and get out blocks - block_obj = self.get_object(obj).consolidate() + block_obj = self.get_object(obj)._consolidate() blocks = block_obj._data.blocks blk_items = get_blk_items(block_obj._data, blocks) if len(self.non_index_axes): @@ -3809,7 +3809,7 @@ def read(self, where=None, columns=None, **kwargs): if len(objs) == 1: wp = objs[0] else: - wp = concat(objs, axis=0, verify_integrity=False).consolidate() + wp = concat(objs, axis=0, verify_integrity=False)._consolidate() # apply the selection filters & axis orderings wp = self.process_axes(wp, columns=columns) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 7b64dea8c102d..accd3ddeb03d7 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -40,19 +40,24 @@ def test_cast_internals(self): def test_consolidate(self): self.frame['E'] = 7. - consolidated = self.frame.consolidate() + consolidated = self.frame._consolidate() self.assertEqual(len(consolidated._data.blocks), 1) # Ensure copy, do I want this? - recons = consolidated.consolidate() + recons = consolidated._consolidate() self.assertIsNot(recons, consolidated) assert_frame_equal(recons, consolidated) self.frame['F'] = 8. 
self.assertEqual(len(self.frame._data.blocks), 3) - self.frame.consolidate(inplace=True) + self.frame._consolidate(inplace=True) self.assertEqual(len(self.frame._data.blocks), 1) + def test_consolidate_deprecation(self): + self.frame['E'] = 7 + with tm.assert_produces_warning(FutureWarning): + self.frame.consolidate() + def test_consolidate_inplace(self): frame = self.frame.copy() # noqa diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 4ad88a12a2625..d6bcb85e01910 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -87,7 +87,7 @@ def check(result, expected=None): check(df, expected) # consolidate - df = df.consolidate() + df = df._consolidate() expected = DataFrame([[1, 1, 'bah', 3], [1, 2, 'bah', 3], [2, 3, 'bah', 3]], columns=['foo', 'foo', 'string', 'foo2']) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a840ff46aa845..d5a8b380d01f9 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -418,7 +418,7 @@ def test_repr(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) warnings.filterwarnings('ignore', category=PerformanceWarning) store['df'] = df @@ -762,7 +762,7 @@ def test_put_mixed_type(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') @@ -2077,7 +2077,7 @@ def test_table_mixed_dtypes(self): df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0) df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0) df.loc[3:6, ['obj1']] = np.nan - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: store.append('df1_mixed', df) @@ -2091,7 +2091,7 @@ def test_table_mixed_dtypes(self): wp['bool2'] = wp['ItemB'] > 0 wp['int1'] = 1 wp['int2'] = 2 - wp = wp.consolidate() + wp = wp._consolidate() with ensure_clean_store(self.path) as store: store.append('p1_mixed', wp) @@ -2106,7 +2106,7 @@ def test_table_mixed_dtypes(self): wp['bool2'] = wp['l2'] > 0 wp['int1'] = 1 wp['int2'] = 2 - wp = wp.consolidate() + wp = wp._consolidate() with ensure_clean_store(self.path) as store: store.append('p4d_mixed', wp) @@ -2134,7 +2134,7 @@ def test_unimplemented_dtypes_table_columns(self): df['obj1'] = 'foo' df['obj2'] = 'bar' df['datetime1'] = datetime.date(2001, 1, 2) - df = df.consolidate()._convert(datetime=True) + df = df._consolidate()._convert(datetime=True) with ensure_clean_store(self.path) as store: # this fails because we have a date in the object block...... 
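The test churn above and below only switches internal callers from the public ``consolidate()`` to the private ``_consolidate()``. A minimal sketch of the user-facing effect, based on the ``generic.py`` change and ``test_consolidate_deprecation`` in this patch (the DataFrame used here is illustrative, not taken from the tests):

    import warnings
    import pandas as pd

    df = pd.DataFrame({'A': [1.0, 2.0]})
    df['B'] = 'x'                    # mixed dtypes -> more than one block

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        df.consolidate()             # public method now emits a FutureWarning
    assert issubclass(w[-1].category, FutureWarning)

    df._consolidate()                # internal spelling, no warning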
@@ -2949,7 +2949,7 @@ def _make_one(): df['bool2'] = df['B'] > 0 df['int1'] = 1 df['int2'] = 2 - return df.consolidate() + return df._consolidate() df1 = _make_one() df2 = _make_one() diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 40cdbe083acd7..a2329e2d1768e 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -658,7 +658,7 @@ def test_validate_bool_args(self): super(DataFrame, df).sort_index(inplace=value) with self.assertRaises(ValueError): - super(DataFrame, df).consolidate(inplace=value) + super(DataFrame, df)._consolidate(inplace=value) with self.assertRaises(ValueError): super(DataFrame, df).fillna(value=0, inplace=value) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 902b42e7d77d7..2491bac2a7f19 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -677,7 +677,7 @@ def test_consolidate(self): self.panel4d['foo'] = 1. self.assertFalse(self.panel4d._data.is_consolidated()) - panel4d = self.panel4d.consolidate() + panel4d = self.panel4d._consolidate() self.assertTrue(panel4d._data.is_consolidated()) def test_ctor_dict(self): diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index 31d7a9eb9a01a..6405106118472 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -263,7 +263,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, raise TypeError("cannot concatenate a non-NDFrame object") # consolidate - obj.consolidate(inplace=True) + obj._consolidate(inplace=True) ndims.add(obj.ndim) # get the sample diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index a6a10c08966d6..75e550a065fd2 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -221,7 +221,7 @@ def _convert_obj(self, obj): ------- obj : converted object """ - obj = obj.consolidate() + obj = obj._consolidate() return obj def _get_binner_for_time(self): From dd368eb574f7f62f8e8e8d667d68b5d06ae241de Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Feb 2017 09:17:08 -0500 Subject: [PATCH 119/353] DEPR: remove pd.TimeSeries & Series.is_time_series xref #10890 Author: Jeff Reback Closes #15098 from jreback/time_series and squashes the following commits: d9101bc [Jeff Reback] fix back-compat for < 0.13 ed57bd5 [Jeff Reback] DEPR: remove legacy pd.TimeSeries class in favor of pd.Series --- doc/source/whatsnew/v0.20.0.txt | 42 ++++++++++++++++++++- pandas/compat/pickle_compat.py | 5 ++- pandas/core/api.py | 2 +- pandas/core/series.py | 17 --------- pandas/io/pytables.py | 4 -- pandas/tests/api/test_api.py | 2 +- pandas/tests/indexes/data/s1-0.12.0.pickle | Bin 862 -> 0 bytes pandas/tests/indexes/data/s2-0.12.0.pickle | Bin 814 -> 0 bytes pandas/tests/indexes/test_base.py | 11 ------ pandas/tests/io/data/legacy_hdf/legacy.h5 | Bin 14928 -> 0 bytes pandas/tests/io/test_pytables.py | 9 ----- pandas/tests/series/test_alter_axes.py | 3 -- pandas/tests/series/test_constructors.py | 15 -------- pandas/tests/series/test_timeseries.py | 2 - 14 files changed, 47 insertions(+), 65 deletions(-) delete mode 100644 pandas/tests/indexes/data/s1-0.12.0.pickle delete mode 100644 pandas/tests/indexes/data/s2-0.12.0.pickle delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy.h5 diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f91ffcdb81f9b..671df5760fb84 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -246,6 +246,46 @@ Using ``.iloc``. 
Here we will get the location of the 'A' column, then use *posi df.iloc[[0, 2], df.columns.get_loc('A')] +.. _whatsnew.api_breaking.io_compat + +Possible incompat for HDF5 formats for pandas < 0.13.0 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``pd.TimeSeries`` was deprecated officially in 0.17.0, though has only been an alias since 0.13.0. It has +been dropped in favor of ``pd.Series``. (:issue:``15098). + +This *may* cause HDF5 files that were created in prior versions to become unreadable if ``pd.TimeSeries`` +was used. This is most likely to be for pandas < 0.13.0. If you find yourself in this situation. +You can use a recent prior version of pandas to read in your HDF5 files, +then write them out again after applying the procedure below. + +.. code-block:: ipython + + In [2]: s = pd.TimeSeries([1,2,3], index=pd.date_range('20130101', periods=3)) + + In [3]: s + Out[3]: + 2013-01-01 1 + 2013-01-02 2 + 2013-01-03 3 + Freq: D, dtype: int64 + + In [4]: type(s) + Out[4]: pandas.core.series.TimeSeries + + In [5]: s = pd.Series(s) + + In [6]: s + Out[6]: + 2013-01-01 1 + 2013-01-02 2 + 2013-01-03 3 + Freq: D, dtype: int64 + + In [7]: type(s) + Out[7]: pandas.core.series.Series + + .. _whatsnew_0200.api_breaking.index_map: Map on Index types now return other Index types @@ -507,7 +547,7 @@ Removal of prior version deprecations/changes Similar functionality can be found in the `Google2Pandas `__ package. - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) - +- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``) .. _whatsnew_0200.performance: diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 240baa848adbc..b8ccd13c153d4 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -58,7 +58,10 @@ def load_reduce(self): # 15477 ('pandas.core.base', 'FrozenNDArray'): ('pandas.indexes.frozen', 'FrozenNDArray'), - ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList') + ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList'), + + # 10890 + ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series') } diff --git a/pandas/core/api.py b/pandas/core/api.py index 177e7b31cbd4f..eaebf45a038a0 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -13,7 +13,7 @@ UInt64Index, RangeIndex, Float64Index, MultiIndex) -from pandas.core.series import Series, TimeSeries +from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.panel import Panel, WidePanel from pandas.core.panel4d import Panel4D diff --git a/pandas/core/series.py b/pandas/core/series.py index da47ab5dfb003..ffe1be26fda54 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -277,13 +277,6 @@ def _constructor_expanddim(self): def _can_hold_na(self): return self._data._can_hold_na - @property - def is_time_series(self): - warnings.warn("is_time_series is deprecated. 
Please use " - "Series.index.is_all_dates", FutureWarning, stacklevel=2) - # return self._subtyp in ['time_series', 'sparse_time_series'] - return self.index.is_all_dates - _index = None def _set_axis(self, axis, labels, fastpath=False): @@ -2985,16 +2978,6 @@ def create_from_value(value, index, dtype): return subarr -# backwards compatiblity -class TimeSeries(Series): - - def __init__(self, *args, **kwargs): - # deprecation TimeSeries, #10890 - warnings.warn("TimeSeries is deprecated. Please use Series", - FutureWarning, stacklevel=2) - - super(TimeSeries, self).__init__(*args, **kwargs) - # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 06154a86f95fa..9ad53db305b59 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -23,8 +23,6 @@ from pandas.types.missing import array_equivalent import numpy as np - -import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, MultiIndex, Int64Index, isnull, concat, SparseSeries, SparseDataFrame, PeriodIndex, @@ -166,7 +164,6 @@ class DuplicateWarning(Warning): Series: u('series'), SparseSeries: u('sparse_series'), - pd.TimeSeries: u('series'), DataFrame: u('frame'), SparseDataFrame: u('sparse_frame'), Panel: u('wide'), @@ -175,7 +172,6 @@ class DuplicateWarning(Warning): # storer class map _STORER_MAP = { - u('TimeSeries'): 'LegacySeriesFixed', u('Series'): 'LegacySeriesFixed', u('DataFrame'): 'LegacyFrameFixed', u('DataMatrix'): 'LegacyFrameFixed', diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 90a0c1d5c9347..8ca369f8df83a 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -57,7 +57,7 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['TimeSeries', 'WidePanel', + deprecated_classes = ['WidePanel', 'SparseTimeSeries', 'Panel4D', 'SparseList'] diff --git a/pandas/tests/indexes/data/s1-0.12.0.pickle b/pandas/tests/indexes/data/s1-0.12.0.pickle deleted file mode 100644 index 0ce9cfdf3aa94fdfd9f8ad6ea00e72fa7eda6552..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 862 zcmZo*O3o|IEvVE>&M!*U%Pq|*$xJLNO049HFG@|$&nqq|DorloDr8J9NX$z~EQTm6 zPA$qzE#?Zz%uNl3FbkQy8CpXbliGs{nKir_z2#aV#&V^UR2HOi6|#gfrCE40cryYO zuxfZShcepu`T2SM2LdqR%}|om8T!FSUEoT>tcUxr)s_8Ijs3LWI_;WmV#&?@vK#N0 zoL>KQf8a;{X^E~M_s?K#x%;8zsr`i_ZuK`-fA@cJS*24K`*Oe8y*l?PA3p8(`ei62 zbnS%w5$TfaS1jJ`*NZZ%)|&BS|JvJ~j)$H<-hVEx)^DEoqx~v>jx4H;Il6z+xzj(_ zHvYH2f1A@KgW>9a)gQa(NFF=8|Ec5GEr(Bh*?<3;k-CNYJNvscUe7tHch>&PvcCaG zj~}sj+&X>D?sxa?tt`$aH9h;b|8i~p|IC=r_VS!ZE_W{fxSv5`anj?~XZ!izWbc?U z_u77j@IL~N8t(6RE7W>y+IiN#Bm*9>C6GYX%gjqjt>AJ=EJ-Z^2CpZWSI7nrU3N(5 zW&i`!7Z#u#8s03J0a~jO%9K|Oj0+$vX#)o@1H;3TA1{@885j)MdM#9dbp3S2b`X6| zW7|~_ExXH0MHNVM<#7n80qMy9bK*esiV22mLG*XUW4}SP+vX@Qbs%k=E@7qtq?>-M zX#vp}I~^~BXa(tOikd(=e@#{ah>l6nh|vPlt3U2p1)_g71^xulo+8iOwSn}ErB%~F zbZewVlMax+^6Jb55UpI3D6b2oi+I1~g6Mh<-JF)+2r15n#=YH_g(Fr^oAYBm;f)fRGVgmUKP z7v(1AWLBjX@^pr>K~xv=CZ|B7g9{WVDXBRniCl$zz~l^szy!@7%9354Qd}rd8_JmH R%~+b`*WQ-o*VdM#2LML=S0Vra diff --git a/pandas/tests/indexes/data/s2-0.12.0.pickle b/pandas/tests/indexes/data/s2-0.12.0.pickle deleted file mode 100644 index 2318be2d9978bd2edefc4fe1afb2244e0f4c4601..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 814 zcmZY7Z%7ki9KiA0G;?Q5kzN!cDrru$iA6FbI2KcLS0k+>lx5wvGnekR`!h;wro&Cv 
zs|e9i=8K{d(<1sqr0`HpGi+k`r@(CBaFL0)GE-C5b1r?;J#e2p`0>2>@~ao?X{*Un zBiEZ*N^Y`N^G1?o$r|(wOX9fc?+Ad{*{T=rTjKyZwHh~7?*j!)rvISJi}9740r_w|xsf(d z7f}_-Q#{OAwEo9LZC2bGu2>1f9oq;OEyE-K4`-7RVw!`^;o+U@84+Yi_IxZ=iXe$E z)v0Mvl#Y(u11$};l?!%U%jp*UoznkDu;59VIvsl8+|FDX)V|c!DEPsp91%=T7*EN7 zn!Cn+Dzz!U~i}3ioJMQuxLl1a4*z-x3)#lQL3nVSi_Bq=86iA;yLuNl{3;5 z$Eo1vGLno22DLFUGD1R|Srb`ptfiQ3E+Q&C%}Dizf7wx?y@9IcNtD?R*ApJNps_?` Y)dd5`#MuZDjf<>0O_NinaXNMKziLWDZU6uP diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 2f5b98d145e57..79d10cbda565e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -9,8 +9,6 @@ from pandas.compat import (range, lrange, lzip, u, text_type, zip, PY3, PY36) import operator -import os - import numpy as np from pandas import (period_range, date_range, Series, @@ -381,15 +379,6 @@ def test_view_with_args(self): # with arguments ind.view('i8') - def test_legacy_pickle_identity(self): - - # GH 8431 - pth = tm.get_data_path() - s1 = pd.read_pickle(os.path.join(pth, 's1-0.12.0.pickle')) - s2 = pd.read_pickle(os.path.join(pth, 's2-0.12.0.pickle')) - self.assertFalse(s1.index.identical(s2.index)) - self.assertFalse(s1.index.equals(s2.index)) - def test_astype(self): casted = self.intIndex.astype('i8') diff --git a/pandas/tests/io/data/legacy_hdf/legacy.h5 b/pandas/tests/io/data/legacy_hdf/legacy.h5 deleted file mode 100644 index 38b822dd169945b5f5022a219cc784a4c9951320..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14928 zcmeHN2|Sfq8^0(aNlls-)tzdjME0fVxU%M2vRxdOIp-4O%o+G)mJFVB)2b> zR!SU1C>2wKk%ViP>|4lp-|f3TrP24zeA85~pZ9;3=RNN^&w0-Cod0=msJ^!D6rnjn zB;*nhAWa~NqDkU5j9;cJu@WLr)H~n_9vt`K$l}EkJS2V+Qa1}P7scBr^I-)Ic)5j{ zjt+?xV&MLp&<5Sx#dQlO6X@v4 zboJ-jvnC<%foN`QqRqpJUbwlE1SY({0m44XV(*tD`rjx1emOE;okBGHe({gCmWa=V zGq5h2;yva1qmRSi@o+pO>H~2{Nc3A2alo7lu`T4{<5&bDf)pd0oQZgzmzNiPQh?(x zkpv%)=wuFXuH|@(SZ+FQQAnYL8L#nZJ&g3@h=~&qQ_RdLHk>?H{%t>wm?$AHFQdT8 zjg%jsp3#RmH_ZNd|84&%#6lnhMj>IQ`*3}Ys{U-VOdXnvA;WUrCc2Wbs;$4VqShJ} zIjX5?fRhr{z{Fpkrf2DE%kXtIwAHtuu3=dC2T-?K(3q;$dQMs@6cq;3NZU8qc8i|5 zk-s5b(NLKYpcD|`tK{WG*L77=Hc{T>>`Hf)w=&Z6u-37qF^z1Tf&y*b6cv2dIT^?) zY+7UKq2;ckX1>nD*2%!%YK@Y=v5m67vBD->McqJy&1tzUG$3 zI*yK;186Q@pNf@cND*qJh?G2|6krD7KXgv^Qm=)}oqlUGwV+&J~O5cICj* zZ3za2T@7%CnqcS7NhPrKzVO7_;w)%hQd0SAPAL#ns~O-CZ-)!geN}8D?f{aDII|pC z85vqzQ52`|L~%>R9u%M62t~0$G7?2?lNc0ZnLndA7|lYFmUR(DPo*>z^VVmfxOjIi zin~|^D3(7eLa~ifg(B6x4n@|%W)vsfXhYHASvQIS#uJ|-{VNO*LUDQYR227R&OouI zem07Iwo6g8_)Z4Jqyz;Ng&(P+=-#D)Am6H*nxpxH@cG52z&*pcu*kXnY4D6bSjueJ zb|fepyf|qhq?%F&9d15}kIHxfLlQ#r{F|Zy%cxH*n%w{rCy*q>O5ecU*L-{u=)VDx z74z!y5*lE)Mc>BRU)O-C8fUdv);)s9&UA0DkS>FEGn4E#oahG^=1(r`yD$Xp?b3gU zF{^?_#mfd(@TCF%FvI%P^-(}Tl=n>X+-D#(M_R_ZI~59xSlOOl(+LNbP{rBOU2x@G z+bP~bselrebh>&OoVZ&L%t5fqn zyZNPPt|b3$z!~kUW{Ol4H5i_JP$EU5=nCbhFrcP zjJCsY^mcLd-xv_ga76`>u!K+vB0g$61~}^aIeKBWNcp?@3Rwngf4qF9bOk0)?DMeP zm;2|U0b~t~94imb_jz!`n~zt1sNHkrksYT1X)}lLAWb25J#wGuAK*v#c0=r05Vdtx z{JmrKgAd=YP9phZ(;jV#XpOmW^R zOp--MjPM3zKHlYTM$U`5s~;WXrSJALm0x2D9Iu}_F&mR7_T9Mpzrbgrfw9g-_ThF! 
zsNs)37a@3YtT^@t8vY{gy86(#kI9eZy(8oPe|vt(Lz;}mdffjJ{jQvYb(qWNMNv8A zT$1pAh{V`_Z}}9gg};~I8_nnU{mur3@GG>r*l54pF%66rN4fStzPQ4be>=G5fA2AhT7S)Q2Cm-{EDfzffDjE~R(wvRLg{NYV{=NJk7T1x{Vwdb9JTV4fdmIwU zfjou)UQGC(6N}$3_o>wWb#Yw?wNn}%SVs2)r=9=t-3bvLdw)8J+cEJZHbb8pFX#HD zxA#vYc{lew`c^5s8AQHx5|XJbfk|X(E&H4% zaQ^ZJT`QLs&e?tU9owU?p|l+TuR8(4zu83%;mnN(w*>z zaonK#&H->cQAuqmGXs{{x5*7|dkJ@_UcILGT_sd^lrpEfJq2+hZ#vKK?Sm}WBX{eL zS3>&4V;&tBs=!sZZjJ2kSztwE((`2YSs+81)*r!}1`gD0$l4{526@W;4d!_~ftUFQ zXY2F6fT;^3ykgqw;NI=5r$JAj1Njv$p356`EsK<~ot!xElTJ1qTrc8xes44MovI^g*j@^L zig-kQl2QQV*H;&a=rzFRy^^p_fQwL(Q+hY~rA_posTSZ9zgWq+Jd@pTpuLmc6>8NE#b;eRs2JS{dG#3a*_R4HB14?^t9~UoK>eO&|N1eI1pSuP@!0*L2D|tF*LH-l$TM=GQ2^FkY8tclco|j2e>lmA(87q>#g^u6I^Lnad?YafwghNv2`3pFSH*77_Tt zG`tdiZr=_G-d2fkbXd}uEBjF1hJ5!ia^xJ66B|XYNBoldqJhsw17q>bJbYadiukj5 zCT$t^1{(e%?tbp$=9y$Utiy!+_;8--z7%WW@69t$;CjHP?=2vBW*+|i{4D{z0NaIg zGT6`6#uEEv%184%f_HH3e|+bdT=}>8&%4i$|7f0xetPE23&EJMDf)l&9IhTd)NlOn zdVV+0#J;<0{+rKZ7XHWc%tOkU#{XKL`Dg3@lUev*2J)9^pky^Kxc}@KX#Tm8-d*E6 zUGwW;!?CP=Cpmnt9+t-J>}ml&9&epOmP-J!?hcGxk8+s4%P_<(z7x)EbW$*w*9aay zc~u>@{yw<+u(sc30vpEMuav1=R0XNM$>Jt^TEY3;_Wc*<6oPuWc7fyO9bms{_T7rs zSFlt3)PtNDrw9I9t3jdI$><+ezJ`19^|XfVs-V=JoCVvGdO?=N#YHyWzW{qpSiF2e zEszYk{b2A|Jx~ysrt3{@0z6UIQqGd^fmTJv-lMv$;2)i~X@{;S0K343@8m3dfpht` zqmifYf(=!V8`xXeFvF&g{5_=#lz7s7ww-SW`aAk}2(Ntw*{-HLX!onYy!Gl^4((_F zvZu~IIrQ=gbU&T_;^d)PxKvtVi|*lWu(@jcw~>!{GCDTNOpZVI3W$6iQhM064SYp) z6MQ;ltuv5t^$X0lVP*>1@ebc!d_0x?i9g773V>!UxKr z+ghXP*Y|Wo+5@RW^$%{t#i?E4TCg4%T=jc(di^y(qu5^d=^cQASsFlS#ubp@F!P>y zOB?JPvba}t^(;K2bP6O!6v81dp`FrlQK0X5wm_(CCS2^Uus7g!4M^O*Dk!XY7`UYQ z(+xY*;R*5liEW_|U_|hip)A!dC{etOA?QrXXg+Ls>e!2VsC{f1Fwd_52f{br-TpKa z9L;fy6mzWrb`Q7aR;yP7i+Hn+21Pd1F7mLf%pCyYB7&6zce0_Lg44AMhblOHxZ;5Q z<3iY4U(@HY_9f&yAtjX_(+FO=#b!&LZUzZ`hWl?l=mJ-7?Dd;5aR8V+HVqNqT?bko x8?i3QR6}yy+Rzlh2C-)qD{|JB!30;^z>@kW@LtB&8lx@sKrE{CS2Ol6@LyVSHP`?E diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index d5a8b380d01f9..821d9956a2dfa 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4454,15 +4454,6 @@ def test_pytables_native2_read(self): d1 = store['detector'] self.assertIsInstance(d1, DataFrame) - def test_legacy_read(self): - with ensure_clean_store( - tm.get_data_path('legacy_hdf/legacy.h5'), - mode='r') as store: - store['a'] - store['b'] - store['c'] - store['d'] - def test_legacy_table_read(self): # legacy table types with ensure_clean_store( diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 6473dbeeaa1bc..5997b91097cbc 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -107,9 +107,6 @@ def test_set_index_makes_timeseries(self): s = Series(lrange(10)) s.index = idx - - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(s.is_time_series) self.assertTrue(s.index.is_all_dates) def test_reset_index(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index aef4c9269bc62..c15171f331df3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -37,22 +37,11 @@ def test_scalar_conversion(self): self.assertEqual(int(Series([1.])), 1) self.assertEqual(long(Series([1.])), 1) - def test_TimeSeries_deprecation(self): - - # deprecation TimeSeries, #10890 - with tm.assert_produces_warning(FutureWarning): - pd.TimeSeries(1, index=date_range('20130101', periods=3)) - def test_constructor(self): - # Recognize TimeSeries - with 
tm.assert_produces_warning(FutureWarning): - self.assertTrue(self.ts.is_time_series) self.assertTrue(self.ts.index.is_all_dates) # Pass in Series derived = Series(self.ts) - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(derived.is_time_series) self.assertTrue(derived.index.is_all_dates) self.assertTrue(tm.equalContents(derived.index, self.ts.index)) @@ -64,11 +53,7 @@ def test_constructor(self): self.assertEqual(mixed.dtype, np.object_) self.assertIs(mixed[1], np.NaN) - with tm.assert_produces_warning(FutureWarning): - self.assertFalse(self.empty.is_time_series) self.assertFalse(self.empty.index.is_all_dates) - with tm.assert_produces_warning(FutureWarning): - self.assertFalse(Series({}).is_time_series) self.assertFalse(Series({}).index.is_all_dates) self.assertRaises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index e0db813e60c14..8c22b3f047210 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -383,8 +383,6 @@ def test_mpl_compat_hack(self): def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) - with tm.assert_produces_warning(FutureWarning): - self.assertTrue(ser.is_time_series) self.assertTrue(ser.index.is_all_dates) self.assertIsInstance(ser.index, DatetimeIndex) From d0a281fd60a2099c932151280af88d5392ea9a84 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 28 Feb 2017 09:26:10 -0500 Subject: [PATCH 120/353] BUG: DataFrame index & column returned by corr & cov are the same (#14617) closes #14617 Author: Matt Roeschke Closes #15528 from mroeschke/fix_14617 and squashes the following commits: 5a46f0a [Matt Roeschke] Bug:DataFrame index & column returned by corr & cov are the same (#14617) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/frame.py | 6 ++++-- pandas/tests/frame/test_analytics.py | 9 +++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 671df5760fb84..54df7514a882d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -631,7 +631,7 @@ Bug Fixes - Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) - +- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c47490bfbede4..021ce59e3402b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4725,6 +4725,7 @@ def corr(self, method='pearson', min_periods=1): """ numeric_df = self._get_numeric_data() cols = numeric_df.columns + idx = cols.copy() mat = numeric_df.values if method == 'pearson': @@ -4757,7 +4758,7 @@ def corr(self, method='pearson', min_periods=1): correl[i, j] = c correl[j, i] = c - return self._constructor(correl, index=cols, columns=cols) + return self._constructor(correl, index=idx, columns=cols) def cov(self, min_periods=None): """ @@ -4780,6 +4781,7 @@ def cov(self, min_periods=None): """ numeric_df = self._get_numeric_data() cols = numeric_df.columns + idx = cols.copy() mat = numeric_df.values if notnull(mat).all(): @@ -4793,7 +4795,7 @@ def cov(self, min_periods=None): baseCov = _algos.nancorr(_ensure_float64(mat), cov=True, minp=min_periods) - return self._constructor(baseCov, 
index=cols, columns=cols) + return self._constructor(baseCov, index=idx, columns=cols) def corrwith(self, other, axis=0, drop=False): """ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 1f0d16e959cd7..111195363beb2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -118,6 +118,15 @@ def test_corr_int_and_boolean(self): for meth in ['pearson', 'kendall', 'spearman']: tm.assert_frame_equal(df.corr(meth), expected) + def test_corr_cov_independent_index_column(self): + # GH 14617 + df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), + columns=list("abcd")) + for method in ['cov', 'corr']: + result = getattr(df, method)() + assert result.index is not result.columns + assert result.index.equals(result.columns) + def test_cov(self): # min_periods no NAs (corner case) expected = self.frame.cov() From 2340fb8b97a3f65ce4f630075849e42fb256e3be Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Mar 2017 14:16:50 -0500 Subject: [PATCH 121/353] BLD: fix 2.7_LOCALE build simplify install_travis.sh, always using a clean miniconda install bump matplotlib to 1.4.0 for _LOCALE, numpy to 1.8.2 So this removes our testing of mpl 1.2.1 with numpy 1.7.1. We *still* test elsewhere 1.7.1, and mpl 1.3.1 (with 1.8.2) Author: Jeff Reback Closes #15540 from jreback/build and squashes the following commits: 58b6f2f [Jeff Reback] BLD: fix 2.7_LOCALE build --- ci/install_travis.sh | 88 +++++++++++++++----------------- ci/requirements-2.7_LOCALE.build | 2 +- ci/requirements-2.7_LOCALE.run | 8 ++- 3 files changed, 44 insertions(+), 54 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 802d8c9f6b776..b337f6e443be2 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -32,58 +32,50 @@ edit_init home_dir=$(pwd) echo "[home_dir: $home_dir]" -MINICONDA_DIR="$HOME/miniconda3" +# install miniconda +echo "[Using clean Miniconda install]" -if [ -d "$MINICONDA_DIR" ] && [ -e "$MINICONDA_DIR/bin/conda" ] && [ "$USE_CACHE" ]; then - echo "[Miniconda install already present from cache: $MINICONDA_DIR]" - - conda config --set always_yes yes --set changeps1 no || exit 1 - echo "[update conda]" - conda update -q conda || exit 1 - - # Useful for debugging any issues with conda - conda info -a || exit 1 - - # set the compiler cache to work - if [ "${TRAVIS_OS_NAME}" == "linux" ]; then - echo "[Using ccache]" - export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH - gcc=$(which gcc) - echo "[gcc: $gcc]" - ccache=$(which ccache) - echo "[ccache: $ccache]" - export CC='ccache gcc' - fi +MINICONDA_DIR="$HOME/miniconda3" +if [ -d "$MINICONDA_DIR" ]; then + rm -rf "$MINICONDA_DIR" +fi +# install miniconda +if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 +else + wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 +fi +bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + +echo "[update conda]" +conda config --set ssl_verify false || exit 1 +conda config --set always_yes true --set changeps1 false || exit 1 +conda update -q conda + +# add the pandas channel to take priority +# to add extra packages +echo "[add channels]" +conda config --add channels pandas || exit 1 +conda config --remove channels defaults || exit 1 +conda config --add channels defaults || exit 1 + +conda install anaconda-client + +# Useful for debugging any issues with conda +conda info -a || exit 1 + +# 
set the compiler cache to work +if [ "$USE_CACHE" ] && "${TRAVIS_OS_NAME}" == "linux" ]; then + echo "[Using ccache]" + export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH + gcc=$(which gcc) + echo "[gcc: $gcc]" + ccache=$(which ccache) + echo "[ccache: $ccache]" + export CC='ccache gcc' else - echo "[Using clean Miniconda install]" echo "[Not using ccache]" - rm -rf "$MINICONDA_DIR" - # install miniconda - if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 - else - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 - fi - bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 - - echo "[update conda]" - conda config --set ssl_verify false || exit 1 - conda config --set always_yes true --set changeps1 false || exit 1 - conda update -q conda - - # add the pandas channel to take priority - # to add extra packages - echo "[add channels]" - conda config --add channels pandas || exit 1 - conda config --remove channels defaults || exit 1 - conda config --add channels defaults || exit 1 - - conda install anaconda-client - - # Useful for debugging any issues with conda - conda info -a || exit 1 - fi # may have installation instructions for this build diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build index c17730b912651..28e2b96851eff 100644 --- a/ci/requirements-2.7_LOCALE.build +++ b/ci/requirements-2.7_LOCALE.build @@ -1,4 +1,4 @@ python-dateutil pytz=2013b -numpy=1.7.1 +numpy=1.8.2 cython=0.23 diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 1a9b42d832b0b..5d7cc31b7d55e 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -1,16 +1,14 @@ python-dateutil pytz=2013b -numpy=1.7.1 +numpy=1.8.2 xlwt=0.7.5 openpyxl=1.6.2 xlsxwriter=0.4.6 xlrd=0.9.2 bottleneck=0.8.0 -matplotlib=1.2.1 -patsy=0.1.0 +matplotlib=1.3.1 sqlalchemy=0.8.1 html5lib=1.0b2 lxml=3.2.1 -scipy=0.11.0 +scipy beautiful-soup=4.2.1 -bigquery=2.0.17 From 1c106c8427513775c59e1e93a20829fc67a0a983 Mon Sep 17 00:00:00 2001 From: Prasanjit Prakash Date: Wed, 1 Mar 2017 16:16:12 -0500 Subject: [PATCH 122/353] PERF: Rank categorical perf closes #15498 Author: Prasanjit Prakash Closes #15518 from ikilledthecat/rank_categorical_perf and squashes the following commits: 30b49b9 [Prasanjit Prakash] PERF: GH15498 - pep8 changes ad38544 [Prasanjit Prakash] PERF: GH15498 - asv tests and whatsnew 1ebdb56 [Prasanjit Prakash] PERF: categorical rank GH#15498 a67cd85 [Prasanjit Prakash] PERF: categorical rank GH#15498 81df7df [Prasanjit Prakash] PERF: categorical rank GH#15498 45dd125 [Prasanjit Prakash] PERF: categorical rank GH#15498 33249b3 [Prasanjit Prakash] PERF: categorical rank GH#15498 --- asv_bench/benchmarks/categoricals.py | 34 +++++++++++++++++++++++++++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 1 + pandas/core/categorical.py | 9 ++++++- pandas/tests/series/test_analytics.py | 33 +++++++++++++++++++------- 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index cca652c68cf15..153107911ca2c 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -63,3 +63,37 @@ def time_value_counts_dropna(self): def time_rendering(self): str(self.sel) + + +class Categoricals3(object): + goal_time = 0.2 + + def setup(self): + N = 100000 + ncats = 100 + + self.s1 = 
Series(np.array(tm.makeCategoricalIndex(N, ncats))) + self.s1_cat = self.s1.astype('category') + self.s1_cat_ordered = self.s1.astype('category', ordered=True) + + self.s2 = Series(np.random.randint(0, ncats, size=N)) + self.s2_cat = self.s2.astype('category') + self.s2_cat_ordered = self.s2.astype('category', ordered=True) + + def time_rank_string(self): + self.s1.rank() + + def time_rank_string_cat(self): + self.s1_cat.rank() + + def time_rank_string_cat_ordered(self): + self.s1_cat_ordered.rank() + + def time_rank_int(self): + self.s2.rank() + + def time_rank_int_cat(self): + self.s2_cat.rank() + + def time_rank_int_cat_ordered(self): + self.s2_cat_ordered.rank() diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 54df7514a882d..6e9dfb92dfd90 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -562,6 +562,7 @@ Performance Improvements - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) +- Improved performance of `rank()` for categorical data (:issue:`15498`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b11927a80fb2e..55d404f05dd1d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -992,6 +992,7 @@ def _get_data_algo(values, func_map): elif is_unsigned_integer_dtype(values): f = func_map['uint64'] values = _ensure_uint64(values) + else: values = _ensure_object(values) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index b88a6b171b316..d5dce250275d9 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1416,14 +1416,21 @@ def _values_for_rank(self): numpy array """ + from pandas import Series if self.ordered: values = self.codes mask = values == -1 if mask.any(): values = values.astype('float64') values[mask] = np.nan - else: + elif self.categories.is_numeric(): values = np.array(self) + else: + # reorder the categories (so rank can use the float codes) + # instead of passing an object array to rank + values = np.array( + self.rename_categories(Series(self.categories).rank()) + ) return values def order(self, inplace=False, ascending=True, na_position='last'): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b092e4f084767..b6985abb64e40 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1065,8 +1065,10 @@ def test_rank_categorical(self): exp_desc = pd.Series([6., 5., 4., 3., 2., 1.]) ordered = pd.Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] - ).astype('category', ).cat.set_categories( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], ordered=True ) assert_series_equal(ordered.rank(), exp) @@ -1075,19 +1077,33 @@ def test_rank_categorical(self): # Unordered categoricals should be ranked as objects unordered = pd.Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], - ).astype('category').cat.set_categories( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], ordered=False ) exp_unordered = 
pd.Series([2., 4., 6., 3., 1., 5.]) res = unordered.rank() assert_series_equal(res, exp_unordered) + unordered1 = pd.Series( + [1, 2, 3, 4, 5, 6], + ).astype( + 'category', + categories=[1, 2, 3, 4, 5, 6], + ordered=False + ) + exp_unordered1 = pd.Series([1., 2., 3., 4., 5., 6.]) + res1 = unordered1.rank() + assert_series_equal(res1, exp_unordered1) + # Test na_option for rank data na_ser = pd.Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] - ).astype('category', ).cat.set_categories( - [ + ).astype( + 'category', + categories=[ 'first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh' ], @@ -1123,8 +1139,9 @@ def test_rank_categorical(self): # Test with pct=True na_ser = pd.Series( ['first', 'second', 'third', 'fourth', np.NaN], - ).astype('category').cat.set_categories( - ['first', 'second', 'third', 'fourth'], + ).astype( + 'category', + categories=['first', 'second', 'third', 'fourth'], ordered=True ) exp_top = pd.Series([0.4, 0.6, 0.8, 1., 0.2]) From 4121c75e5447a2983d2db9f40f196a6684e0a6b6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Mar 2017 19:39:14 -0500 Subject: [PATCH 123/353] COMPAT: if docstring_wrapper is activated on a class, don't fail --- pandas/util/decorators.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index ee7e2f4302b10..62ff6ef14418a 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -261,6 +261,11 @@ def __init__(self, func, creator, default=None): if hasattr(func, attr)]) def __get__(self, instance, cls=None): + + # we are called with a class + if instance is None: + return self + # we want to return the actual passed instance return types.MethodType(self, instance) From 5441d39237e078308b45c65faca6a8355de8bd27 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Mar 2017 19:54:04 -0500 Subject: [PATCH 124/353] DOC: add pandas-gbq to doc-build --- ci/requirements-3.5_DOC_BUILD.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/requirements-3.5_DOC_BUILD.sh b/ci/requirements-3.5_DOC_BUILD.sh index 25bc63acc96d1..1a5d4643edcf2 100644 --- a/ci/requirements-3.5_DOC_BUILD.sh +++ b/ci/requirements-3.5_DOC_BUILD.sh @@ -4,6 +4,8 @@ source activate pandas echo "[install DOC_BUILD deps]" +pip install pandas-gbq + conda install -n pandas -c conda-forge feather-format conda install -n pandas -c r r rpy2 --yes From 29d81f3df81eb0a4d077ae1317df74d509cdc446 Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 1 Mar 2017 21:19:55 -0500 Subject: [PATCH 125/353] DOC: Styler.set_table_attributes docstring Author: Chris Closes #15545 from chris-b1/styler-docstring and squashes the following commits: d77a9f1 [Chris] DOC: Style.set_table_attributes docstring --- pandas/formats/style.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/formats/style.py b/pandas/formats/style.py index 89712910a22e1..e712010a8b4f2 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -631,11 +631,17 @@ def set_table_attributes(self, attributes): Parameters ---------- - precision: int + attributes : string Returns ------- self : Styler + + Examples + -------- + >>> df = pd.DataFrame(np.random.randn(10, 4)) + >>> df.style.set_table_attributes('class="pure-table"') + # ...
[span garbled in extraction: the tail of the ``set_table_attributes`` diff and what appears to be a README.rst badge-table change are unreadable here; the README diff adds a CircleCI build-status badge alongside the existing latest-release and package-status badges]
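Since the tail of the ``set_table_attributes`` diff above is unreadable, a short usage sketch of the documented method (a sketch only: the ``render`` output is abbreviated and the generated ``id`` differs per ``Styler``):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(10, 4))
styler = df.style.set_table_attributes('class="pure-table"')  # returns the Styler
html = styler.render()
print('class="pure-table"' in html)   # True
# the attributes end up on the opening tag of the rendered table, e.g.
# <table id="T_..." class="pure-table"> ...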
From 1400305899d55bee21253952de9f6f0cf245b089 Mon Sep 17 00:00:00 2001 From: James Goppert Date: Wed, 22 Feb 2017 10:58:41 -0500 Subject: [PATCH 090/353] ENH: Adds custom plot formatting for TimedeltaIndex. Author: James Goppert Author: James Goppert Closes #8711 Closes #15067 from jgoppert/tdi_plot_fix and squashes the following commits: 945ec14 [James Goppert] Merge branch 'master' into tdi_plot_fix 7db61ec [James Goppert] Create TimeSeries_TimedeltaFormatter. 232efe6 [James Goppert] Fix comment format and exception type for tdi plotting. 4eff697 [James Goppert] Add more time delta series plotting tests. f5f32bc [James Goppert] Link time delta index docs to better matplotlib docs. d588c2c [James Goppert] Fixes test for tdi w/o autofmt_xdate. b6e6a81 [James Goppert] Disables autofmt_xdate testing. c7851e3 [James Goppert] Adjusts tdi test draw calls to try to fix CI issue. 7d28842 [James Goppert] Switch to draw_idle to try to fix bug on xticks update. 3abc310 [James Goppert] Try plt.draw() instead of canvas.draw() to fix issue on osx 3.5. 91954bd [James Goppert] Finished unit test for timedelta plotting. 41ebc85 [James Goppert] Fixes for review comments from #15067. f021cbd [James Goppert] Support nano-second level precision x-axis labels. 5ec65fa [James Goppert] Plot fix for tdi and added more comments. b967d24 [James Goppert] flake8 fixes for tdi plotting. efe5636 [James Goppert] Adds custom plot formatting for TimedeltaIndex. --- doc/source/visualization.rst | 12 ++++ doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/plotting/test_datetimelike.py | 58 +++++++++++++++++ pandas/tools/plotting.py | 2 +- pandas/tseries/converter.py | 30 +++++++++ pandas/tseries/plotting.py | 73 +++++++++++++++------- 6 files changed, 154 insertions(+), 23 deletions(-) diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 2b2012dbf0b8a..e8998bf6f6f5c 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -1245,6 +1245,18 @@ in ``pandas.plot_params`` can be used in a `with statement`: plt.close('all') +Automatic Date Tick Adjustment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +``TimedeltaIndex`` now uses the native matplotlib +tick locator methods, it is useful to call the automatic +date tick adjustment from matplotlib for figures whose ticklabels overlap. + +See the :meth:`autofmt_xdate ` method and the +`matplotlib documentation `__ for more. + Subplots ~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 86f916bc0acfb..9124929ee5665 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -155,7 +155,7 @@ Other enhancements - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - +- ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations .. 
_whatsnew_0200.api_breaking: diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 25568f7eb61dc..cdacded4d7f35 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -9,6 +9,7 @@ from pandas import Index, Series, DataFrame from pandas.tseries.index import date_range, bdate_range +from pandas.tseries.tdi import timedelta_range from pandas.tseries.offsets import DateOffset from pandas.tseries.period import period_range, Period, PeriodIndex from pandas.tseries.resample import DatetimeIndex @@ -1270,6 +1271,63 @@ def test_plot_outofbounds_datetime(self): values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] self.plt.plot(values) + def test_format_timedelta_ticks_narrow(self): + + expected_labels = [ + '00:00:00.00000000{:d}'.format(i) + for i in range(10)] + + rng = timedelta_range('0', periods=10, freq='ns') + df = DataFrame(np.random.randn(len(rng), 3), rng) + ax = df.plot(fontsize=2) + fig = ax.get_figure() + fig.canvas.draw() + labels = ax.get_xticklabels() + self.assertEqual(len(labels), len(expected_labels)) + for l, l_expected in zip(labels, expected_labels): + self.assertEqual(l.get_text(), l_expected) + + def test_format_timedelta_ticks_wide(self): + + expected_labels = [ + '00:00:00', + '1 days 03:46:40', + '2 days 07:33:20', + '3 days 11:20:00', + '4 days 15:06:40', + '5 days 18:53:20', + '6 days 22:40:00', + '8 days 02:26:40', + '' + ] + + rng = timedelta_range('0', periods=10, freq='1 d') + df = DataFrame(np.random.randn(len(rng), 3), rng) + ax = df.plot(fontsize=2) + fig = ax.get_figure() + fig.canvas.draw() + labels = ax.get_xticklabels() + self.assertEqual(len(labels), len(expected_labels)) + for l, l_expected in zip(labels, expected_labels): + self.assertEqual(l.get_text(), l_expected) + + def test_timedelta_plot(self): + # test issue #8711 + s = Series(range(5), timedelta_range('1day', periods=5)) + _check_plot_works(s.plot) + + # test long period + index = timedelta_range('1 day 2 hr 30 min 10 s', + periods=10, freq='1 d') + s = Series(np.random.randn(len(index)), index) + _check_plot_works(s.plot) + + # test short period + index = timedelta_range('1 day 2 hr 30 min 10 s', + periods=10, freq='1 ns') + s = Series(np.random.randn(len(index)), index) + _check_plot_works(s.plot) + def _check_plot_works(f, freq=None, series=None, *args, **kwargs): import matplotlib.pyplot as plt diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index b2050d7d8d81e..d46c38c117445 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1781,7 +1781,7 @@ def _ts_plot(cls, ax, x, data, style=None, **kwds): lines = cls._plot(ax, data.index, data.values, style=style, **kwds) # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq) + format_dateaxis(ax, ax.freq, data.index) return lines def _get_stacking_id(self): diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 95ff9578fa3ee..db7049ebc89b3 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1000,3 +1000,33 @@ def __call__(self, x, pos=0): else: fmt = self.formatdict.pop(x, '') return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. 
+ """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + def __call__(self, x, pos=0): + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 89aecf2acc07e..4eddf54701889 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -12,11 +12,14 @@ from pandas.tseries.offsets import DateOffset import pandas.tseries.frequencies as frequencies from pandas.tseries.index import DatetimeIndex +from pandas.tseries.period import PeriodIndex +from pandas.tseries.tdi import TimedeltaIndex from pandas.formats.printing import pprint_thing import pandas.compat as compat from pandas.tseries.converter import (TimeSeries_DateLocator, - TimeSeries_DateFormatter) + TimeSeries_DateFormatter, + TimeSeries_TimedeltaFormatter) # --------------------------------------------------------------------- # Plotting functions and monkey patches @@ -49,7 +52,7 @@ def tsplot(series, plotf, ax=None, **kwargs): lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq) + format_dateaxis(ax, ax.freq, series.index) return lines @@ -278,8 +281,24 @@ def _maybe_convert_index(ax, data): # Patch methods for subplot. Only format_dateaxis is currently used. # Do we need the rest for convenience? - -def format_dateaxis(subplot, freq): +def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + +def format_dateaxis(subplot, freq, index): """ Pretty-formats the date axis (x-axis). @@ -288,26 +307,38 @@ def format_dateaxis(subplot, freq): default, changing the limits of the x axis will intelligently change the positions of the ticks. """ - majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_locator(majlocator) - subplot.xaxis.set_minor_locator(minlocator) - - majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. 
DatetimeIndex uses matplotlib.date directly + if isinstance(index, PeriodIndex): + + majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot) - minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot) - subplot.xaxis.set_major_formatter(majformatter) - subplot.xaxis.set_minor_formatter(minformatter) - - # x and y coord info - subplot.format_coord = lambda t, y: ( - "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = lambda t, y: ( + "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) + + elif isinstance(index, TimedeltaIndex): + subplot.xaxis.set_major_formatter( + TimeSeries_TimedeltaFormatter()) + else: + raise TypeError('index type not supported') pylab.draw_if_interactive() From 486e384a0525dc348ae8cfc30da3de6f1dc9c500 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 11:04:25 -0500 Subject: [PATCH 091/353] TST: skip some timedelta plotting tests on mac (on travis) for precision display issues xref #15067 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/plotting/test_datetimelike.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9124929ee5665..355756c6e605c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -156,6 +156,7 @@ Other enhancements - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) + .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations .. 
_whatsnew_0200.api_breaking: diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index cdacded4d7f35..673c34903b259 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -7,7 +7,7 @@ import numpy as np from pandas import Index, Series, DataFrame - +from pandas.compat import is_platform_mac from pandas.tseries.index import date_range, bdate_range from pandas.tseries.tdi import timedelta_range from pandas.tseries.offsets import DateOffset @@ -1272,6 +1272,8 @@ def test_plot_outofbounds_datetime(self): self.plt.plot(values) def test_format_timedelta_ticks_narrow(self): + if is_platform_mac(): + pytest.skip("skip on mac for precision display issue on older mpl") expected_labels = [ '00:00:00.00000000{:d}'.format(i) @@ -1288,6 +1290,8 @@ def test_format_timedelta_ticks_narrow(self): self.assertEqual(l.get_text(), l_expected) def test_format_timedelta_ticks_wide(self): + if is_platform_mac(): + pytest.skip("skip on mac for precision display issue on older mpl") expected_labels = [ '00:00:00', From 14fee4f3925994f5fd9761bd455b42b5a97b7a38 Mon Sep 17 00:00:00 2001 From: Justin Solinsky Date: Wed, 22 Feb 2017 11:21:27 -0500 Subject: [PATCH 092/353] ENH union_categoricals supports ignore_order GH13410 xref #13410 (ignore_order portion) Author: Justin Solinsky Closes #15219 from js3711/GH13410-ENHunion_categoricals and squashes the following commits: e9d00de [Justin Solinsky] GH15219 Documentation fixes based on feedback d278d62 [Justin Solinsky] ENH union_categoricals supports ignore_order GH13410 9b827ef [Justin Solinsky] ENH union_categoricals supports ignore_order GH13410 --- doc/source/categorical.rst | 11 +++++++ doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/tests/tools/test_concat.py | 54 +++++++++++++++++++++++++++++++ pandas/types/concat.py | 16 ++++++--- 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 18e429cfc92fa..db974922e1d76 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -693,6 +693,17 @@ The below raises ``TypeError`` because the categories are ordered and not identi Out[3]: TypeError: to union ordered Categoricals, all categories must be the same +.. versionadded:: 0.20.0 + +Ordered categoricals with different categories or orderings can be combined by +using the ``ignore_ordered=True`` argument. + +.. ipython:: python + + a = pd.Categorical(["a", "b", "c"], ordered=True) + b = pd.Categorical(["c", "b", "a"], ordered=True) + union_categoricals([a, b], ignore_order=True) + ``union_categoricals`` also works with a ``CategoricalIndex``, or ``Series`` containing categorical data, but note that the resulting array will always be a plain ``Categorical`` diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 355756c6e605c..bb5f19b301dc8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -156,9 +156,11 @@ Other enhancements - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. 
(:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) +- ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations + .. _whatsnew_0200.api_breaking: diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 2a28fccdc9b94..6d40de465bff8 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1662,6 +1662,60 @@ def test_union_categoricals_ordered(self): with tm.assertRaisesRegexp(TypeError, msg): union_categoricals([c1, c2]) + def test_union_categoricals_ignore_order(self): + # GH 15219 + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + msg = 'Categorical.ordered must be the same' + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + res = union_categoricals([c1, c1], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c1, c1], ignore_order=False) + exp = Categorical([1, 2, 3, 1, 2, 3], + categories=[1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, np.nan, 3, 2]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c2, c1], ignore_order=True, + sort_categories=True) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([4, 5, 6], ordered=True) + result = union_categoricals([c1, c2], ignore_order=True) + expected = Categorical([1, 2, 3, 4, 5, 6]) + tm.assert_categorical_equal(result, expected) + + msg = "to union ordered Categoricals, all categories must be the same" + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2]) + def test_union_categoricals_sort(self): # GH 13846 c1 = Categorical(['x', 'y', 'z']) diff --git a/pandas/types/concat.py b/pandas/types/concat.py index 827eb160c452d..9e47a97dd621a 100644 --- a/pandas/types/concat.py +++ b/pandas/types/concat.py @@ -208,7 +208,7 @@ def _concat_asobject(to_concat): return _concat_asobject(to_concat) -def union_categoricals(to_union, sort_categories=False): +def union_categoricals(to_union, sort_categories=False, ignore_order=False): """ Combine list-like of Categorical-like, unioning categories. All categories must have the same dtype. 
@@ -222,6 +222,11 @@ def union_categoricals(to_union, sort_categories=False): sort_categories : boolean, default False If true, resulting categories will be lexsorted, otherwise they will be ordered as they appear in the data. + ignore_order: boolean, default False + If true, the ordered attribute of the Categoricals will be ignored. + Results in an unordered categorical. + + .. versionadded:: 0.20.0 Returns ------- @@ -235,7 +240,7 @@ def union_categoricals(to_union, sort_categories=False): - all inputs are ordered and their categories are not identical - sort_categories=True and Categoricals are ordered ValueError - Emmpty list of categoricals passed + Empty list of categoricals passed """ from pandas import Index, Categorical, CategoricalIndex, Series @@ -264,7 +269,7 @@ def _maybe_unwrap(x): ordered = first.ordered new_codes = np.concatenate([c.codes for c in to_union]) - if sort_categories and ordered: + if sort_categories and not ignore_order and ordered: raise TypeError("Cannot use sort_categories=True with " "ordered Categoricals") @@ -272,7 +277,7 @@ def _maybe_unwrap(x): categories = categories.sort_values() indexer = categories.get_indexer(first.categories) new_codes = take_1d(indexer, new_codes, fill_value=-1) - elif all(not c.ordered for c in to_union): + elif ignore_order or all(not c.ordered for c in to_union): # different categories - union and recode cats = first.categories.append([c.categories for c in to_union[1:]]) categories = Index(cats.unique()) @@ -297,6 +302,9 @@ def _maybe_unwrap(x): else: raise TypeError('Categorical.ordered must be the same') + if ignore_order: + ordered = False + return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True) From 9ff3e52c53660269a4dcaaca25705139e4beade4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 11:29:28 -0500 Subject: [PATCH 093/353] TST: break out union_categoricals to separate test file --- pandas/tests/tools/test_concat.py | 332 ----------------- pandas/tests/tools/test_union_categoricals.py | 339 ++++++++++++++++++ 2 files changed, 339 insertions(+), 332 deletions(-) create mode 100644 pandas/tests/tools/test_union_categoricals.py diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 6d40de465bff8..f292aeda8cbe0 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -8,7 +8,6 @@ read_csv, isnull, Series, date_range, Index, Panel, MultiIndex, Timestamp, DatetimeIndex, Categorical, CategoricalIndex) -from pandas.types.concat import union_categoricals from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, makeCustomDataframe as mkdf, @@ -1511,337 +1510,6 @@ def test_concat_keys_with_none(self): keys=['b', 'c', 'd', 'e']) tm.assert_frame_equal(result, expected) - def test_union_categorical(self): - # GH 13361 - data = [ - (list('abc'), list('abd'), list('abcabd')), - ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), - ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), - - (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'], - ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']), - - (pd.date_range('2014-01-01', '2014-01-05'), - pd.date_range('2014-01-06', '2014-01-07'), - pd.date_range('2014-01-01', '2014-01-07')), - - (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'), - pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'), - pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')), - - (pd.period_range('2014-01-01', '2014-01-05'), - pd.period_range('2014-01-06', '2014-01-07'), - 
pd.period_range('2014-01-01', '2014-01-07')), - ] - - for a, b, combined in data: - for box in [Categorical, CategoricalIndex, Series]: - result = union_categoricals([box(Categorical(a)), - box(Categorical(b))]) - expected = Categorical(combined) - tm.assert_categorical_equal(result, expected, - check_category_order=True) - - # new categories ordered by appearance - s = Categorical(['x', 'y', 'z']) - s2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([s, s2]) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['x', 'y', 'z', 'a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - s = Categorical([0, 1.2, 2], ordered=True) - s2 = Categorical([0, 1.2, 2], ordered=True) - result = union_categoricals([s, s2]) - expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) - tm.assert_categorical_equal(result, expected) - - # must exactly match types - s = Categorical([0, 1.2, 2]) - s2 = Categorical([2, 3, 4]) - msg = 'dtype of categories must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([s, s2]) - - msg = 'No Categoricals to union' - with tm.assertRaisesRegexp(ValueError, msg): - union_categoricals([]) - - def test_union_categoricals_nan(self): - # GH 13759 - res = union_categoricals([pd.Categorical([1, 2, np.nan]), - pd.Categorical([3, 2, np.nan])]) - exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical(['A', 'B']), - pd.Categorical(['B', 'B', np.nan])]) - exp = Categorical(['A', 'B', 'B', 'B', np.nan]) - tm.assert_categorical_equal(res, exp) - - val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'), - pd.NaT] - val2 = [pd.NaT, pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-02-01')] - - res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) - exp = Categorical(val1 + val2, - categories=[pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-03-01'), - pd.Timestamp('2011-02-01')]) - tm.assert_categorical_equal(res, exp) - - # all NaN - res = union_categoricals([pd.Categorical([np.nan, np.nan]), - pd.Categorical(['X'])]) - exp = Categorical([np.nan, np.nan, 'X']) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical([np.nan, np.nan]), - pd.Categorical([np.nan, np.nan])]) - exp = Categorical([np.nan, np.nan, np.nan, np.nan]) - tm.assert_categorical_equal(res, exp) - - def test_union_categoricals_empty(self): - # GH 13759 - res = union_categoricals([pd.Categorical([]), - pd.Categorical([])]) - exp = Categorical([]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([pd.Categorical([]), - pd.Categorical([1.0])]) - exp = Categorical([1.0]) - tm.assert_categorical_equal(res, exp) - - # to make dtype equal - nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) - res = union_categoricals([nanc, - pd.Categorical([])]) - tm.assert_categorical_equal(res, nanc) - - def test_union_categorical_same_category(self): - # check fastpath - c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) - c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) - res = union_categoricals([c1, c2]) - exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], - categories=[1, 2, 3, 4]) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z']) - c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z']) - res = union_categoricals([c1, c2]) - exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'], - categories=['x', 'y', 'z']) - 
tm.assert_categorical_equal(res, exp) - - def test_union_categoricals_ordered(self): - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], ordered=False) - - msg = 'Categorical.ordered must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - res = union_categoricals([c1, c1]) - exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3, np.nan], ordered=True) - c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) - - res = union_categoricals([c1, c2]) - exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) - - msg = "to union ordered Categoricals, all categories must be the same" - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - def test_union_categoricals_ignore_order(self): - # GH 15219 - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], ordered=False) - - res = union_categoricals([c1, c2], ignore_order=True) - exp = Categorical([1, 2, 3, 1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - msg = 'Categorical.ordered must be the same' - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2], ignore_order=False) - - res = union_categoricals([c1, c1], ignore_order=True) - exp = Categorical([1, 2, 3, 1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([c1, c1], ignore_order=False) - exp = Categorical([1, 2, 3, 1, 2, 3], - categories=[1, 2, 3], ordered=True) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3, np.nan], ordered=True) - c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) - - res = union_categoricals([c1, c2], ignore_order=True) - exp = Categorical([1, 2, 3, np.nan, 3, 2]) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) - - res = union_categoricals([c1, c2], ignore_order=True) - exp = Categorical([1, 2, 3, 1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([c2, c1], ignore_order=True, - sort_categories=True) - exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) - tm.assert_categorical_equal(res, exp) - - c1 = Categorical([1, 2, 3], ordered=True) - c2 = Categorical([4, 5, 6], ordered=True) - result = union_categoricals([c1, c2], ignore_order=True) - expected = Categorical([1, 2, 3, 4, 5, 6]) - tm.assert_categorical_equal(result, expected) - - msg = "to union ordered Categoricals, all categories must be the same" - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2], ignore_order=False) - - with tm.assertRaisesRegexp(TypeError, msg): - union_categoricals([c1, c2]) - - def test_union_categoricals_sort(self): - # GH 13846 - c1 = Categorical(['x', 'y', 'z']) - c2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['a', 'b', 'c', 'x', 'y', 'z']) - tm.assert_categorical_equal(result, expected) - - # fastpath - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - 
- c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b']) - c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - skip resort - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['x', np.nan]) - c2 = Categorical([np.nan, 'b']) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical(['x', np.nan, np.nan, 'b'], - categories=['b', 'x']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([np.nan]) - c2 = Categorical([np.nan]) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical([np.nan, np.nan], categories=[]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([]) - c2 = Categorical([]) - result = union_categoricals([c1, c2], sort_categories=True) - expected = Categorical([]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) - with tm.assertRaises(TypeError): - union_categoricals([c1, c2], sort_categories=True) - - def test_union_categoricals_sort_false(self): - # GH 13846 - c1 = Categorical(['x', 'y', 'z']) - c2 = Categorical(['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], - categories=['x', 'y', 'z', 'a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['b', 'a', 'c']) - tm.assert_categorical_equal(result, expected) - - # fastpath - skip resort - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['a', 'b', 'b', 'c'], - categories=['a', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['x', np.nan]) - c2 = Categorical([np.nan, 'b']) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['x', np.nan, np.nan, 'b'], - categories=['x', 'b']) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([np.nan]) - c2 = Categorical([np.nan]) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical([np.nan, np.nan], categories=[]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical([]) - c2 = Categorical([]) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical([]) - tm.assert_categorical_equal(result, expected) - - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) - result = union_categoricals([c1, c2], sort_categories=False) - expected = Categorical(['b', 'a', 'a', 'c'], - categories=['b', 'a', 'c'], ordered=True) - 
tm.assert_categorical_equal(result, expected) - - def test_union_categorical_unwrap(self): - # GH 14173 - c1 = Categorical(['a', 'b']) - c2 = pd.Series(['b', 'c'], dtype='category') - result = union_categoricals([c1, c2]) - expected = Categorical(['a', 'b', 'b', 'c']) - tm.assert_categorical_equal(result, expected) - - c2 = CategoricalIndex(c2) - result = union_categoricals([c1, c2]) - tm.assert_categorical_equal(result, expected) - - c1 = Series(c1) - result = union_categoricals([c1, c2]) - tm.assert_categorical_equal(result, expected) - - with tm.assertRaises(TypeError): - union_categoricals([c1, ['a', 'b', 'c']]) - def test_concat_bug_1719(self): ts1 = tm.makeTimeSeries() ts2 = tm.makeTimeSeries()[::2] diff --git a/pandas/tests/tools/test_union_categoricals.py b/pandas/tests/tools/test_union_categoricals.py new file mode 100644 index 0000000000000..299b60f2a00b0 --- /dev/null +++ b/pandas/tests/tools/test_union_categoricals.py @@ -0,0 +1,339 @@ +import numpy as np +import pandas as pd +from pandas import Categorical, Series, CategoricalIndex +from pandas.types.concat import union_categoricals +from pandas.util import testing as tm + + +class TestUnionCategoricals(tm.TestCase): + + def test_union_categorical(self): + # GH 13361 + data = [ + (list('abc'), list('abd'), list('abcabd')), + ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), + ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), + + (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'], + ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']), + + (pd.date_range('2014-01-01', '2014-01-05'), + pd.date_range('2014-01-06', '2014-01-07'), + pd.date_range('2014-01-01', '2014-01-07')), + + (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'), + pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'), + pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')), + + (pd.period_range('2014-01-01', '2014-01-05'), + pd.period_range('2014-01-06', '2014-01-07'), + pd.period_range('2014-01-01', '2014-01-07')), + ] + + for a, b, combined in data: + for box in [Categorical, CategoricalIndex, Series]: + result = union_categoricals([box(Categorical(a)), + box(Categorical(b))]) + expected = Categorical(combined) + tm.assert_categorical_equal(result, expected, + check_category_order=True) + + # new categories ordered by appearance + s = Categorical(['x', 'y', 'z']) + s2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([s, s2]) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['x', 'y', 'z', 'a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + s = Categorical([0, 1.2, 2], ordered=True) + s2 = Categorical([0, 1.2, 2], ordered=True) + result = union_categoricals([s, s2]) + expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) + tm.assert_categorical_equal(result, expected) + + # must exactly match types + s = Categorical([0, 1.2, 2]) + s2 = Categorical([2, 3, 4]) + msg = 'dtype of categories must be the same' + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([s, s2]) + + msg = 'No Categoricals to union' + with tm.assertRaisesRegexp(ValueError, msg): + union_categoricals([]) + + def test_union_categoricals_nan(self): + # GH 13759 + res = union_categoricals([pd.Categorical([1, 2, np.nan]), + pd.Categorical([3, 2, np.nan])]) + exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical(['A', 'B']), + pd.Categorical(['B', 'B', np.nan])]) + exp = Categorical(['A', 'B', 'B', 'B', np.nan]) + tm.assert_categorical_equal(res, exp) + 
+ val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'), + pd.NaT] + val2 = [pd.NaT, pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-02-01')] + + res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) + exp = Categorical(val1 + val2, + categories=[pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-03-01'), + pd.Timestamp('2011-02-01')]) + tm.assert_categorical_equal(res, exp) + + # all NaN + res = union_categoricals([pd.Categorical([np.nan, np.nan]), + pd.Categorical(['X'])]) + exp = Categorical([np.nan, np.nan, 'X']) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical([np.nan, np.nan]), + pd.Categorical([np.nan, np.nan])]) + exp = Categorical([np.nan, np.nan, np.nan, np.nan]) + tm.assert_categorical_equal(res, exp) + + def test_union_categoricals_empty(self): + # GH 13759 + res = union_categoricals([pd.Categorical([]), + pd.Categorical([])]) + exp = Categorical([]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([pd.Categorical([]), + pd.Categorical([1.0])]) + exp = Categorical([1.0]) + tm.assert_categorical_equal(res, exp) + + # to make dtype equal + nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) + res = union_categoricals([nanc, + pd.Categorical([])]) + tm.assert_categorical_equal(res, nanc) + + def test_union_categorical_same_category(self): + # check fastpath + c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) + c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], + categories=[1, 2, 3, 4]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z']) + c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z']) + res = union_categoricals([c1, c2]) + exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'], + categories=['x', 'y', 'z']) + tm.assert_categorical_equal(res, exp) + + def test_union_categoricals_ordered(self): + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + msg = 'Categorical.ordered must be the same' + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2]) + + res = union_categoricals([c1, c1]) + exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + msg = "to union ordered Categoricals, all categories must be the same" + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_ignore_order(self): + # GH 15219 + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + msg = 'Categorical.ordered must be the same' + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + res = union_categoricals([c1, c1], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c1, c1], ignore_order=False) + exp = Categorical([1, 2, 3, 1, 2, 3], + categories=[1, 2, 3], 
ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, np.nan, 3, 2]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c2, c1], ignore_order=True, + sort_categories=True) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([4, 5, 6], ordered=True) + result = union_categoricals([c1, c2], ignore_order=True) + expected = Categorical([1, 2, 3, 4, 5, 6]) + tm.assert_categorical_equal(result, expected) + + msg = "to union ordered Categoricals, all categories must be the same" + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2], ignore_order=False) + + with tm.assertRaisesRegexp(TypeError, msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_sort(self): + # GH 13846 + c1 = Categorical(['x', 'y', 'z']) + c2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['a', 'b', 'c', 'x', 'y', 'z']) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) + c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b']) + c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) + c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['x', np.nan]) + c2 = Categorical([np.nan, 'b']) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(['x', np.nan, np.nan, 'b'], + categories=['b', 'x']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([np.nan, np.nan], categories=[]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) + c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) + with tm.assertRaises(TypeError): + union_categoricals([c1, c2], sort_categories=True) + + def test_union_categoricals_sort_false(self): + # GH 13846 + c1 = Categorical(['x', 'y', 'z']) + 
c2 = Categorical(['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], + categories=['x', 'y', 'z', 'a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) + c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['b', 'a', 'c']) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) + c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['a', 'b', 'b', 'c'], + categories=['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['x', np.nan]) + c2 = Categorical([np.nan, 'b']) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['x', np.nan, np.nan, 'b'], + categories=['x', 'b']) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([np.nan, np.nan], categories=[]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) + c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(['b', 'a', 'a', 'c'], + categories=['b', 'a', 'c'], ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_union_categorical_unwrap(self): + # GH 14173 + c1 = Categorical(['a', 'b']) + c2 = pd.Series(['b', 'c'], dtype='category') + result = union_categoricals([c1, c2]) + expected = Categorical(['a', 'b', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + + c2 = CategoricalIndex(c2) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + c1 = Series(c1) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + with tm.assertRaises(TypeError): + union_categoricals([c1, ['a', 'b', 'c']]) From f6385506dd668ae461581c9af564be5b98e6ff16 Mon Sep 17 00:00:00 2001 From: Kernc Date: Wed, 22 Feb 2017 13:41:16 -0500 Subject: [PATCH 094/353] BUG: Categorical.unique() preserves categories closes #13179 Author: Kernc Closes #15439 from kernc/Categorical.unique-nostrip-unused and squashes the following commits: 55733b8 [Kernc] fixup! BUG: Fix .groupby(categorical, sort=False) failing 2aec326 [Kernc] fixup! 
BUG: Fix .groupby(categorical, sort=False) failing c813146 [Kernc] PERF: add asv for categorical grouping 0c550e6 [Kernc] BUG: Fix .groupby(categorical, sort=False) failing --- asv_bench/benchmarks/groupby.py | 37 +++++++++++++++++++++ doc/source/whatsnew/v0.20.0.txt | 34 ++++++++++++++++++- pandas/core/categorical.py | 42 ++++++++++++++++++++++++ pandas/core/groupby.py | 18 +--------- pandas/indexes/category.py | 4 +++ pandas/tests/groupby/test_categorical.py | 24 ++++++++++++++ 6 files changed, 141 insertions(+), 18 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 03ff62568b405..59f55914ea4d3 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -492,6 +492,43 @@ def time_groupby_sum(self): self.df.groupby(['a'])['b'].sum() +class groupby_categorical(object): + goal_time = 0.2 + + def setup(self): + N = 100000 + arr = np.random.random(N) + + self.df = DataFrame(dict( + a=Categorical(np.random.randint(10000, size=N)), + b=arr)) + self.df_ordered = DataFrame(dict( + a=Categorical(np.random.randint(10000, size=N), ordered=True), + b=arr)) + self.df_extra_cat = DataFrame(dict( + a=Categorical(np.random.randint(100, size=N), + categories=np.arange(10000)), + b=arr)) + + def time_groupby_sort(self): + self.df.groupby('a')['b'].count() + + def time_groupby_nosort(self): + self.df.groupby('a', sort=False)['b'].count() + + def time_groupby_ordered_sort(self): + self.df_ordered.groupby('a')['b'].count() + + def time_groupby_ordered_nosort(self): + self.df_ordered.groupby('a', sort=False)['b'].count() + + def time_groupby_extra_cat_sort(self): + self.df_extra_cat.groupby('a')['b'].count() + + def time_groupby_extra_cat_nosort(self): + self.df_extra_cat.groupby('a', sort=False)['b'].count() + + class groupby_period(object): # GH 14338 goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index bb5f19b301dc8..e65276fe51fe8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -120,6 +120,39 @@ Notably, a new numerical index, ``UInt64Index``, has been created (:issue:`14937 - Bug in ``pd.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14915`) - Bug in ``pd.value_counts()`` in which unsigned 64-bit integers were being erroneously truncated in the output (:issue:`14934`) +.. _whatsnew_0200.enhancements.groupy_categorical + +GroupBy on Categoricals +^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueError`` when grouping on a categorical series with some categories not appearing in the data. (:issue:`13179`) + +.. ipython:: python + + chromosomes = np.r_[np.arange(1, 23).astype(str), ['X', 'Y']] + df = pd.DataFrame({ + 'A': np.random.randint(100), + 'B': np.random.randint(100), + 'C': np.random.randint(100), + 'chromosomes': pd.Categorical(np.random.choice(chromosomes, 100), + categories=chromosomes, + ordered=True)}) + df + +Previous Behavior: + +.. code-block:: ipython + + In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + --------------------------------------------------------------------------- + ValueError: items in new_categories are not the same as in old categories + +New Behavior: + +.. ipython:: python + + df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + .. _whatsnew_0200.enhancements.other: Other enhancements @@ -163,7 +196,6 @@ Other enhancements .. 
_whatsnew_0200.api_breaking: - Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 491db2e080953..b6898f11ffa74 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -602,6 +602,46 @@ def _get_categories(self): categories = property(fget=_get_categories, fset=_set_categories, doc=_categories_doc) + def _codes_for_groupby(self, sort): + """ + If sort=False, return a copy of self, coded with categories as + returned by .unique(), followed by any categories not appearing in + the data. If sort=True, return self. + + This method is needed solely to ensure the categorical index of the + GroupBy result has categories in the order of appearance in the data + (GH-8868). + + Parameters + ---------- + sort : boolean + The value of the sort paramter groupby was called with. + + Returns + ------- + Categorical + If sort=False, the new categories are set to the order of + appearance in codes (unless ordered=True, in which case the + original order is preserved), followed by any unrepresented + categories in the original order. + """ + + # Already sorted according to self.categories; all is fine + if sort: + return self + + # sort=False should order groups in as-encountered order (GH-8868) + cat = self.unique() + + # But for groupby to work, all categories should be present, + # including those missing from the data (GH-13179), which .unique() + # above dropped + cat.add_categories( + self.categories[~self.categories.isin(cat.categories)], + inplace=True) + + return self.reorder_categories(cat.categories) + _ordered = None def set_ordered(self, value, inplace=False): @@ -1853,8 +1893,10 @@ def unique(self): # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) cat = self.copy() + # keep nan in codes cat._codes = unique_codes + # exclude nan from indexer for categories take_codes = unique_codes[unique_codes != -1] if self.ordered: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ba2de295fa0a9..0b3fcba1c1ba5 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2300,23 +2300,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, # a passed Categorical elif is_categorical_dtype(self.grouper): - # must have an ordered categorical - if self.sort: - if not self.grouper.ordered: - - # technically we cannot group on an unordered - # Categorical - # but this a user convenience to do so; the ordering - # is preserved and if it's a reduction it doesn't make - # any difference - pass - - # fix bug #GH8868 sort=False being ignored in categorical - # groupby - else: - cat = self.grouper.unique() - self.grouper = self.grouper.reorder_categories( - cat.categories) + self.grouper = self.grouper._codes_for_groupby(self.sort) # we make a CategoricalIndex out of the cat grouper # preserving the categories / ordered attributes diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index acb2758641a62..5299a094156cd 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -550,6 +550,10 @@ def _append_same_dtype(self, to_concat, name): result.name = name return result + def _codes_for_groupby(self, sort): + """ Return a Categorical adjusted for groupby """ + return self.values._codes_for_groupby(sort) + @classmethod def _add_comparison_methods(cls): """ add in comparison methods """ diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 
eebd0e0f490c1..cfcb531bedab8 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -284,6 +284,30 @@ def test_groupby_multi_categorical_as_index(self): tm.assert_frame_equal(result, expected, check_index_type=True) + def test_groupby_preserve_categories(self): + # GH-13179 + categories = list('abc') + + # ordered=True + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=True)}) + index = pd.CategoricalIndex(categories, categories, ordered=True) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, index) + + # ordered=False + df = DataFrame({'A': pd.Categorical(list('ba'), + categories=categories, + ordered=False)}) + sort_index = pd.CategoricalIndex(categories, categories, ordered=False) + nosort_index = pd.CategoricalIndex(list('bac'), list('bac'), + ordered=False) + tm.assert_index_equal(df.groupby('A', sort=True).first().index, + sort_index) + tm.assert_index_equal(df.groupby('A', sort=False).first().index, + nosort_index) + def test_groupby_preserve_categorical_dtype(self): # GH13743, GH13854 df = DataFrame({'A': [1, 2, 1, 1, 2], From f4edb053e17e51e8c2bed7c16755c4f7f3222117 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Feb 2017 13:52:46 -0500 Subject: [PATCH 095/353] PEP: pep issue in pandas/tests/tools/test_concat.py --- pandas/tests/tools/test_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index f292aeda8cbe0..a2b5773f551c9 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -7,7 +7,7 @@ from pandas import (DataFrame, concat, read_csv, isnull, Series, date_range, Index, Panel, MultiIndex, Timestamp, - DatetimeIndex, Categorical, CategoricalIndex) + DatetimeIndex) from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, makeCustomDataframe as mkdf, From 03eca9dad6c911d7df12377839e8eb3bb6028d98 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Feb 2017 08:19:24 -0500 Subject: [PATCH 096/353] CI: use correct circleci badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 195b76f64b37f..7bc350d1c6675 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ - circleci build status + circleci build status
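
To illustrate the groupby-on-Categorical change from the patch above, here is a minimal sketch whose values mirror test_groupby_preserve_categories; the printed reprs are abbreviated.

    import pandas as pd

    df = pd.DataFrame({'A': pd.Categorical(list('ba'),
                                           categories=list('abc'),
                                           ordered=False)})

    # sort=True keeps the declared category order in the result index ...
    print(df.groupby('A', sort=True).first().index)
    # CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, ...)

    # ... while sort=False now orders groups by first appearance and appends the
    # unobserved category 'c' instead of raising ValueError (GH 13179).
    print(df.groupby('A', sort=False).first().index)
    # CategoricalIndex(['b', 'a', 'c'], categories=['b', 'a', 'c'], ordered=False, ...)
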
... """ self.table_attributes = attributes return self From 3f91d5a764f019f017fd7f0268d75fd6001b208f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 2 Mar 2017 03:39:34 -0500 Subject: [PATCH 126/353] TST: split tests/format/test_format.py (#15546) closes #15531 --- pandas/tests/formats/test_eng_formatting.py | 195 ++ pandas/tests/formats/test_format.py | 2610 +------------------ pandas/tests/formats/test_to_csv.py | 216 ++ pandas/tests/formats/test_to_html.py | 1861 +++++++++++++ pandas/tests/formats/test_to_latex.py | 351 +++ 5 files changed, 2653 insertions(+), 2580 deletions(-) create mode 100644 pandas/tests/formats/test_eng_formatting.py create mode 100644 pandas/tests/formats/test_to_csv.py create mode 100644 pandas/tests/formats/test_to_html.py create mode 100644 pandas/tests/formats/test_to_latex.py diff --git a/pandas/tests/formats/test_eng_formatting.py b/pandas/tests/formats/test_eng_formatting.py new file mode 100644 index 0000000000000..d2badd4fc160a --- /dev/null +++ b/pandas/tests/formats/test_eng_formatting.py @@ -0,0 +1,195 @@ +import numpy as np +import pandas as pd +from pandas import DataFrame +from pandas.compat import u +import pandas.formats.format as fmt +from pandas.util import testing as tm + + +class TestEngFormatter(tm.TestCase): + + def test_eng_float_formatter(self): + df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) + + fmt.set_eng_float_format() + result = df.to_string() + expected = (' A\n' + '0 1.410E+00\n' + '1 141.000E+00\n' + '2 14.100E+03\n' + '3 1.410E+06') + self.assertEqual(result, expected) + + fmt.set_eng_float_format(use_eng_prefix=True) + result = df.to_string() + expected = (' A\n' + '0 1.410\n' + '1 141.000\n' + '2 14.100k\n' + '3 1.410M') + self.assertEqual(result, expected) + + fmt.set_eng_float_format(accuracy=0) + result = df.to_string() + expected = (' A\n' + '0 1E+00\n' + '1 141E+00\n' + '2 14E+03\n' + '3 1E+06') + self.assertEqual(result, expected) + + self.reset_display_options() + + def compare(self, formatter, input, output): + formatted_input = formatter(input) + msg = ("formatting of %s results in '%s', expected '%s'" % + (str(input), formatted_input, output)) + self.assertEqual(formatted_input, output, msg) + + def compare_all(self, formatter, in_out): + """ + Parameters: + ----------- + formatter: EngFormatter under test + in_out: list of tuples. Each tuple = (number, expected_formatting) + + It is tested if 'formatter(number) == expected_formatting'. + *number* should be >= 0 because formatter(-number) == fmt is also + tested. 
*fmt* is derived from *expected_formatting* + """ + for input, output in in_out: + self.compare(formatter, input, output) + self.compare(formatter, -input, "-" + output[1:]) + + def test_exponents_with_eng_prefix(self): + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + f = np.sqrt(2) + in_out = [ + (f * 10 ** -24, " 1.414y"), (f * 10 ** -23, " 14.142y"), + (f * 10 ** -22, " 141.421y"), (f * 10 ** -21, " 1.414z"), + (f * 10 ** -20, " 14.142z"), (f * 10 ** -19, " 141.421z"), + (f * 10 ** -18, " 1.414a"), (f * 10 ** -17, " 14.142a"), + (f * 10 ** -16, " 141.421a"), (f * 10 ** -15, " 1.414f"), + (f * 10 ** -14, " 14.142f"), (f * 10 ** -13, " 141.421f"), + (f * 10 ** -12, " 1.414p"), (f * 10 ** -11, " 14.142p"), + (f * 10 ** -10, " 141.421p"), (f * 10 ** -9, " 1.414n"), + (f * 10 ** -8, " 14.142n"), (f * 10 ** -7, " 141.421n"), + (f * 10 ** -6, " 1.414u"), (f * 10 ** -5, " 14.142u"), + (f * 10 ** -4, " 141.421u"), (f * 10 ** -3, " 1.414m"), + (f * 10 ** -2, " 14.142m"), (f * 10 ** -1, " 141.421m"), + (f * 10 ** 0, " 1.414"), (f * 10 ** 1, " 14.142"), + (f * 10 ** 2, " 141.421"), (f * 10 ** 3, " 1.414k"), + (f * 10 ** 4, " 14.142k"), (f * 10 ** 5, " 141.421k"), + (f * 10 ** 6, " 1.414M"), (f * 10 ** 7, " 14.142M"), + (f * 10 ** 8, " 141.421M"), (f * 10 ** 9, " 1.414G"), + (f * 10 ** 10, " 14.142G"), (f * 10 ** 11, " 141.421G"), + (f * 10 ** 12, " 1.414T"), (f * 10 ** 13, " 14.142T"), + (f * 10 ** 14, " 141.421T"), (f * 10 ** 15, " 1.414P"), + (f * 10 ** 16, " 14.142P"), (f * 10 ** 17, " 141.421P"), + (f * 10 ** 18, " 1.414E"), (f * 10 ** 19, " 14.142E"), + (f * 10 ** 20, " 141.421E"), (f * 10 ** 21, " 1.414Z"), + (f * 10 ** 22, " 14.142Z"), (f * 10 ** 23, " 141.421Z"), + (f * 10 ** 24, " 1.414Y"), (f * 10 ** 25, " 14.142Y"), + (f * 10 ** 26, " 141.421Y")] + self.compare_all(formatter, in_out) + + def test_exponents_without_eng_prefix(self): + formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False) + f = np.pi + in_out = [ + (f * 10 ** -24, " 3.1416E-24"), + (f * 10 ** -23, " 31.4159E-24"), + (f * 10 ** -22, " 314.1593E-24"), + (f * 10 ** -21, " 3.1416E-21"), + (f * 10 ** -20, " 31.4159E-21"), + (f * 10 ** -19, " 314.1593E-21"), + (f * 10 ** -18, " 3.1416E-18"), + (f * 10 ** -17, " 31.4159E-18"), + (f * 10 ** -16, " 314.1593E-18"), + (f * 10 ** -15, " 3.1416E-15"), + (f * 10 ** -14, " 31.4159E-15"), + (f * 10 ** -13, " 314.1593E-15"), + (f * 10 ** -12, " 3.1416E-12"), + (f * 10 ** -11, " 31.4159E-12"), + (f * 10 ** -10, " 314.1593E-12"), + (f * 10 ** -9, " 3.1416E-09"), + (f * 10 ** -8, " 31.4159E-09"), + (f * 10 ** -7, " 314.1593E-09"), + (f * 10 ** -6, " 3.1416E-06"), + (f * 10 ** -5, " 31.4159E-06"), + (f * 10 ** -4, " 314.1593E-06"), + (f * 10 ** -3, " 3.1416E-03"), + (f * 10 ** -2, " 31.4159E-03"), + (f * 10 ** -1, " 314.1593E-03"), + (f * 10 ** 0, " 3.1416E+00"), + (f * 10 ** 1, " 31.4159E+00"), + (f * 10 ** 2, " 314.1593E+00"), + (f * 10 ** 3, " 3.1416E+03"), + (f * 10 ** 4, " 31.4159E+03"), + (f * 10 ** 5, " 314.1593E+03"), + (f * 10 ** 6, " 3.1416E+06"), + (f * 10 ** 7, " 31.4159E+06"), + (f * 10 ** 8, " 314.1593E+06"), + (f * 10 ** 9, " 3.1416E+09"), + (f * 10 ** 10, " 31.4159E+09"), + (f * 10 ** 11, " 314.1593E+09"), + (f * 10 ** 12, " 3.1416E+12"), + (f * 10 ** 13, " 31.4159E+12"), + (f * 10 ** 14, " 314.1593E+12"), + (f * 10 ** 15, " 3.1416E+15"), + (f * 10 ** 16, " 31.4159E+15"), + (f * 10 ** 17, " 314.1593E+15"), + (f * 10 ** 18, " 3.1416E+18"), + (f * 10 ** 19, " 31.4159E+18"), + (f * 10 ** 20, " 314.1593E+18"), + (f * 10 ** 21, " 3.1416E+21"), + (f * 10 ** 
22, " 31.4159E+21"), + (f * 10 ** 23, " 314.1593E+21"), + (f * 10 ** 24, " 3.1416E+24"), + (f * 10 ** 25, " 31.4159E+24"), + (f * 10 ** 26, " 314.1593E+24")] + self.compare_all(formatter, in_out) + + def test_rounding(self): + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + in_out = [(5.55555, ' 5.556'), (55.5555, ' 55.556'), + (555.555, ' 555.555'), (5555.55, ' 5.556k'), + (55555.5, ' 55.556k'), (555555, ' 555.555k')] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + in_out = [(5.55555, ' 5.6'), (55.5555, ' 55.6'), (555.555, ' 555.6'), + (5555.55, ' 5.6k'), (55555.5, ' 55.6k'), (555555, ' 555.6k')] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True) + in_out = [(5.55555, ' 6'), (55.5555, ' 56'), (555.555, ' 556'), + (5555.55, ' 6k'), (55555.5, ' 56k'), (555555, ' 556k')] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + result = formatter(0) + self.assertEqual(result, u(' 0.000')) + + def test_nan(self): + # Issue #11981 + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + result = formatter(np.nan) + self.assertEqual(result, u('NaN')) + + df = pd.DataFrame({'a': [1.5, 10.3, 20.5], + 'b': [50.3, 60.67, 70.12], + 'c': [100.2, 101.33, 120.33]}) + pt = df.pivot_table(values='a', index='b', columns='c') + fmt.set_eng_float_format(accuracy=1) + result = pt.to_string() + self.assertTrue('NaN' in result) + self.reset_display_options() + + def test_inf(self): + # Issue #11981 + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + result = formatter(np.inf) + self.assertEqual(result, u('inf')) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 476c6a636ae5a..ddf9d35841ce7 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1,50 +1,41 @@ # -*- coding: utf-8 -*- +""" +test output formatting for Series/DataFrame +including to_string & reprs +""" + # TODO(wesm): lots of issues making flake8 hard # flake8: noqa from __future__ import print_function -from distutils.version import LooseVersion import re -from pandas.compat import (range, zip, lrange, StringIO, PY3, - u, lzip, is_platform_windows, - is_platform_32bit) -import pandas.compat as compat import itertools from operator import methodcaller import os import sys -from textwrap import dedent import warnings +from datetime import datetime -from numpy import nan -from numpy.random import randn -import numpy as np - -import codecs - -div_style = '' -try: - import IPython - if IPython.__version__ < LooseVersion('3.0.0'): - div_style = ' style="max-width:1500px;overflow:auto;"' -except (ImportError, AttributeError): - pass +import pytest -from pandas import DataFrame, Series, Index, Timestamp, MultiIndex, date_range, NaT +import numpy as np +import pandas as pd +from pandas import (DataFrame, Series, Index, Timestamp, MultiIndex, + date_range, NaT, read_table) +from pandas.compat import (range, zip, lrange, StringIO, PY3, + u, lzip, is_platform_windows, + is_platform_32bit) +import pandas.compat as compat import pandas.formats.format as fmt -import pandas.util.testing as tm -import pandas.core.common as com import pandas.formats.printing as printing + +import pandas.util.testing as tm from pandas.util.terminal import get_terminal_size -import pandas as pd from pandas.core.config import (set_option, get_option, option_context, reset_option) -from datetime import 
datetime - -import pytest use_32bit_repr = is_platform_windows() or is_platform_32bit() @@ -288,7 +279,7 @@ def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() if term_width < 10 or term_height < 10: pytest.skip("terminal size too small, " - "{0} x {1}".format(term_width, term_height)) + "{0} x {1}".format(term_width, term_height)) def mkframe(n): index = ['%05d' % i for i in range(n)] @@ -829,1393 +820,6 @@ def test_datetimelike_frame(self): '[10 rows x 2 columns]') self.assertEqual(repr(df), expected) - def test_to_html_with_col_space(self): - def check_with_width(df, col_space): - import re - # check that col_space affects HTML generation - # and be very brittle about it. - html = df.to_html(col_space=col_space) - hdrs = [x for x in html.split(r"\n") if re.search(r"\s]", x)] - self.assertTrue(len(hdrs) > 0) - for h in hdrs: - self.assertTrue("min-width" in h) - self.assertTrue(str(col_space) in h) - - df = DataFrame(np.random.random(size=(1, 3))) - - check_with_width(df, 30) - check_with_width(df, 50) - - def test_to_html_with_empty_string_label(self): - # GH3547, to_html regards empty string labels as repeated labels - data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]} - df = DataFrame(data).set_index(['c1', 'c2']) - res = df.to_html() - self.assertTrue("rowspan" not in res) - - def test_to_html_unicode(self): - df = DataFrame({u('\u03c3'): np.arange(10.)}) - expected = u'
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
\u03c3
00.0
11.0
22.0
33.0
44.0
55.0
66.0
77.0
88.0
99.0
' - self.assertEqual(df.to_html(), expected) - df = DataFrame({'A': [u('\u03c3')]}) - expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n
A
0\u03c3
' - self.assertEqual(df.to_html(), expected) - - def test_to_html_decimal(self): - # GH 12031 - df = DataFrame({'A': [6.0, 3.1, 2.2]}) - result = df.to_html(decimal=',') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
A
06,0
13,1
22,2
') - self.assertEqual(result, expected) - - def test_to_html_escaped(self): - a = 'str", - b: ""}, - 'co>l2': {a: "", - b: ""}} - rs = DataFrame(test_dict).to_html() - xp = """ - - - - - - - - - - - - - - - - - - - -
co<l1co>l2
str<ing1 &amp;<type 'str'><type 'str'>
stri>ng2 &amp;<type 'str'><type 'str'>
""" - - self.assertEqual(xp, rs) - - def test_to_html_escape_disabled(self): - a = 'strbold", - b: "bold"}, - 'co>l2': {a: "bold", - b: "bold"}} - rs = DataFrame(test_dict).to_html(escape=False) - xp = """ - - - - - - - - - - - - - - - - - -
co - co>l2
str - boldbold
stri>ng2 &boldbold
""" - - self.assertEqual(xp, rs) - - def test_to_html_multiindex_index_false(self): - # issue 8452 - df = DataFrame({ - 'a': range(2), - 'b': range(3, 5), - 'c': range(5, 7), - 'd': range(3, 5) - }) - df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) - result = df.to_html(index=False) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ab
cdcd
0353
1464
""" - - self.assertEqual(result, expected) - - df.index = Index(df.index.values, name='idx') - result = df.to_html(index=False) - self.assertEqual(result, expected) - - def test_to_html_multiindex_sparsify_false_multi_sparse(self): - with option_context('display.multi_sparse', False): - index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01
foo
0001
0123
1045
1167
""" - - self.assertEqual(result, expected) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=index[::2], index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
foo01
00
foo
0001
0123
1045
1167
""" - - self.assertEqual(result, expected) - - def test_to_html_multiindex_sparsify(self): - index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) - - result = df.to_html() - expected = """ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01
foo
0001
123
1045
167
""" - - self.assertEqual(result, expected) - - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], - index=index) - - result = df.to_html() - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
foo01
00
foo
0001
123
1045
167
""" - - self.assertEqual(result, expected) - - def test_to_html_multiindex_odd_even_truncate(self): - # GH 14882 - Issue on truncation with odd length DataFrame - mi = MultiIndex.from_product([[100, 200, 300], - [10, 20, 30], - [1, 2, 3, 4, 5, 6, 7]], - names=['a', 'b', 'c']) - df = DataFrame({'n': range(len(mi))}, index=mi) - result = df.to_html(max_rows=60) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
20128
229
......
633
734
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" - self.assertEqual(result, expected) - - # Test that ... appears in a middle level - result = df.to_html(max_rows=56) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
.........
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" - self.assertEqual(result, expected) - - def test_to_html_index_formatter(self): - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=['foo', None], - index=lrange(4)) - - f = lambda x: 'abcd' [x] - result = df.to_html(formatters={'__index__': f}) - expected = """\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fooNone
a01
b23
c45
d67
""" - - self.assertEqual(result, expected) - - def test_to_html_datetime64_monthformatter(self): - months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] - x = DataFrame({'months': months}) - - def format_func(x): - return x.strftime('%Y-%m') - result = x.to_html(formatters={'months': format_func}) - expected = """\ - - - - - - - - - - - - - - - - - -
months
02016-01
12016-02
""" - self.assertEqual(result, expected) - - def test_to_html_datetime64_hourformatter(self): - - x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'], - format='%H:%M:%S.%f')}) - - def format_func(x): - return x.strftime('%H:%M') - result = x.to_html(formatters={'hod': format_func}) - expected = """\ - - - - - - - - - - - - - - - - - -
hod
010:10
112:12
""" - self.assertEqual(result, expected) - - def test_to_html_regression_GH6098(self): - df = DataFrame({u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], - u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), - u('1er')], - 'données1': np.random.randn(5), - 'données2': np.random.randn(5)}) - # it works - df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() - - def test_to_html_truncate(self): - pytest.skip("unreliable on travis") - index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) - df = DataFrame(index=index, columns=range(20)) - fmt.set_option('display.max_rows', 8) - fmt.set_option('display.max_columns', 4) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
01...1819
2001-01-01NaNNaN...NaNNaN
2001-01-02NaNNaN...NaNNaN
2001-01-03NaNNaN...NaNNaN
2001-01-04NaNNaN...NaNNaN
..................
2001-01-17NaNNaN...NaNNaN
2001-01-18NaNNaN...NaNNaN
2001-01-19NaNNaN...NaNNaN
2001-01-20NaNNaN...NaNNaN
-

20 rows × 20 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_truncate_multi_index(self): - pytest.skip("unreliable on travis") - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays, columns=arrays) - fmt.set_option('display.max_rows', 7) - fmt.set_option('display.max_columns', 7) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
barbaz...fooqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
...........................
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
-

8 rows × 8 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_truncate_multi_index_sparse_off(self): - pytest.skip("unreliable on travis") - arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays, columns=arrays) - fmt.set_option('display.max_rows', 7) - fmt.set_option('display.max_columns', 7) - fmt.set_option('display.multi_sparse', False) - result = df._repr_html_() - expected = '''\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
barbarbaz...fooquxqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
bartwoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
quxtwoNaNNaNNaN...NaNNaNNaN
-

8 rows × 8 columns

-'''.format(div_style) - if compat.PY2: - expected = expected.decode('utf-8') - self.assertEqual(result, expected) - - def test_to_html_border(self): - df = DataFrame({'A': [1, 2]}) - result = df.to_html() - assert 'border="1"' in result - - def test_to_html_border_option(self): - df = DataFrame({'A': [1, 2]}) - with pd.option_context('html.border', 0): - result = df.to_html() - self.assertTrue('border="0"' in result) - self.assertTrue('border="0"' in df._repr_html_()) - - def test_to_html_border_zero(self): - df = DataFrame({'A': [1, 2]}) - result = df.to_html(border=0) - self.assertTrue('border="0"' in result) - def test_nonunicode_nonascii_alignment(self): df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) rep_str = df.to_string() @@ -2223,7 +827,7 @@ def test_nonunicode_nonascii_alignment(self): self.assertEqual(len(lines[1]), len(lines[2])) def test_unicode_problem_decoding_as_ascii(self): - dm = DataFrame({u('c/\u03c3'): Series({'test': np.NaN})}) + dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) def test_string_repr_encoding(self): @@ -2271,7 +875,7 @@ def test_pprint_thing(self): # escape embedded tabs in string # GH #2038 - self.assertTrue(not "\t" in pp_t("a\tb", escape_chars=("\t", ))) + assert "\t" not in pp_t("a\tb", escape_chars=("\t", )) def test_wide_repr(self): with option_context('mode.sim_interactive', True, @@ -2294,7 +898,8 @@ def test_wide_repr(self): def test_wide_repr_wide_columns(self): with option_context('mode.sim_interactive', True): - df = DataFrame(randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90]) + df = DataFrame(np.random.randn(5, 3), + columns=['a' * 90, 'b' * 90, 'c' * 90]) rep_str = repr(df) self.assertEqual(len(rep_str.splitlines()), 20) @@ -2346,8 +951,8 @@ def test_wide_repr_multiindex_cols(self): with option_context('mode.sim_interactive', True): max_cols = get_option('display.max_columns') midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) - mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - - 1))) + mcols = MultiIndex.from_arrays( + tm.rands_array(3, size=(2, max_cols - 1))) df = DataFrame(tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols) df.index.names = ['Level 0', 'Level 1'] @@ -2465,16 +1070,14 @@ def test_index_with_nan(self): self.assertEqual(result, expected) def test_to_string(self): - from pandas import read_table - import re # big mixed - biggie = DataFrame({'A': randn(200), + biggie = DataFrame({'A': np.random.randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan s = biggie.to_string() buf = StringIO() @@ -2713,414 +1316,6 @@ def test_show_dimensions(self): self.assertFalse('5 rows' in str(df)) self.assertFalse('5 rows' in df._repr_html_()) - def test_to_html(self): - # big mixed - biggie = DataFrame({'A': randn(200), - 'B': tm.makeStringIndex(200)}, - index=lrange(200)) - - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan - s = biggie.to_html() - - buf = StringIO() - retval = biggie.to_html(buf=buf) - self.assertIsNone(retval) - self.assertEqual(buf.getvalue(), s) - - tm.assertIsInstance(s, compat.string_types) - - biggie.to_html(columns=['B', 'A'], col_space=17) - biggie.to_html(columns=['B', 'A'], - formatters={'A': lambda x: '%.1f' % x}) - - biggie.to_html(columns=['B', 'A'], float_format=str) - biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) - - frame = 
DataFrame(index=np.arange(200)) - frame.to_html() - - def test_to_html_filename(self): - biggie = DataFrame({'A': randn(200), - 'B': tm.makeStringIndex(200)}, - index=lrange(200)) - - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan - with tm.ensure_clean('test.html') as path: - biggie.to_html(path) - with open(path, 'r') as f: - s = biggie.to_html() - s2 = f.read() - self.assertEqual(s, s2) - - frame = DataFrame(index=np.arange(200)) - with tm.ensure_clean('test.html') as path: - frame.to_html(path) - with open(path, 'r') as f: - self.assertEqual(frame.to_html(), f.read()) - - def test_to_html_with_no_bold(self): - x = DataFrame({'x': randn(5)}) - ashtml = x.to_html(bold_rows=False) - self.assertFalse('")]) - - def test_to_html_columns_arg(self): - result = self.frame.to_html(columns=['A']) - self.assertNotIn('B', result) - - def test_to_html_multiindex(self): - columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), - np.mod(lrange(4), 2))), - names=['CL0', 'CL1']) - df = DataFrame([list('abcd'), list('efgh')], columns=columns) - result = df.to_html(justify='left') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
CL001
CL10101
0abcd
1efgh
') - - self.assertEqual(result, expected) - - columns = MultiIndex.from_tuples(list(zip( - range(4), np.mod( - lrange(4), 2)))) - df = DataFrame([list('abcd'), list('efgh')], columns=columns) - - result = df.to_html(justify='right') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
0123
0101
0abcd
1efgh
') - - self.assertEqual(result, expected) - - def test_to_html_justify(self): - df = DataFrame({'A': [6, 30000, 2], - 'B': [1, 2, 70000], - 'C': [223442, 0, 1]}, - columns=['A', 'B', 'C']) - result = df.to_html(justify='left') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
061223442
13000020
22700001
') - self.assertEqual(result, expected) - - result = df.to_html(justify='right') - expected = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
061223442
13000020
22700001
') - self.assertEqual(result, expected) - - def test_to_html_index(self): - index = ['foo', 'bar', 'baz'] - df = DataFrame({'A': [1, 2, 3], - 'B': [1.2, 3.4, 5.6], - 'C': ['one', 'two', np.NaN]}, - columns=['A', 'B', 'C'], - index=index) - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
foo11.2one
bar23.4two
baz35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - - expected_without_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
11.2one
23.4two
35.6NaN
') - result = df.to_html(index=False) - for i in index: - self.assertNotIn(i, result) - self.assertEqual(result, expected_without_index) - df.index = Index(['foo', 'bar', 'baz'], name='idx') - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
idx
foo11.2one
bar23.4two
baz35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - self.assertEqual(df.to_html(index=False), expected_without_index) - - tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')] - df.index = MultiIndex.from_tuples(tuples) - - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
foocar11.2one
bike23.4two
barcar35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - - result = df.to_html(index=False) - for i in ['foo', 'bar', 'car', 'bike']: - self.assertNotIn(i, result) - # must be the same result as normal index - self.assertEqual(result, expected_without_index) - - df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2']) - expected_with_index = ('\n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - ' \n' - '
ABC
idx1idx2
foocar11.2one
bike23.4two
barcar35.6NaN
') - self.assertEqual(df.to_html(), expected_with_index) - self.assertEqual(df.to_html(index=False), expected_without_index) - def test_repr_html(self): self.frame._repr_html_() @@ -3254,7 +1449,7 @@ def test_info_repr(self): def test_info_repr_max_cols(self): # GH #6939 - df = DataFrame(randn(10, 5)) + df = DataFrame(np.random.randn(10, 5)) with option_context('display.large_repr', 'info', 'display.max_columns', 1, 'display.max_info_columns', 4): @@ -3299,46 +1494,6 @@ def get_ipython(): self.reset_display_options() - def test_to_html_with_classes(self): - df = DataFrame() - result = df.to_html(classes="sortable draggable") - expected = dedent(""" - - - - - - - - - -
- - """).strip() - self.assertEqual(result, expected) - - result = df.to_html(classes=["sortable", "draggable"]) - self.assertEqual(result, expected) - - def test_to_html_no_index_max_rows(self): - # GH https://github.com/pandas-dev/pandas/issues/14998 - df = DataFrame({"A": [1, 2, 3, 4]}) - result = df.to_html(index=False, max_rows=1) - expected = dedent("""\ - - - - - - - - - - - -
A
1
""") - self.assertEqual(result, expected) - def test_pprint_pathological_object(self): """ if the test fails, the stack will overflow and nose crash, @@ -3373,541 +1528,6 @@ def test_dict_entries(self): self.assertTrue("'a': 1" in val) self.assertTrue("'b': 2" in val) - def test_to_latex_filename(self): - with tm.ensure_clean('test.tex') as path: - self.frame.to_latex(path) - - with open(path, 'r') as f: - self.assertEqual(self.frame.to_latex(), f.read()) - - # test with utf-8 and encoding option (GH 7061) - df = DataFrame([[u'au\xdfgangen']]) - with tm.ensure_clean('test.tex') as path: - df.to_latex(path, encoding='utf-8') - with codecs.open(path, 'r', encoding='utf-8') as f: - self.assertEqual(df.to_latex(), f.read()) - - # test with utf-8 without encoding option - if compat.PY3: # python3: pandas default encoding is utf-8 - with tm.ensure_clean('test.tex') as path: - df.to_latex(path) - with codecs.open(path, 'r', encoding='utf-8') as f: - self.assertEqual(df.to_latex(), f.read()) - else: - # python2 default encoding is ascii, so an error should be raised - with tm.ensure_clean('test.tex') as path: - self.assertRaises(UnicodeEncodeError, df.to_latex, path) - - def test_to_latex(self): - # it works! - self.frame.to_latex() - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex() - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -{} & a & b \\ -\midrule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False) - withoutindex_expected = r"""\begin{tabular}{rl} -\toprule - a & b \\ -\midrule - 1 & b1 \\ - 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_format(self): - # GH Bug #9402 - self.frame.to_latex(column_format='ccc') - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(column_format='ccc') - withindex_expected = r"""\begin{tabular}{ccc} -\toprule -{} & a & b \\ -\midrule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - def test_to_latex_with_formatters(self): - df = DataFrame({'int': [1, 2, 3], - 'float': [1.0, 2.0, 3.0], - 'object': [(1, 2), True, False], - 'datetime64': [datetime(2016, 1, 1), - datetime(2016, 2, 5), - datetime(2016, 3, 3)]}) - - formatters = {'int': lambda x: '0x%x' % x, - 'float': lambda x: '[% 4.1f]' % x, - 'object': lambda x: '-%s-' % str(x), - 'datetime64': lambda x: x.strftime('%Y-%m'), - '__index__': lambda x: 'index: %s' % x} - result = df.to_latex(formatters=dict(formatters)) - - expected = r"""\begin{tabular}{llrrl} -\toprule -{} & datetime64 & float & int & object \\ -\midrule -index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ -index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ -index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ -\bottomrule -\end{tabular} -""" - self.assertEqual(result, expected) - - def test_to_latex_multiindex(self): - df = DataFrame({('x', 'y'): ['a']}) - result = df.to_latex() - expected = r"""\begin{tabular}{ll} -\toprule -{} & x \\ -{} & y \\ -\midrule -0 & a \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - result = df.T.to_latex() - expected = r"""\begin{tabular}{lll} -\toprule - & & 0 \\ -\midrule -x & y & a \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - df = DataFrame.from_dict({ - ('c1', 0): pd.Series(dict((x, x) for x in range(4))), - 
('c1', 1): pd.Series(dict((x, x + 4) for x in range(4))), - ('c2', 0): pd.Series(dict((x, x) for x in range(4))), - ('c2', 1): pd.Series(dict((x, x + 4) for x in range(4))), - ('c3', 0): pd.Series(dict((x, x) for x in range(4))), - }).T - result = df.to_latex() - expected = r"""\begin{tabular}{llrrrr} -\toprule - & & 0 & 1 & 2 & 3 \\ -\midrule -c1 & 0 & 0 & 1 & 2 & 3 \\ - & 1 & 4 & 5 & 6 & 7 \\ -c2 & 0 & 0 & 1 & 2 & 3 \\ - & 1 & 4 & 5 & 6 & 7 \\ -c3 & 0 & 0 & 1 & 2 & 3 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - # GH 10660 - df = pd.DataFrame({'a': [0, 0, 1, 1], - 'b': list('abab'), - 'c': [1, 2, 3, 4]}) - result = df.set_index(['a', 'b']).to_latex() - expected = r"""\begin{tabular}{llr} -\toprule - & & c \\ -a & b & \\ -\midrule -0 & a & 1 \\ - & b & 2 \\ -1 & a & 3 \\ - & b & 4 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(result, expected) - - result = df.groupby('a').describe().to_latex() - expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' - ' & & & & & & ' - '\\\\\n{} & count & mean & std & min & 25\\% & ' - '50\\% & 75\\% & max \\\\\na & & & ' - ' & & & & & \\\\\n\\midrule\n0 ' - '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' - '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' - '& 3.5 & 3.75 & 4.0 ' - '\\\\\n\\bottomrule\n\\end{tabular}\n') - - self.assertEqual(result, expected) - - def test_to_latex_escape(self): - a = 'a' - b = 'b' - - test_dict = {u('co^l1'): {a: "a", - b: "b"}, - u('co$e^x$'): {a: "a", - b: "b"}} - - unescaped_result = DataFrame(test_dict).to_latex(escape=False) - escaped_result = DataFrame(test_dict).to_latex( - ) # default: escape=True - - unescaped_expected = r'''\begin{tabular}{lll} -\toprule -{} & co$e^x$ & co^l1 \\ -\midrule -a & a & a \\ -b & b & b \\ -\bottomrule -\end{tabular} -''' - - escaped_expected = r'''\begin{tabular}{lll} -\toprule -{} & co\$e\textasciicircumx\$ & co\textasciicircuml1 \\ -\midrule -a & a & a \\ -b & b & b \\ -\bottomrule -\end{tabular} -''' - - self.assertEqual(unescaped_result, unescaped_expected) - self.assertEqual(escaped_result, escaped_expected) - - def test_to_latex_longtable(self): - self.frame.to_latex(longtable=True) - - df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(longtable=True) - withindex_expected = r"""\begin{longtable}{lrl} -\toprule -{} & a & b \\ -\midrule -\endhead -\midrule -\multicolumn{3}{r}{{Continued on next page}} \\ -\midrule -\endfoot - -\bottomrule -\endlastfoot -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\end{longtable} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False, longtable=True) - withoutindex_expected = r"""\begin{longtable}{rl} -\toprule - a & b \\ -\midrule -\endhead -\midrule -\multicolumn{3}{r}{{Continued on next page}} \\ -\midrule -\endfoot - -\bottomrule -\endlastfoot - 1 & b1 \\ - 2 & b2 \\ -\end{longtable} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_escape_special_chars(self): - special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^', - '\\'] - df = DataFrame(data=special_characters) - observed = df.to_latex() - expected = r"""\begin{tabular}{ll} -\toprule -{} & 0 \\ -\midrule -0 & \& \\ -1 & \% \\ -2 & \$ \\ -3 & \# \\ -4 & \_ \\ -5 & \{ \\ -6 & \} \\ -7 & \textasciitilde \\ -8 & \textasciicircum \\ -9 & \textbackslash \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(observed, expected) - - def test_to_latex_no_header(self): - # GH 7124 - df = DataFrame({'a': [1, 2], 
'b': ['b1', 'b2']}) - withindex_result = df.to_latex(header=False) - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -0 & 1 & b1 \\ -1 & 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - withoutindex_result = df.to_latex(index=False, header=False) - withoutindex_expected = r"""\begin{tabular}{rl} -\toprule - 1 & b1 \\ - 2 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withoutindex_result, withoutindex_expected) - - def test_to_latex_decimal(self): - # GH 12031 - self.frame.to_latex() - df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) - withindex_result = df.to_latex(decimal=',') - print("WHAT THE") - withindex_expected = r"""\begin{tabular}{lrl} -\toprule -{} & a & b \\ -\midrule -0 & 1,0 & b1 \\ -1 & 2,1 & b2 \\ -\bottomrule -\end{tabular} -""" - - self.assertEqual(withindex_result, withindex_expected) - - def test_to_csv_quotechar(self): - df = DataFrame({'col': [1, 2]}) - expected = """\ -"","col" -"0","1" -"1","2" -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1) # 1=QUOTE_ALL - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - expected = """\ -$$,$col$ -$0$,$1$ -$1$,$2$ -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1, quotechar="$") - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(TypeError, 'quotechar'): - df.to_csv(path, quoting=1, quotechar=None) - - def test_to_csv_doublequote(self): - df = DataFrame({'col': ['a"a', '"bb"']}) - expected = '''\ -"","col" -"0","a""a" -"1","""bb""" -''' - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - from _csv import Error - with tm.ensure_clean('test.csv') as path: - with tm.assertRaisesRegexp(Error, 'escapechar'): - df.to_csv(path, doublequote=False) # no escapechar set - - def test_to_csv_escapechar(self): - df = DataFrame({'col': ['a"a', '"bb"']}) - expected = '''\ -"","col" -"0","a\\"a" -"1","\\"bb\\"" -''' - - with tm.ensure_clean('test.csv') as path: # QUOTE_ALL - df.to_csv(path, quoting=1, doublequote=False, escapechar='\\') - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - df = DataFrame({'col': ['a,a', ',bb,']}) - expected = """\ -,col -0,a\\,a -1,\\,bb\\, -""" - - with tm.ensure_clean('test.csv') as path: - df.to_csv(path, quoting=3, escapechar='\\') # QUOTE_NONE - with open(path, 'r') as f: - self.assertEqual(f.read(), expected) - - def test_csv_to_string(self): - df = DataFrame({'col': [1, 2]}) - expected = ',col\n0,1\n1,2\n' - self.assertEqual(df.to_csv(), expected) - - def test_to_csv_decimal(self): - # GH 781 - df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) - - expected_default = ',col1,col2,col3\n0,1,a,10.1\n' - self.assertEqual(df.to_csv(), expected_default) - - expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';'), expected_european_excel) - - expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n' - self.assertEqual( - df.to_csv(float_format='%.2f'), expected_float_format_default) - - expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n' - self.assertEqual( - df.to_csv(decimal=',', sep=';', - float_format='%.2f'), expected_float_format) - - # GH 11553: testing if decimal is taken into account for '0.0' - df = pd.DataFrame({'a': [0, 1.1], 'b': 
[2.2, 3.3], 'c': 1}) - expected = 'a,b,c\n0^0,2^2,1\n1^1,3^3,1\n' - self.assertEqual(df.to_csv(index=False, decimal='^'), expected) - - # same but for an index - self.assertEqual(df.set_index('a').to_csv(decimal='^'), expected) - - # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(decimal="^"), expected) - - def test_to_csv_float_format(self): - # testing if float_format is taken into account for the index - # GH 11553 - df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) - expected = 'a,b,c\n0,2.20,1\n1,3.30,1\n' - self.assertEqual( - df.set_index('a').to_csv(float_format='%.2f'), expected) - - # same for a multi-index - self.assertEqual( - df.set_index(['a', 'b']).to_csv(float_format='%.2f'), expected) - - def test_to_csv_na_rep(self): - # testing if NaN values are correctly represented in the index - # GH 11553 - df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n0.0,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - # now with an index containing only NaNs - df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n_,0,2\n_,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - # check if na_rep parameter does not break anything when no NaN - df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) - expected = "a,b,c\n0,0,2\n0,1,3\n" - self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) - self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) - - def test_to_csv_date_format(self): - # GH 10209 - df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s') - }) - df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d') - }) - - expected_default_sec = ',A\n0,2013-01-01 00:00:00\n1,2013-01-01 00:00:01\n2,2013-01-01 00:00:02' + \ - '\n3,2013-01-01 00:00:03\n4,2013-01-01 00:00:04\n' - self.assertEqual(df_sec.to_csv(), expected_default_sec) - - expected_ymdhms_day = ',A\n0,2013-01-01 00:00:00\n1,2013-01-02 00:00:00\n2,2013-01-03 00:00:00' + \ - '\n3,2013-01-04 00:00:00\n4,2013-01-05 00:00:00\n' - self.assertEqual( - df_day.to_csv( - date_format='%Y-%m-%d %H:%M:%S'), expected_ymdhms_day) - - expected_ymd_sec = ',A\n0,2013-01-01\n1,2013-01-01\n2,2013-01-01\n3,2013-01-01\n4,2013-01-01\n' - self.assertEqual( - df_sec.to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) - - expected_default_day = ',A\n0,2013-01-01\n1,2013-01-02\n2,2013-01-03\n3,2013-01-04\n4,2013-01-05\n' - self.assertEqual(df_day.to_csv(), expected_default_day) - self.assertEqual( - df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day) - - # testing if date_format parameter is taken into account for - # multi-indexed dataframes (GH 7791) - df_sec['B'] = 0 - df_sec['C'] = 1 - expected_ymd_sec = 'A,B,C\n2013-01-01,0,1\n' - df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B']) - self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), - expected_ymd_sec) - - def test_to_csv_multi_index(self): - # see gh-6618 - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) - - exp = ",1\n,2\n0,1\n" - self.assertEqual(df.to_csv(), exp) - - exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - - df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), - index=pd.MultiIndex.from_arrays([[1], [2]])) - - exp = ",,1\n,,2\n1,2,1\n" - 
self.assertEqual(df.to_csv(), exp) - - exp = "1\n2\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - - df = DataFrame( - [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) - - exp = ",foo\n,bar\n0,1\n" - self.assertEqual(df.to_csv(), exp) - - exp = "foo\nbar\n1\n" - self.assertEqual(df.to_csv(index=False), exp) - def test_period(self): # GH 12615 df = pd.DataFrame({'A': pd.period_range('2013-01', @@ -4291,7 +1911,7 @@ def test_max_multi_index_display(self): ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=['first', 'second']) - s = Series(randn(8), index=index) + s = Series(np.random.randn(8), index=index) with option_context("display.max_rows", 10): self.assertEqual(len(str(s).split('\n')), 10) @@ -4305,7 +1925,7 @@ def test_max_multi_index_display(self): self.assertEqual(len(str(s).split('\n')), 10) # index - s = Series(randn(8), None) + s = Series(np.random.randn(8), None) with option_context("display.max_rows", 10): self.assertEqual(len(str(s).split('\n')), 9) @@ -4436,176 +2056,6 @@ def test_to_string_header(self): self.assertEqual(res, exp) -class TestEngFormatter(tm.TestCase): - - def test_eng_float_formatter(self): - df = DataFrame({'A': [1.41, 141., 14100, 1410000.]}) - - fmt.set_eng_float_format() - result = df.to_string() - expected = (' A\n' - '0 1.410E+00\n' - '1 141.000E+00\n' - '2 14.100E+03\n' - '3 1.410E+06') - self.assertEqual(result, expected) - - fmt.set_eng_float_format(use_eng_prefix=True) - result = df.to_string() - expected = (' A\n' - '0 1.410\n' - '1 141.000\n' - '2 14.100k\n' - '3 1.410M') - self.assertEqual(result, expected) - - fmt.set_eng_float_format(accuracy=0) - result = df.to_string() - expected = (' A\n' - '0 1E+00\n' - '1 141E+00\n' - '2 14E+03\n' - '3 1E+06') - self.assertEqual(result, expected) - - self.reset_display_options() - - def compare(self, formatter, input, output): - formatted_input = formatter(input) - msg = ("formatting of %s results in '%s', expected '%s'" % - (str(input), formatted_input, output)) - self.assertEqual(formatted_input, output, msg) - - def compare_all(self, formatter, in_out): - """ - Parameters: - ----------- - formatter: EngFormatter under test - in_out: list of tuples. Each tuple = (number, expected_formatting) - - It is tested if 'formatter(number) == expected_formatting'. - *number* should be >= 0 because formatter(-number) == fmt is also - tested. 
*fmt* is derived from *expected_formatting* - """ - for input, output in in_out: - self.compare(formatter, input, output) - self.compare(formatter, -input, "-" + output[1:]) - - def test_exponents_with_eng_prefix(self): - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - f = np.sqrt(2) - in_out = [(f * 10 ** -24, " 1.414y"), (f * 10 ** -23, " 14.142y"), - (f * 10 ** -22, " 141.421y"), (f * 10 ** -21, " 1.414z"), - (f * 10 ** -20, " 14.142z"), (f * 10 ** -19, " 141.421z"), - (f * 10 ** -18, " 1.414a"), (f * 10 ** -17, " 14.142a"), - (f * 10 ** -16, " 141.421a"), (f * 10 ** -15, " 1.414f"), - (f * 10 ** -14, " 14.142f"), (f * 10 ** -13, " 141.421f"), - (f * 10 ** -12, " 1.414p"), (f * 10 ** -11, " 14.142p"), - (f * 10 ** -10, " 141.421p"), (f * 10 ** -9, " 1.414n"), - (f * 10 ** -8, " 14.142n"), (f * 10 ** -7, " 141.421n"), - (f * 10 ** -6, " 1.414u"), (f * 10 ** -5, " 14.142u"), - (f * 10 ** -4, " 141.421u"), (f * 10 ** -3, " 1.414m"), - (f * 10 ** -2, " 14.142m"), (f * 10 ** -1, " 141.421m"), - (f * 10 ** 0, " 1.414"), (f * 10 ** 1, " 14.142"), - (f * 10 ** 2, " 141.421"), (f * 10 ** 3, " 1.414k"), - (f * 10 ** 4, " 14.142k"), (f * 10 ** 5, " 141.421k"), - (f * 10 ** 6, " 1.414M"), (f * 10 ** 7, " 14.142M"), - (f * 10 ** 8, " 141.421M"), (f * 10 ** 9, " 1.414G"), ( - f * 10 ** 10, " 14.142G"), (f * 10 ** 11, " 141.421G"), - (f * 10 ** 12, " 1.414T"), (f * 10 ** 13, " 14.142T"), ( - f * 10 ** 14, " 141.421T"), (f * 10 ** 15, " 1.414P"), ( - f * 10 ** 16, " 14.142P"), (f * 10 ** 17, " 141.421P"), ( - f * 10 ** 18, " 1.414E"), (f * 10 ** 19, " 14.142E"), - (f * 10 ** 20, " 141.421E"), (f * 10 ** 21, " 1.414Z"), ( - f * 10 ** 22, " 14.142Z"), (f * 10 ** 23, " 141.421Z"), ( - f * 10 ** 24, " 1.414Y"), (f * 10 ** 25, " 14.142Y"), ( - f * 10 ** 26, " 141.421Y")] - self.compare_all(formatter, in_out) - - def test_exponents_without_eng_prefix(self): - formatter = fmt.EngFormatter(accuracy=4, use_eng_prefix=False) - f = np.pi - in_out = [(f * 10 ** -24, " 3.1416E-24"), - (f * 10 ** -23, " 31.4159E-24"), - (f * 10 ** -22, " 314.1593E-24"), - (f * 10 ** -21, " 3.1416E-21"), - (f * 10 ** -20, " 31.4159E-21"), - (f * 10 ** -19, " 314.1593E-21"), - (f * 10 ** -18, " 3.1416E-18"), - (f * 10 ** -17, " 31.4159E-18"), - (f * 10 ** -16, " 314.1593E-18"), - (f * 10 ** -15, " 3.1416E-15"), - (f * 10 ** -14, " 31.4159E-15"), - (f * 10 ** -13, " 314.1593E-15"), - (f * 10 ** -12, " 3.1416E-12"), - (f * 10 ** -11, " 31.4159E-12"), - (f * 10 ** -10, " 314.1593E-12"), - (f * 10 ** -9, " 3.1416E-09"), (f * 10 ** -8, " 31.4159E-09"), - (f * 10 ** -7, " 314.1593E-09"), (f * 10 ** -6, " 3.1416E-06"), - (f * 10 ** -5, " 31.4159E-06"), (f * 10 ** -4, - " 314.1593E-06"), - (f * 10 ** -3, " 3.1416E-03"), (f * 10 ** -2, " 31.4159E-03"), - (f * 10 ** -1, " 314.1593E-03"), (f * 10 ** 0, " 3.1416E+00"), ( - f * 10 ** 1, " 31.4159E+00"), (f * 10 ** 2, " 314.1593E+00"), - (f * 10 ** 3, " 3.1416E+03"), (f * 10 ** 4, " 31.4159E+03"), ( - f * 10 ** 5, " 314.1593E+03"), (f * 10 ** 6, " 3.1416E+06"), - (f * 10 ** 7, " 31.4159E+06"), (f * 10 ** 8, " 314.1593E+06"), ( - f * 10 ** 9, " 3.1416E+09"), (f * 10 ** 10, " 31.4159E+09"), - (f * 10 ** 11, " 314.1593E+09"), (f * 10 ** 12, " 3.1416E+12"), - (f * 10 ** 13, " 31.4159E+12"), (f * 10 ** 14, " 314.1593E+12"), - (f * 10 ** 15, " 3.1416E+15"), (f * 10 ** 16, " 31.4159E+15"), - (f * 10 ** 17, " 314.1593E+15"), (f * 10 ** 18, " 3.1416E+18"), - (f * 10 ** 19, " 31.4159E+18"), (f * 10 ** 20, " 314.1593E+18"), - (f * 10 ** 21, " 3.1416E+21"), (f * 10 ** 22, " 31.4159E+21"), - (f 
* 10 ** 23, " 314.1593E+21"), (f * 10 ** 24, " 3.1416E+24"), - (f * 10 ** 25, " 31.4159E+24"), (f * 10 ** 26, " 314.1593E+24")] - self.compare_all(formatter, in_out) - - def test_rounding(self): - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - in_out = [(5.55555, ' 5.556'), (55.5555, ' 55.556'), - (555.555, ' 555.555'), (5555.55, ' 5.556k'), - (55555.5, ' 55.556k'), (555555, ' 555.555k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - in_out = [(5.55555, ' 5.6'), (55.5555, ' 55.6'), (555.555, ' 555.6'), - (5555.55, ' 5.6k'), (55555.5, ' 55.6k'), (555555, ' 555.6k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True) - in_out = [(5.55555, ' 6'), (55.5555, ' 56'), (555.555, ' 556'), - (5555.55, ' 6k'), (55555.5, ' 56k'), (555555, ' 556k')] - self.compare_all(formatter, in_out) - - formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) - result = formatter(0) - self.assertEqual(result, u(' 0.000')) - - def test_nan(self): - # Issue #11981 - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - result = formatter(np.nan) - self.assertEqual(result, u('NaN')) - - df = pd.DataFrame({'a': [1.5, 10.3, 20.5], - 'b': [50.3, 60.67, 70.12], - 'c': [100.2, 101.33, 120.33]}) - pt = df.pivot_table(values='a', index='b', columns='c') - fmt.set_eng_float_format(accuracy=1) - result = pt.to_string() - self.assertTrue('NaN' in result) - self.reset_display_options() - - def test_inf(self): - # Issue #11981 - - formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) - result = formatter(np.inf) - self.assertEqual(result, u('inf')) - - def _three_digit_exp(): return '%.4g' % 1.7e8 == '1.7e+008' diff --git a/pandas/tests/formats/test_to_csv.py b/pandas/tests/formats/test_to_csv.py new file mode 100644 index 0000000000000..51295fd750602 --- /dev/null +++ b/pandas/tests/formats/test_to_csv.py @@ -0,0 +1,216 @@ +from pandas import DataFrame +import numpy as np +import pandas as pd +from pandas.util import testing as tm + + +class TestToCSV(tm.TestCase): + + def test_to_csv_quotechar(self): + df = DataFrame({'col': [1, 2]}) + expected = """\ +"","col" +"0","1" +"1","2" +""" + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=1) # 1=QUOTE_ALL + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + expected = """\ +$$,$col$ +$0$,$1$ +$1$,$2$ +""" + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=1, quotechar="$") + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + with tm.ensure_clean('test.csv') as path: + with tm.assertRaisesRegexp(TypeError, 'quotechar'): + df.to_csv(path, quoting=1, quotechar=None) + + def test_to_csv_doublequote(self): + df = DataFrame({'col': ['a"a', '"bb"']}) + expected = '''\ +"","col" +"0","a""a" +"1","""bb""" +''' + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + from _csv import Error + with tm.ensure_clean('test.csv') as path: + with tm.assertRaisesRegexp(Error, 'escapechar'): + df.to_csv(path, doublequote=False) # no escapechar set + + def test_to_csv_escapechar(self): + df = DataFrame({'col': ['a"a', '"bb"']}) + expected = '''\ +"","col" +"0","a\\"a" +"1","\\"bb\\"" +''' + + with tm.ensure_clean('test.csv') as path: # QUOTE_ALL + df.to_csv(path, quoting=1, doublequote=False, escapechar='\\') + with open(path, 'r') as f: + 
self.assertEqual(f.read(), expected) + + df = DataFrame({'col': ['a,a', ',bb,']}) + expected = """\ +,col +0,a\\,a +1,\\,bb\\, +""" + + with tm.ensure_clean('test.csv') as path: + df.to_csv(path, quoting=3, escapechar='\\') # QUOTE_NONE + with open(path, 'r') as f: + self.assertEqual(f.read(), expected) + + def test_csv_to_string(self): + df = DataFrame({'col': [1, 2]}) + expected = ',col\n0,1\n1,2\n' + self.assertEqual(df.to_csv(), expected) + + def test_to_csv_decimal(self): + # GH 781 + df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) + + expected_default = ',col1,col2,col3\n0,1,a,10.1\n' + self.assertEqual(df.to_csv(), expected_default) + + expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n' + self.assertEqual( + df.to_csv(decimal=',', sep=';'), expected_european_excel) + + expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n' + self.assertEqual( + df.to_csv(float_format='%.2f'), expected_float_format_default) + + expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n' + self.assertEqual( + df.to_csv(decimal=',', sep=';', + float_format='%.2f'), expected_float_format) + + # GH 11553: testing if decimal is taken into account for '0.0' + df = pd.DataFrame({'a': [0, 1.1], 'b': [2.2, 3.3], 'c': 1}) + expected = 'a,b,c\n0^0,2^2,1\n1^1,3^3,1\n' + self.assertEqual(df.to_csv(index=False, decimal='^'), expected) + + # same but for an index + self.assertEqual(df.set_index('a').to_csv(decimal='^'), expected) + + # same for a multi-index + self.assertEqual( + df.set_index(['a', 'b']).to_csv(decimal="^"), expected) + + def test_to_csv_float_format(self): + # testing if float_format is taken into account for the index + # GH 11553 + df = pd.DataFrame({'a': [0, 1], 'b': [2.2, 3.3], 'c': 1}) + expected = 'a,b,c\n0,2.20,1\n1,3.30,1\n' + self.assertEqual( + df.set_index('a').to_csv(float_format='%.2f'), expected) + + # same for a multi-index + self.assertEqual( + df.set_index(['a', 'b']).to_csv(float_format='%.2f'), expected) + + def test_to_csv_na_rep(self): + # testing if NaN values are correctly represented in the index + # GH 11553 + df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]}) + expected = "a,b,c\n0.0,0,2\n_,1,3\n" + self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) + self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + + # now with an index containing only NaNs + df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]}) + expected = "a,b,c\n_,0,2\n_,1,3\n" + self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) + self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + + # check if na_rep parameter does not break anything when no NaN + df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]}) + expected = "a,b,c\n0,0,2\n0,1,3\n" + self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected) + self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected) + + def test_to_csv_date_format(self): + # GH 10209 + df_sec = DataFrame({'A': pd.date_range('20130101', periods=5, freq='s') + }) + df_day = DataFrame({'A': pd.date_range('20130101', periods=5, freq='d') + }) + + expected_default_sec = (',A\n0,2013-01-01 00:00:00\n1,' + '2013-01-01 00:00:01\n2,2013-01-01 00:00:02' + '\n3,2013-01-01 00:00:03\n4,' + '2013-01-01 00:00:04\n') + self.assertEqual(df_sec.to_csv(), expected_default_sec) + + expected_ymdhms_day = (',A\n0,2013-01-01 00:00:00\n1,' + '2013-01-02 00:00:00\n2,2013-01-03 00:00:00' + '\n3,2013-01-04 00:00:00\n4,' + '2013-01-05 00:00:00\n') + self.assertEqual( + df_day.to_csv( + 
date_format='%Y-%m-%d %H:%M:%S'), expected_ymdhms_day) + + expected_ymd_sec = (',A\n0,2013-01-01\n1,2013-01-01\n2,' + '2013-01-01\n3,2013-01-01\n4,2013-01-01\n') + self.assertEqual( + df_sec.to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) + + expected_default_day = (',A\n0,2013-01-01\n1,2013-01-02\n2,' + '2013-01-03\n3,2013-01-04\n4,2013-01-05\n') + self.assertEqual(df_day.to_csv(), expected_default_day) + self.assertEqual( + df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day) + + # testing if date_format parameter is taken into account for + # multi-indexed dataframes (GH 7791) + df_sec['B'] = 0 + df_sec['C'] = 1 + expected_ymd_sec = 'A,B,C\n2013-01-01,0,1\n' + df_sec_grouped = df_sec.groupby([pd.Grouper(key='A', freq='1h'), 'B']) + self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), + expected_ymd_sec) + + def test_to_csv_multi_index(self): + # see gh-6618 + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) + + exp = ",1\n,2\n0,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "1\n2\n1\n" + self.assertEqual(df.to_csv(index=False), exp) + + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]), + index=pd.MultiIndex.from_arrays([[1], [2]])) + + exp = ",,1\n,,2\n1,2,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "1\n2\n1\n" + self.assertEqual(df.to_csv(index=False), exp) + + df = DataFrame( + [1], columns=pd.MultiIndex.from_arrays([['foo'], ['bar']])) + + exp = ",foo\n,bar\n0,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "foo\nbar\n1\n" + self.assertEqual(df.to_csv(index=False), exp) diff --git a/pandas/tests/formats/test_to_html.py b/pandas/tests/formats/test_to_html.py new file mode 100644 index 0000000000000..771c66e84037c --- /dev/null +++ b/pandas/tests/formats/test_to_html.py @@ -0,0 +1,1861 @@ +# -*- coding: utf-8 -*- + +import re +from textwrap import dedent +from datetime import datetime +from distutils.version import LooseVersion + +import pytest +import numpy as np +import pandas as pd +from pandas import compat, DataFrame, MultiIndex, option_context, Index +from pandas.compat import u, lrange, StringIO +from pandas.util import testing as tm +import pandas.formats.format as fmt + +div_style = '' +try: + import IPython + if IPython.__version__ < LooseVersion('3.0.0'): + div_style = ' style="max-width:1500px;overflow:auto;"' +except (ImportError, AttributeError): + pass + + +class TestToHTML(tm.TestCase): + + def test_to_html_with_col_space(self): + def check_with_width(df, col_space): + # check that col_space affects HTML generation + # and be very brittle about it. + html = df.to_html(col_space=col_space) + hdrs = [x for x in html.split(r"\n") if re.search(r"\s]", x)] + self.assertTrue(len(hdrs) > 0) + for h in hdrs: + self.assertTrue("min-width" in h) + self.assertTrue(str(col_space) in h) + + df = DataFrame(np.random.random(size=(1, 3))) + + check_with_width(df, 30) + check_with_width(df, 50) + + def test_to_html_with_empty_string_label(self): + # GH3547, to_html regards empty string labels as repeated labels + data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]} + df = DataFrame(data).set_index(['c1', 'c2']) + res = df.to_html() + self.assertTrue("rowspan" not in res) + + def test_to_html_unicode(self): + df = DataFrame({u('\u03c3'): np.arange(10.)}) + expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
\u03c3
00.0
11.0
22.0
33.0
44.0
55.0
66.0
77.0
88.0
99.0
' # noqa + self.assertEqual(df.to_html(), expected) + df = DataFrame({'A': [u('\u03c3')]}) + expected = u'\n \n \n \n \n \n \n \n \n \n \n \n \n
A
0\u03c3
' # noqa + self.assertEqual(df.to_html(), expected) + + def test_to_html_decimal(self): + # GH 12031 + df = DataFrame({'A': [6.0, 3.1, 2.2]}) + result = df.to_html(decimal=',') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
A
06,0
13,1
22,2
') + self.assertEqual(result, expected) + + def test_to_html_escaped(self): + a = 'str", + b: ""}, + 'co>l2': {a: "", + b: ""}} + rs = DataFrame(test_dict).to_html() + xp = """ + + + + + + + + + + + + + + + + + + + +
co<l1co>l2
str<ing1 &amp;<type 'str'><type 'str'>
stri>ng2 &amp;<type 'str'><type 'str'>
""" + + self.assertEqual(xp, rs) + + def test_to_html_escape_disabled(self): + a = 'strbold", + b: "bold"}, + 'co>l2': {a: "bold", + b: "bold"}} + rs = DataFrame(test_dict).to_html(escape=False) + xp = """ + + + + + + + + + + + + + + + + + +
co + co>l2
str + boldbold
stri>ng2 &boldbold
""" + + self.assertEqual(xp, rs) + + def test_to_html_multiindex_index_false(self): + # issue 8452 + df = DataFrame({ + 'a': range(2), + 'b': range(3, 5), + 'c': range(5, 7), + 'd': range(3, 5) + }) + df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) + result = df.to_html(index=False) + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ab
cdcd
0353
1464
""" + + self.assertEqual(result, expected) + + df.index = Index(df.index.values, name='idx') + result = df.to_html(index=False) + self.assertEqual(result, expected) + + def test_to_html_multiindex_sparsify_false_multi_sparse(self): + with option_context('display.multi_sparse', False): + index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], + names=['foo', None]) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) + + result = df.to_html() + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
01
foo
0001
0123
1045
1167
""" + + self.assertEqual(result, expected) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], + columns=index[::2], index=index) + + result = df.to_html() + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
foo01
00
foo
0001
0123
1045
1167
""" + + self.assertEqual(result, expected) + + def test_to_html_multiindex_sparsify(self): + index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], + names=['foo', None]) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) + + result = df.to_html() + expected = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
01
foo
0001
123
1045
167
""" + + self.assertEqual(result, expected) + + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], + index=index) + + result = df.to_html() + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
foo01
00
foo
0001
123
1045
167
""" + + self.assertEqual(result, expected) + + def test_to_html_multiindex_odd_even_truncate(self): + # GH 14882 - Issue on truncation with odd length DataFrame + mi = MultiIndex.from_product([[100, 200, 300], + [10, 20, 30], + [1, 2, 3, 4, 5, 6, 7]], + names=['a', 'b', 'c']) + df = DataFrame({'n': range(len(mi))}, index=mi) + result = df.to_html(max_rows=60) + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
20128
229
......
633
734
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" + self.assertEqual(result, expected) + + # Test that ... appears in a middle level + result = df.to_html(max_rows=56) + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
n
abc
1001010
21
32
43
54
65
76
2017
28
39
410
511
612
713
30114
215
316
417
518
619
720
20010121
222
323
424
525
626
727
.........
30135
236
337
438
539
640
741
30010142
243
344
445
546
647
748
20149
250
351
452
553
654
755
30156
257
358
459
560
661
762
""" + self.assertEqual(result, expected) + + def test_to_html_index_formatter(self): + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=['foo', None], + index=lrange(4)) + + f = lambda x: 'abcd' [x] + result = df.to_html(formatters={'__index__': f}) + expected = """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
fooNone
a01
b23
c45
d67
""" + + self.assertEqual(result, expected) + + def test_to_html_datetime64_monthformatter(self): + months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] + x = DataFrame({'months': months}) + + def format_func(x): + return x.strftime('%Y-%m') + result = x.to_html(formatters={'months': format_func}) + expected = """\ + + + + + + + + + + + + + + + + + +
months
02016-01
12016-02
""" + self.assertEqual(result, expected) + + def test_to_html_datetime64_hourformatter(self): + + x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'], + format='%H:%M:%S.%f')}) + + def format_func(x): + return x.strftime('%H:%M') + result = x.to_html(formatters={'hod': format_func}) + expected = """\ + + + + + + + + + + + + + + + + + +
hod
010:10
112:12
""" + self.assertEqual(result, expected) + + def test_to_html_regression_GH6098(self): + df = DataFrame({ + u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], + u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), u('1er')], + 'données1': np.random.randn(5), + 'données2': np.random.randn(5)}) + + # it works + df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() + + def test_to_html_truncate(self): + pytest.skip("unreliable on travis") + index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) + df = DataFrame(index=index, columns=range(20)) + fmt.set_option('display.max_rows', 8) + fmt.set_option('display.max_columns', 4) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
01...1819
2001-01-01NaNNaN...NaNNaN
2001-01-02NaNNaN...NaNNaN
2001-01-03NaNNaN...NaNNaN
2001-01-04NaNNaN...NaNNaN
..................
2001-01-17NaNNaN...NaNNaN
2001-01-18NaNNaN...NaNNaN
2001-01-19NaNNaN...NaNNaN
2001-01-20NaNNaN...NaNNaN
+

20 rows × 20 columns

+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + self.assertEqual(result, expected) + + def test_to_html_truncate_multi_index(self): + pytest.skip("unreliable on travis") + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
barbaz...fooqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
...........................
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
twoNaNNaNNaN...NaNNaNNaN
+

8 rows × 8 columns

+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + self.assertEqual(result, expected) + + def test_to_html_truncate_multi_index_sparse_off(self): + pytest.skip("unreliable on travis") + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) + fmt.set_option('display.multi_sparse', False) + result = df._repr_html_() + expected = '''\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
barbarbaz...fooquxqux
onetwoone...twoonetwo
baroneNaNNaNNaN...NaNNaNNaN
bartwoNaNNaNNaN...NaNNaNNaN
bazoneNaNNaNNaN...NaNNaNNaN
footwoNaNNaNNaN...NaNNaNNaN
quxoneNaNNaNNaN...NaNNaNNaN
quxtwoNaNNaNNaN...NaNNaNNaN
+

8 rows × 8 columns

+'''.format(div_style) + if compat.PY2: + expected = expected.decode('utf-8') + self.assertEqual(result, expected) + + def test_to_html_border(self): + df = DataFrame({'A': [1, 2]}) + result = df.to_html() + assert 'border="1"' in result + + def test_to_html_border_option(self): + df = DataFrame({'A': [1, 2]}) + with pd.option_context('html.border', 0): + result = df.to_html() + self.assertTrue('border="0"' in result) + self.assertTrue('border="0"' in df._repr_html_()) + + def test_to_html_border_zero(self): + df = DataFrame({'A': [1, 2]}) + result = df.to_html(border=0) + self.assertTrue('border="0"' in result) + + def test_to_html(self): + # big mixed + biggie = DataFrame({'A': np.random.randn(200), + 'B': tm.makeStringIndex(200)}, + index=lrange(200)) + + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan + s = biggie.to_html() + + buf = StringIO() + retval = biggie.to_html(buf=buf) + self.assertIsNone(retval) + self.assertEqual(buf.getvalue(), s) + + tm.assertIsInstance(s, compat.string_types) + + biggie.to_html(columns=['B', 'A'], col_space=17) + biggie.to_html(columns=['B', 'A'], + formatters={'A': lambda x: '%.1f' % x}) + + biggie.to_html(columns=['B', 'A'], float_format=str) + biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) + + frame = DataFrame(index=np.arange(200)) + frame.to_html() + + def test_to_html_filename(self): + biggie = DataFrame({'A': np.random.randn(200), + 'B': tm.makeStringIndex(200)}, + index=lrange(200)) + + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan + with tm.ensure_clean('test.html') as path: + biggie.to_html(path) + with open(path, 'r') as f: + s = biggie.to_html() + s2 = f.read() + self.assertEqual(s, s2) + + frame = DataFrame(index=np.arange(200)) + with tm.ensure_clean('test.html') as path: + frame.to_html(path) + with open(path, 'r') as f: + self.assertEqual(frame.to_html(), f.read()) + + def test_to_html_with_no_bold(self): + x = DataFrame({'x': np.random.randn(5)}) + ashtml = x.to_html(bold_rows=False) + self.assertFalse('")]) + + def test_to_html_columns_arg(self): + frame = DataFrame(tm.getSeriesData()) + result = frame.to_html(columns=['A']) + self.assertNotIn('B', result) + + def test_to_html_multiindex(self): + columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), + np.mod(lrange(4), 2))), + names=['CL0', 'CL1']) + df = DataFrame([list('abcd'), list('efgh')], columns=columns) + result = df.to_html(justify='left') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
CL001
CL10101
0abcd
1efgh
') + + self.assertEqual(result, expected) + + columns = MultiIndex.from_tuples(list(zip( + range(4), np.mod( + lrange(4), 2)))) + df = DataFrame([list('abcd'), list('efgh')], columns=columns) + + result = df.to_html(justify='right') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
0123
0101
0abcd
1efgh
') + + self.assertEqual(result, expected) + + def test_to_html_justify(self): + df = DataFrame({'A': [6, 30000, 2], + 'B': [1, 2, 70000], + 'C': [223442, 0, 1]}, + columns=['A', 'B', 'C']) + result = df.to_html(justify='left') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
061223442
13000020
22700001
') + self.assertEqual(result, expected) + + result = df.to_html(justify='right') + expected = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
061223442
13000020
22700001
') + self.assertEqual(result, expected) + + def test_to_html_index(self): + index = ['foo', 'bar', 'baz'] + df = DataFrame({'A': [1, 2, 3], + 'B': [1.2, 3.4, 5.6], + 'C': ['one', 'two', np.nan]}, + columns=['A', 'B', 'C'], + index=index) + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
foo11.2one
bar23.4two
baz35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + + expected_without_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
11.2one
23.4two
35.6NaN
') + result = df.to_html(index=False) + for i in index: + self.assertNotIn(i, result) + self.assertEqual(result, expected_without_index) + df.index = Index(['foo', 'bar', 'baz'], name='idx') + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
idx
foo11.2one
bar23.4two
baz35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + self.assertEqual(df.to_html(index=False), expected_without_index) + + tuples = [('foo', 'car'), ('foo', 'bike'), ('bar', 'car')] + df.index = MultiIndex.from_tuples(tuples) + + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
foocar11.2one
bike23.4two
barcar35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + + result = df.to_html(index=False) + for i in ['foo', 'bar', 'car', 'bike']: + self.assertNotIn(i, result) + # must be the same result as normal index + self.assertEqual(result, expected_without_index) + + df.index = MultiIndex.from_tuples(tuples, names=['idx1', 'idx2']) + expected_with_index = ('\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + '
ABC
idx1idx2
foocar11.2one
bike23.4two
barcar35.6NaN
') + self.assertEqual(df.to_html(), expected_with_index) + self.assertEqual(df.to_html(index=False), expected_without_index) + + def test_to_html_with_classes(self): + df = DataFrame() + result = df.to_html(classes="sortable draggable") + expected = dedent(""" + + + + + + + + + +
+ + """).strip() + self.assertEqual(result, expected) + + result = df.to_html(classes=["sortable", "draggable"]) + self.assertEqual(result, expected) + + def test_to_html_no_index_max_rows(self): + # GH https://github.com/pandas-dev/pandas/issues/14998 + df = DataFrame({"A": [1, 2, 3, 4]}) + result = df.to_html(index=False, max_rows=1) + expected = dedent("""\ + + + + + + + + + + + +
A
1
""") + self.assertEqual(result, expected) diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py new file mode 100644 index 0000000000000..89e18e1cec06e --- /dev/null +++ b/pandas/tests/formats/test_to_latex.py @@ -0,0 +1,351 @@ +from datetime import datetime + +import pytest + +import pandas as pd +from pandas import DataFrame, compat +from pandas.util import testing as tm +from pandas.compat import u +import codecs + + +@pytest.fixture +def frame(): + return DataFrame(tm.getSeriesData()) + + +class TestToLatex(object): + + def test_to_latex_filename(self, frame): + with tm.ensure_clean('test.tex') as path: + frame.to_latex(path) + + with open(path, 'r') as f: + assert frame.to_latex() == f.read() + + # test with utf-8 and encoding option (GH 7061) + df = DataFrame([[u'au\xdfgangen']]) + with tm.ensure_clean('test.tex') as path: + df.to_latex(path, encoding='utf-8') + with codecs.open(path, 'r', encoding='utf-8') as f: + assert df.to_latex() == f.read() + + # test with utf-8 without encoding option + if compat.PY3: # python3: pandas default encoding is utf-8 + with tm.ensure_clean('test.tex') as path: + df.to_latex(path) + with codecs.open(path, 'r', encoding='utf-8') as f: + assert df.to_latex() == f.read() + else: + # python2 default encoding is ascii, so an error should be raised + with tm.ensure_clean('test.tex') as path: + with pytest.raises(UnicodeEncodeError): + df.to_latex(path) + + def test_to_latex(self, frame): + # it works! + frame.to_latex() + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex() + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + a & b \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_format(self, frame): + # GH Bug #9402 + frame.to_latex(column_format='ccc') + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(column_format='ccc') + withindex_expected = r"""\begin{tabular}{ccc} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + def test_to_latex_with_formatters(self): + df = DataFrame({'int': [1, 2, 3], + 'float': [1.0, 2.0, 3.0], + 'object': [(1, 2), True, False], + 'datetime64': [datetime(2016, 1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3)]}) + + formatters = {'int': lambda x: '0x%x' % x, + 'float': lambda x: '[% 4.1f]' % x, + 'object': lambda x: '-%s-' % str(x), + 'datetime64': lambda x: x.strftime('%Y-%m'), + '__index__': lambda x: 'index: %s' % x} + result = df.to_latex(formatters=dict(formatters)) + + expected = r"""\begin{tabular}{llrrl} +\toprule +{} & datetime64 & float & int & object \\ +\midrule +index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ +index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ +index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_multiindex(self): + df = DataFrame({('x', 'y'): ['a']}) + result = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & x \\ +{} & y \\ +\midrule +0 & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.T.to_latex() + expected = 
r"""\begin{tabular}{lll} +\toprule + & & 0 \\ +\midrule +x & y & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + df = DataFrame.from_dict({ + ('c1', 0): pd.Series(dict((x, x) for x in range(4))), + ('c1', 1): pd.Series(dict((x, x + 4) for x in range(4))), + ('c2', 0): pd.Series(dict((x, x) for x in range(4))), + ('c2', 1): pd.Series(dict((x, x + 4) for x in range(4))), + ('c3', 0): pd.Series(dict((x, x) for x in range(4))), + }).T + result = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & & 0 & 1 & 2 & 3 \\ +\midrule +c1 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c2 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c3 & 0 & 0 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + # GH 10660 + df = pd.DataFrame({'a': [0, 0, 1, 1], + 'b': list('abab'), + 'c': [1, 2, 3, 4]}) + result = df.set_index(['a', 'b']).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & c \\ +a & b & \\ +\midrule +0 & a & 1 \\ + & b & 2 \\ +1 & a & 3 \\ + & b & 4 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.groupby('a').describe().to_latex() + expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' + ' & & & & & & ' + '\\\\\n{} & count & mean & std & min & 25\\% & ' + '50\\% & 75\\% & max \\\\\na & & & ' + ' & & & & & \\\\\n\\midrule\n0 ' + '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' + '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' + '& 3.5 & 3.75 & 4.0 ' + '\\\\\n\\bottomrule\n\\end{tabular}\n') + + assert result == expected + + def test_to_latex_escape(self): + a = 'a' + b = 'b' + + test_dict = {u('co^l1'): {a: "a", + b: "b"}, + u('co$e^x$'): {a: "a", + b: "b"}} + + unescaped_result = DataFrame(test_dict).to_latex(escape=False) + escaped_result = DataFrame(test_dict).to_latex( + ) # default: escape=True + + unescaped_expected = r'''\begin{tabular}{lll} +\toprule +{} & co$e^x$ & co^l1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +''' + + escaped_expected = r'''\begin{tabular}{lll} +\toprule +{} & co\$e\textasciicircumx\$ & co\textasciicircuml1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +''' + + assert unescaped_result == unescaped_expected + assert escaped_result == escaped_expected + + def test_to_latex_longtable(self, frame): + frame.to_latex(longtable=True) + + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(longtable=True) + withindex_expected = r"""\begin{longtable}{lrl} +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, longtable=True) + withoutindex_expected = r"""\begin{longtable}{rl} +\toprule + a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot + 1 & b1 \\ + 2 & b2 \\ +\end{longtable} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_escape_special_chars(self): + special_characters = ['&', '%', '$', '#', '_', '{', '}', '~', '^', + '\\'] + df = DataFrame(data=special_characters) + observed = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & \& \\ +1 & \% \\ +2 & \$ \\ +3 & \# \\ +4 & \_ \\ +5 & \{ \\ +6 & \} \\ +7 & \textasciitilde \\ +8 & \textasciicircum \\ +9 & 
\textbackslash \\ +\bottomrule +\end{tabular} +""" + + assert observed == expected + + def test_to_latex_no_header(self): + # GH 7124 + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(header=False) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, header=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_decimal(self, frame): + # GH 12031 + frame.to_latex() + + df = DataFrame({'a': [1.0, 2.1], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(decimal=',') + + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1,0 & b1 \\ +1 & 2,1 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected From 2eb6d38ed0563318cea5f419a6eb32b211d24ff1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 Mar 2017 07:50:49 -0500 Subject: [PATCH 127/353] CLN: remove deprecated irow, icol, iget, iget_value (GH10711) xref https://github.com/pandas-dev/pandas/issues/6581 Author: Joris Van den Bossche Closes #15547 from jorisvandenbossche/remove-irow-icol and squashes the following commits: 06ea1bb [Joris Van den Bossche] CLN: remove deprecated irow, icol, iget, iget_value (GH10711) --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/core/frame.py | 25 -------------------- pandas/core/groupby.py | 10 -------- pandas/core/series.py | 25 -------------------- pandas/tests/frame/test_indexing.py | 23 ++++-------------- pandas/tests/frame/test_nonunique_indexes.py | 2 +- pandas/tests/groupby/test_groupby.py | 16 +------------ pandas/tests/series/test_indexing.py | 16 ++----------- pandas/tests/sparse/test_frame.py | 3 +-- 9 files changed, 12 insertions(+), 110 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6e9dfb92dfd90..dc8420080b50d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -548,6 +548,8 @@ Removal of prior version deprecations/changes - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) - ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``) +- The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed + in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). .. _whatsnew_0200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 021ce59e3402b..0d14f00bee508 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1916,23 +1916,6 @@ def set_value(self, index, col, value, takeable=False): return self - def irow(self, i, copy=False): - """ - DEPRECATED. Use ``.iloc[i]`` instead - """ - - warnings.warn("irow(i) is deprecated. Please use .iloc[i]", - FutureWarning, stacklevel=2) - return self._ixs(i, axis=0) - - def icol(self, i): - """ - DEPRECATED. Use ``.iloc[:, i]`` instead - """ - warnings.warn("icol(i) is deprecated. 
Please use .iloc[:,i]", - FutureWarning, stacklevel=2) - return self._ixs(i, axis=1) - def _ixs(self, i, axis=0): """ i : int, slice, or sequence of integers @@ -2007,14 +1990,6 @@ def _ixs(self, i, axis=0): return result - def iget_value(self, i, j): - """ - DEPRECATED. Use ``.iat[i, j]`` instead - """ - warnings.warn("iget_value(i, j) is deprecated. Please use .iat[i, j]", - FutureWarning, stacklevel=2) - return self.iat[i, j] - def __getitem__(self, key): key = com._apply_if_callable(key, self) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 381a8edcb5192..578c334781d15 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1004,16 +1004,6 @@ class GroupBy(_GroupBy): """ _apply_whitelist = _common_apply_whitelist - def irow(self, i): - """ - DEPRECATED. Use ``.nth(i)`` instead - """ - - # 10177 - warnings.warn("irow(i) is deprecated. Please use .nth(i)", - FutureWarning, stacklevel=2) - return self.nth(i) - @Substitution(name='groupby') @Appender(_doc_template) def count(self): diff --git a/pandas/core/series.py b/pandas/core/series.py index ffe1be26fda54..1114590421fd8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -875,31 +875,6 @@ def reshape(self, *args, **kwargs): return self._values.reshape(shape, **kwargs) - def iget_value(self, i, axis=0): - """ - DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead - """ - warnings.warn("iget_value(i) is deprecated. Please use .iloc[i] or " - ".iat[i]", FutureWarning, stacklevel=2) - return self._ixs(i) - - def iget(self, i, axis=0): - """ - DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead - """ - - warnings.warn("iget(i) is deprecated. Please use .iloc[i] or .iat[i]", - FutureWarning, stacklevel=2) - return self._ixs(i) - - def irow(self, i, axis=0): - """ - DEPRECATED. Use ``.iloc[i]`` or ``.iat[i]`` instead - """ - warnings.warn("irow(i) is deprecated. 
Please use .iloc[i] or .iat[i]", - FutureWarning, stacklevel=2) - return self._ixs(i) - def get_value(self, label, takeable=False): """ Quickly retrieve single value at passed index label diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 18fb17b98570a..36c39ffba70b3 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1761,13 +1761,9 @@ def test_single_element_ix_dont_upcast(self): result = df.loc[[0], "b"] assert_series_equal(result, expected) - def test_irow(self): + def test_iloc_row(self): df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - df.irow(1) - result = df.iloc[1] exp = df.loc[2] assert_series_equal(result, exp) @@ -1795,14 +1791,10 @@ def f(): expected = df.reindex(df.index[[1, 2, 4, 6]]) assert_frame_equal(result, expected) - def test_icol(self): + def test_iloc_col(self): df = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - df.icol(1) - result = df.iloc[:, 1] exp = df.loc[:, 2] assert_series_equal(result, exp) @@ -1828,8 +1820,7 @@ def f(): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) assert_frame_equal(result, expected) - def test_irow_icol_duplicates(self): - # 10711, deprecated + def test_iloc_duplicates(self): df = DataFrame(np.random.rand(3, 3), columns=list('ABC'), index=list('aab')) @@ -1874,16 +1865,12 @@ def test_irow_icol_duplicates(self): expected = df.take([0], axis=1) assert_frame_equal(result, expected) - def test_icol_sparse_propegate_fill_value(self): + def test_iloc_sparse_propegate_fill_value(self): from pandas.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) self.assertTrue(len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values)) - def test_iget_value(self): - # 10711 deprecated - - with tm.assert_produces_warning(FutureWarning): - self.frame.iget_value(0, 0) + def test_iat(self): for i, row in enumerate(self.frame.index): for j, col in enumerate(self.frame.columns): diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index d6bcb85e01910..bb7c7c2bd012d 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -429,7 +429,7 @@ def test_columns_with_dups(self): self.assertEqual(len(df._data._blknos), len(df.columns)) self.assertEqual(len(df._data._blklocs), len(df.columns)) - # testing iget + # testing iloc for i in range(len(df.columns)): df.iloc[:, i] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 59cbcab23b9e7..74e8c6c45946f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3828,20 +3828,6 @@ def test_groupby_whitelist(self): 'mad', 'std', 'var', 'sem'] AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] - def test_groupby_whitelist_deprecations(self): - from string import ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - - # 10711 deprecated - with tm.assert_produces_warning(FutureWarning): - df.groupby('letters').irow(0) - with tm.assert_produces_warning(FutureWarning): - df.groupby('letters').floats.irow(0) - def test_regression_whitelist_methods(self): # GH6944 @@ 
-3917,7 +3903,7 @@ def test_tab_completion(self): 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', - 'nunique', 'head', 'irow', 'describe', 'cummax', 'quantile', + 'nunique', 'head', 'describe', 'cummax', 'quantile', 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 8a2cc53b42938..bb77550e01f11 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -164,22 +164,10 @@ def test_getitem_get(self): result = s.get(None) self.assertIsNone(result) - def test_iget(self): + def test_iloc(self): s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.iget(1) - - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.irow(1) - - # 10711, deprecated - with tm.assert_produces_warning(FutureWarning): - s.iget_value(1) - for i in range(len(s)): result = s.iloc[i] exp = s[s.index[i]] @@ -199,7 +187,7 @@ def test_iget(self): expected = s.reindex(s.index[[0, 2, 3, 4, 5]]) assert_series_equal(result, expected) - def test_iget_nonunique(self): + def test_iloc_nonunique(self): s = Series([0, 1, 2], index=[0, 1, 0]) self.assertEqual(s.iloc[2], 2) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index e3b865492c043..b2283364a1631 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -389,8 +389,7 @@ def test_getitem(self): self.assertRaises(Exception, sdf.__getitem__, ['a', 'd']) - def test_icol(self): - # 10711 deprecated + def test_iloc(self): # 2227 result = self.frame.iloc[:, 0] From d92a75962b6b772f0befb70762cedcfbf7aecb6e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 2 Mar 2017 08:00:14 -0500 Subject: [PATCH 128/353] DOC: revert gbq doc-strings to be in-line rather than wrapped --- pandas/core/frame.py | 35 ++++++++++++-------- pandas/io/gbq.py | 76 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 87 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0d14f00bee508..ff5dcb3f544ec 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -78,7 +78,7 @@ from pandas import compat from pandas.compat.numpy import function as nv from pandas.util.decorators import (deprecate_kwarg, Appender, - Substitution, docstring_wrapper) + Substitution) from pandas.util.validators import validate_bool_kwarg from pandas.tseries.period import PeriodIndex @@ -908,7 +908,26 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None): """Write a DataFrame to a Google BigQuery table. - THIS IS AN EXPERIMENTAL LIBRARY + The main method a user calls to export pandas DataFrame contents to + Google BigQuery table. + + Google BigQuery API Client Library v2 for Python is used. + Documentation is available `here + `__ + + Authentication to the Google BigQuery service is via OAuth 2.0. + + - If "private_key" is not provided: + + By default "application default credentials" are used. + + If default application credentials are not found or are restrictive, + user account credentials are used. 
In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. Parameters ---------- @@ -933,8 +952,6 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, Service account private key in JSON format. Can be file path or string contents. This is useful for remote server authentication (eg. jupyter iPython notebook on remote host) - - .. versionadded:: 0.17.0 """ from pandas.io import gbq @@ -5402,16 +5419,6 @@ def combineMult(self, other): _EMPTY_SERIES = Series([]) -# patch in the doc-string for to_gbq -# and bind this method -def _f(): - from pandas.io.gbq import _try_import - return _try_import().to_gbq.__doc__ - - -DataFrame.to_gbq = docstring_wrapper(DataFrame.to_gbq, _f) - - def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 3407f51af5e83..9cfb27a92bfef 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,7 +1,5 @@ """ Google BigQuery support """ -from pandas.util.decorators import docstring_wrapper - def _try_import(): # since pandas is a dependency of pandas-gbq @@ -25,6 +23,72 @@ def _try_import(): def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, verbose=True, private_key=None, dialect='legacy', **kwargs): + r"""Load data from Google BigQuery. + + The main method a user calls to execute a Query in Google BigQuery + and read results into a pandas DataFrame. + + Google BigQuery API Client Library v2 for Python is used. + Documentation is available `here + `__ + + Authentication to the Google BigQuery service is via OAuth 2.0. + + - If "private_key" is not provided: + + By default "application default credentials" are used. + + If default application credentials are not found or are restrictive, + user account credentials are used. In this case, you will be asked to + grant permissions for product name 'pandas GBQ'. + + - If "private_key" is provided: + + Service account credentials will be used to authenticate. + + Parameters + ---------- + query : str + SQL-Like Query to return data values + project_id : str + Google BigQuery Account project ID. + index_col : str (optional) + Name of result column to use for index in results DataFrame + col_order : list(str) (optional) + List of BigQuery column names in the desired order for results + DataFrame + reauth : boolean (default False) + Force Google BigQuery to reauthenticate the user. This is useful + if multiple accounts are used. + verbose : boolean (default True) + Verbose output + private_key : str (optional) + Service account private key in JSON format. Can be file path + or string contents. This is useful for remote server + authentication (eg. jupyter iPython notebook on remote host) + + dialect : {'legacy', 'standard'}, default 'legacy' + 'legacy' : Use BigQuery's legacy SQL dialect. + 'standard' : Use BigQuery's standard SQL (beta), which is + compliant with the SQL 2011 standard. For more information + see `BigQuery SQL Reference + `__ + + **kwargs : Arbitrary keyword arguments + configuration (dict): query config parameters for job processing. 
+ For example: + + configuration = {'query': {'useQueryCache': False}} + + For more information see `BigQuery SQL Reference + `__ + + Returns + ------- + df: DataFrame + DataFrame representing results of query + + """ pandas_gbq = _try_import() return pandas_gbq.read_gbq( query, project_id=project_id, @@ -35,10 +99,6 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, **kwargs) -read_gbq = docstring_wrapper(read_gbq, - lambda: _try_import().read_gbq.__doc__) - - def to_gbq(dataframe, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', private_key=None): pandas_gbq = _try_import() @@ -46,7 +106,3 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000, chunksize=chunksize, verbose=verbose, reauth=reauth, if_exists=if_exists, private_key=private_key) - - -to_gbq = docstring_wrapper(to_gbq, - lambda: _try_import().to_gbq.__doc__) From 37fe2c4edddbec2c08d561667897a1ef5a18771c Mon Sep 17 00:00:00 2001 From: Ben Thayer Date: Thu, 2 Mar 2017 08:16:48 -0500 Subject: [PATCH 129/353] ENH: Added FrozenList difference setop closes #15475 Author: Ben Thayer Author: bthayer2365 Closes #15506 from bthayer2365/frozen-index and squashes the following commits: 428a1b3 [Ben Thayer] Added __iadd__ test, fixed whatsnew 84ba405 [Ben Thayer] Merge branch 'master' of github.com:pandas-dev/pandas into frozen-index 8dbde1e [Ben Thayer] Rebased to upstream/master 6f6c140 [Ben Thayer] Added docstrings, depricated __iadd__, changed __add__ to use self.union() 66b3b91 [Ben Thayer] Fixed issue number 3d6cee5 [Ben Thayer] Depricated __add__ in favor of union ccd75c7 [Ben Thayer] Changed __sub__ to difference cd7de26 [Ben Thayer] Added versionadded tag in docs and renamed test_inplace to test_inplace_add for consistency 0ea8d21 [Ben Thayer] Added __isub__ and groupby example to docs 79dd958 [Ben Thayer] Updated whatsnew to reflect changes 0fc7e19 [Ben Thayer] Removed whitespace 73564ab [Ben Thayer] Added FrozenList subtraction fee7a7d [bthayer2365] Merge branch 'master' into frozen-index 6a2b48d [Ben Thayer] Added docstrings, depricated __iadd__, changed __add__ to use self.union() 2ab85cb [Ben Thayer] Fixed issue number cb95089 [Ben Thayer] Depricated __add__ in favor of union 2e43849 [Ben Thayer] Changed __sub__ to difference fdcfbbb [Ben Thayer] Added versionadded tag in docs and renamed test_inplace to test_inplace_add for consistency 2fad2f7 [Ben Thayer] Added __isub__ and groupby example to docs cd73faa [Ben Thayer] Updated whatsnew to reflect changes f6381a8 [Ben Thayer] Removed whitespace ada7cda [Ben Thayer] Added FrozenList subtraction --- doc/source/groupby.rst | 10 +++++++++ doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/indexes/frozen.py | 24 ++++++++++++++++++--- pandas/tests/indexes/test_frozen.py | 33 +++++++++++++++++++++-------- 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 8484ccd69a983..2d406de7c0c9b 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -126,6 +126,16 @@ We could naturally group by either the ``A`` or ``B`` columns or both: grouped = df.groupby('A') grouped = df.groupby(['A', 'B']) +.. versionadded:: 0.20 + +If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all +but the specified columns. + +.. ipython:: python + + df2 = df.set_index(['A', 'B']) + grouped = df2.groupby(level=df2.index.names.difference(['B']) + These will split the DataFrame on its index (rows). 
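A self-contained sketch of the ``names.difference`` pattern added above, assuming a small illustrative frame (note that the closing parenthesis on the ``groupby`` call is required)::

    import pandas as pd

    df = pd.DataFrame({'A': [0, 0, 1, 1],
                       'B': list('abab'),
                       'C': [1, 2, 3, 4]})
    df2 = df.set_index(['A', 'B'])

    # group on every index level except 'B'; names.difference() returns a
    # FrozenList, which groupby(level=...) accepts directly
    grouped = df2.groupby(level=df2.index.names.difference(['B']))
    grouped.sum()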
We could also split by the columns: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index dc8420080b50d..cc33a4a7ce6c6 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -28,6 +28,7 @@ New features - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``.str.replace`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``FrozenList`` has gained the ``.difference()`` setop method (:issue:`15475`) @@ -534,6 +535,7 @@ Deprecations - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) +- ``FrozenList`` addition (new object and inplace) have been deprecated in favor of the ``.union()`` method. (:issue: `15475`) .. _whatsnew_0200.prior_deprecations: diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index e043ba64bbad7..47e2557333ec7 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -13,6 +13,8 @@ from pandas.types.cast import _coerce_indexer_dtype from pandas.formats.printing import pprint_thing +import warnings + class FrozenList(PandasObject, list): @@ -25,11 +27,14 @@ class FrozenList(PandasObject, list): # typechecks def __add__(self, other): + warnings.warn("__add__ is deprecated, use union(...)", FutureWarning) + return self.union(other) + + def __iadd__(self, other): + warnings.warn("__iadd__ is deprecated, use union(...)", FutureWarning) if isinstance(other, tuple): other = list(other) - return self.__class__(super(FrozenList, self).__add__(other)) - - __iadd__ = __add__ + return super(FrozenList, self).__iadd__(other) # Python 2 compat def __getslice__(self, i, j): @@ -80,6 +85,19 @@ def __repr__(self): __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled + def union(self, other): + """Returns a FrozenList with other concatenated to the end of self""" + if isinstance(other, tuple): + other = list(other) + return self.__class__(super(FrozenList, self).__add__(other)) + + def difference(self, other): + """Returns a FrozenList with the same elements as self, but with elements + that are also in other removed.""" + other = set(other) + temp = [x for x in self if x not in other] + return self.__class__(temp) + class FrozenNDArray(PandasObject, np.ndarray): diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index a82409fbf9513..a5fbf066adc83 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -15,20 +15,35 @@ def setUp(self): self.klass = FrozenList def test_add(self): - result = self.container + (1, 2, 3) + q = FrozenList([1]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + q = q + [2, 3] + expected = FrozenList([1, 2, 3]) + self.check_result(q, expected) + + def test_iadd(self): + q = FrozenList([1]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + q += [2, 3] + expected = FrozenList([1, 2, 3]) + self.check_result(q, expected) + + def test_union(self): + result = 
self.container.union((1, 2, 3)) expected = FrozenList(self.lst + [1, 2, 3]) self.check_result(result, expected) - result = (1, 2, 3) + self.container - expected = FrozenList([1, 2, 3] + self.lst) + def test_difference(self): + result = self.container.difference([2]) + expected = FrozenList([1, 3, 4, 5]) self.check_result(result, expected) - def test_inplace(self): - q = r = self.container - q += [5] - self.check_result(q, self.lst + [5]) - # other shouldn't be mutated - self.check_result(r, self.lst) + def test_difference_dupe(self): + result = FrozenList([1, 2, 3, 2]).difference([2]) + expected = FrozenList([1, 3]) + self.check_result(result, expected) class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): From f000a4eac361737c6524ca2273c158e8d3b04ab2 Mon Sep 17 00:00:00 2001 From: Amol Kahat Date: Thu, 2 Mar 2017 08:33:42 -0500 Subject: [PATCH 130/353] BUG: Fix index for datetime64 conversion. Fixes #13937 closes #13937 Author: Amol Kahat Closes #14446 from amolkahat/bug_fixes and squashes the following commits: 3806983 [Amol Kahat] Modify test cases. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 4 ++-- pandas/tests/frame/test_convert_to.py | 24 ++++++++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cc33a4a7ce6c6..dca4f890e496b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -612,6 +612,7 @@ Bug Fixes - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) - Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) +- Bug in ``DataFrame.to_records()`` with converting a ``DatetimeIndex`` with a timezone (:issue:`13937`) - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff5dcb3f544ec..26a0a91094e7d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -36,7 +36,7 @@ is_object_dtype, is_extension_type, is_datetimetz, - is_datetime64_dtype, + is_datetime64_any_dtype, is_datetime64tz_dtype, is_bool_dtype, is_integer_dtype, @@ -1103,7 +1103,7 @@ def to_records(self, index=True, convert_datetime64=True): y : recarray """ if index: - if is_datetime64_dtype(self.index) and convert_datetime64: + if is_datetime64_any_dtype(self.index) and convert_datetime64: ix_vals = [self.index.to_pydatetime()] else: if isinstance(self.index, MultiIndex): diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 0dde113dd5147..8323d5ed9069f 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- -from __future__ import print_function - -from numpy import nan +import pytest import numpy as np from pandas import compat @@ -10,7 +8,6 @@ date_range) import pandas.util.testing as tm - from pandas.tests.frame.common import TestData @@ -41,13 +38,13 @@ def test_to_dict(self): recons_data = DataFrame(test_data).to_dict("sp") expected_split = {'columns': ['A', 'B'], 'index': ['1', '2', '3'], - 'data': [[1.0, '1'], [2.0, '2'], [nan, '3']]} + 'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]} tm.assert_dict_equal(recons_data, expected_split) recons_data = DataFrame(test_data).to_dict("r") expected_records = [{'A': 1.0, 'B': '1'}, {'A': 2.0, 'B': '2'}, - {'A': nan, 'B': '3'}] + {'A': np.nan, 'B': '3'}] 
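# Aside: a minimal sketch of the GH 13937 behaviour exercised by this patch --
# a tz-aware DatetimeIndex should survive DataFrame.to_records(). The data
# below is illustrative only.
import pandas as pd

df = pd.DataFrame({'A': [1, 2]},
                  index=pd.date_range('2012-01-01', periods=2,
                                      tz='US/Eastern'))
rec = df.to_records()        # index values converted via .to_pydatetime()
rec.dtype.names              # ('index', 'A')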
tm.assertIsInstance(recons_data, list) self.assertEqual(len(recons_data), 3) for l, r in zip(recons_data, expected_records): @@ -192,3 +189,18 @@ def test_to_records_with_unicode_column_names(self): "formats": [' Date: Thu, 2 Mar 2017 09:23:58 -0500 Subject: [PATCH 131/353] TST: remove deprecated usages of FrozenList.__add__ from test code xref #15506 --- pandas/core/panel.py | 6 +++--- pandas/core/reshape.py | 6 +++--- pandas/core/strings.py | 2 +- pandas/tests/groupby/test_value_counts.py | 2 +- pandas/tools/concat.py | 2 +- test_fast.sh | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 4a6c6cf291316..c5ea513223dce 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -940,9 +940,9 @@ def construct_index_parts(idx, major=True): minor_labels, minor_levels, minor_names = construct_index_parts( self.minor_axis, major=False) - levels = major_levels + minor_levels - labels = major_labels + minor_labels - names = major_names + minor_names + levels = list(major_levels) + list(minor_levels) + labels = list(major_labels) + list(minor_labels) + names = list(major_names) + list(minor_names) index = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 87cb088c2e91e..faad6c500a21f 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -216,8 +216,8 @@ def get_new_columns(self): width = len(self.value_columns) propagator = np.repeat(np.arange(width), stride) if isinstance(self.value_columns, MultiIndex): - new_levels = self.value_columns.levels + (self.removed_level,) - new_names = self.value_columns.names + (self.removed_name,) + new_levels = self.value_columns.levels.union((self.removed_level,)) + new_names = self.value_columns.names.union((self.removed_name,)) new_labels = [lab.take(propagator) for lab in self.value_columns.labels] @@ -806,7 +806,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, for col in id_vars: mdata[col] = np.tile(frame.pop(col).values, K) - mcolumns = id_vars + var_name + [value_name] + mcolumns = list(id_vars) + list(var_name) + list([value_name]) mdata[value_name] = frame.values.ravel('F') for i, col in enumerate(var_name): diff --git a/pandas/core/strings.py b/pandas/core/strings.py index ac8d1db6a0bf3..51016926d6909 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -787,7 +787,7 @@ def str_extractall(arr, pat, flags=0): if 0 < len(index_list): from pandas import MultiIndex index = MultiIndex.from_tuples( - index_list, names=arr.index.names + ["match"]) + index_list, names=arr.index.names.union(["match"])) else: index = None result = arr._constructor_expanddim(match_list, index=index, diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 801d0da070112..ff01df2693c7c 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -28,7 +28,7 @@ def check_value_counts(df, keys, bins): gr = df.groupby(keys, sort=isort) right = gr['3rd'].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1] + ['3rd'] + right.index.names = right.index.names[:-1].union(['3rd']) # have to sort on index because of unstable sort on values left, right = map(rebuild_index, (left, right)) # xref GH9212 diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index 6405106118472..ae9d7af9d98ff 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ 
-574,7 +574,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): " not have the same number of levels") # also copies - names = names + _get_consensus_names(indexes) + names = list(names) + list(_get_consensus_names(indexes)) return MultiIndex(levels=levels, labels=label_list, names=names, verify_integrity=False) diff --git a/test_fast.sh b/test_fast.sh index 30ac7f84cbe8b..f22ab73277e8b 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -5,4 +5,4 @@ # https://github.com/pytest-dev/pytest/issues/1075 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -pytest pandas --skip-slow --skip-network -m "not single" -n 4 +pytest pandas --skip-slow --skip-network -m "not single" -n 4 $@ From 211ecd5d829e6ff9019261680f1d4e6f1b193a13 Mon Sep 17 00:00:00 2001 From: manuels Date: Thu, 2 Mar 2017 18:20:40 -0500 Subject: [PATCH 132/353] Make Series.map() documentation a bit more verbose Author: manuels Closes #15235 from manuels/patch-1 and squashes the following commits: c5113f2 [manuels] Make Series.map() documentation a bit more verbose --- pandas/core/series.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1114590421fd8..626a4a81193cc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2089,13 +2089,15 @@ def map(self, arg, na_action=None): Examples -------- - Map inputs to outputs + Map inputs to outputs (both of type `Series`) + >>> x = pd.Series([1,2,3], index=['one', 'two', 'three']) >>> x one 1 two 2 three 3 + >>> y = pd.Series(['foo', 'bar', 'baz'], index=[1,2,3]) >>> y 1 foo 2 bar @@ -2106,6 +2108,16 @@ def map(self, arg, na_action=None): two bar three baz + Mapping a dictionary keys on the index labels works similar as + with a `Series`: + + >>> z = {1: 'A', 2: 'B', 3: 'C'} + + >>> x.map(z) + one A + two B + three C + Use na_action to control whether NA values are affected by the mapping function. @@ -2127,6 +2139,11 @@ def map(self, arg, na_action=None): 3 NaN dtype: object + See Also + -------- + Series.apply: For applying more complex functions on a Series + DataFrame.apply: Apply a function row-/column-wise + DataFrame.applymap: Apply a function elementwise on a whole DataFrame """ if is_extension_type(self.dtype): From 24a2155eec4a24242cdecd9ddd7e61d02d8d6aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Gs=C3=A4nger?= Date: Fri, 3 Mar 2017 10:16:45 +0100 Subject: [PATCH 133/353] ENH: Added multicolumn/multirow support for latex (#14184) closes #13508 Print names of MultiIndex columns. Added "multicolumn" and "multirow" flags to to_latex which trigger the corresponding feature. "multicolumn_format" is used to select alignment. Multirow adds clines to visually separate sections. --- doc/source/options.rst | 295 +++++++++++++------------- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/config_init.py | 31 ++- pandas/core/frame.py | 46 +++- pandas/formats/format.py | 115 +++++++++- pandas/tests/formats/test_to_latex.py | 115 +++++++++- 6 files changed, 433 insertions(+), 170 deletions(-) diff --git a/doc/source/options.rst b/doc/source/options.rst index 77cac6d495d13..10a13ed36df8d 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -273,151 +273,156 @@ Options are 'right', and 'left'. 
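The justification values mentioned above can be tried through the regular option API; a minimal sketch (option name as listed in the table below)::

    import pandas as pd

    pd.set_option('display.colheader_justify', 'left')
    pd.get_option('display.colheader_justify')    # 'left'
    pd.reset_option('display.colheader_justify')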
Available Options ----------------- -========================== ============ ================================== -Option Default Function -========================== ============ ================================== -display.chop_threshold None If set to a float value, all float - values smaller then the given - threshold will be displayed as - exactly 0 by repr and friends. -display.colheader_justify right Controls the justification of - column headers. used by DataFrameFormatter. -display.column_space 12 No description available. -display.date_dayfirst False When True, prints and parses dates - with the day first, eg 20/01/2005 -display.date_yearfirst False When True, prints and parses dates - with the year first, eg 2005/01/20 -display.encoding UTF-8 Defaults to the detected encoding - of the console. Specifies the encoding - to be used for strings returned by - to_string, these are generally strings - meant to be displayed on the console. -display.expand_frame_repr True Whether to print out the full DataFrame - repr for wide DataFrames across - multiple lines, `max_columns` is - still respected, but the output will - wrap-around across multiple "pages" - if its width exceeds `display.width`. -display.float_format None The callable should accept a floating - point number and return a string with - the desired format of the number. - This is used in some places like - SeriesFormatter. - See core.format.EngFormatter for an example. -display.height 60 Deprecated. Use `display.max_rows` instead. -display.large_repr truncate For DataFrames exceeding max_rows/max_cols, - the repr (and HTML repr) can show - a truncated table (the default from 0.13), - or switch to the view from df.info() - (the behaviour in earlier versions of pandas). - allowable settings, ['truncate', 'info'] -display.latex.repr False Whether to produce a latex DataFrame - representation for jupyter frontends - that support it. -display.latex.escape True Escapes special caracters in Dataframes, when - using the to_latex method. -display.latex.longtable False Specifies if the to_latex method of a Dataframe - uses the longtable format. -display.line_width 80 Deprecated. Use `display.width` instead. -display.max_columns 20 max_rows and max_columns are used - in __repr__() methods to decide if - to_string() or info() is used to - render an object to a string. In - case python/IPython is running in - a terminal this can be set to 0 and - pandas will correctly auto-detect - the width the terminal and swap to - a smaller format in case all columns - would not fit vertically. The IPython - notebook, IPython qtconsole, or IDLE - do not run in a terminal and hence - it is not possible to do correct - auto-detection. 'None' value means - unlimited. -display.max_colwidth 50 The maximum width in characters of - a column in the repr of a pandas - data structure. When the column overflows, - a "..." placeholder is embedded in - the output. -display.max_info_columns 100 max_info_columns is used in DataFrame.info - method to decide if per column information - will be printed. -display.max_info_rows 1690785 df.info() will usually show null-counts - for each column. For large frames - this can be quite slow. max_info_rows - and max_info_cols limit this null - check only to frames with smaller - dimensions then specified. -display.max_rows 60 This sets the maximum number of rows - pandas should output when printing - out various output. 
For example, - this value determines whether the - repr() for a dataframe prints out - fully or just a summary repr. - 'None' value means unlimited. -display.max_seq_items 100 when pretty-printing a long sequence, - no more then `max_seq_items` will - be printed. If items are omitted, - they will be denoted by the addition - of "..." to the resulting string. - If set to None, the number of items - to be printed is unlimited. -display.memory_usage True This specifies if the memory usage of - a DataFrame should be displayed when the - df.info() method is invoked. -display.multi_sparse True "Sparsify" MultiIndex display (don't - display repeated elements in outer - levels within groups) -display.notebook_repr_html True When True, IPython notebook will - use html representation for - pandas objects (if it is available). -display.pprint_nest_depth 3 Controls the number of nested levels - to process when pretty-printing -display.precision 6 Floating point output precision in - terms of number of places after the - decimal, for regular formatting as well - as scientific notation. Similar to - numpy's ``precision`` print option -display.show_dimensions truncate Whether to print out dimensions - at the end of DataFrame repr. - If 'truncate' is specified, only - print out the dimensions if the - frame is truncated (e.g. not display - all rows and/or columns) -display.width 80 Width of the display in characters. - In case python/IPython is running in - a terminal this can be set to None - and pandas will correctly auto-detect - the width. Note that the IPython notebook, - IPython qtconsole, or IDLE do not run in a - terminal and hence it is not possible - to correctly detect the width. -html.border 1 A ``border=value`` attribute is - inserted in the ```` tag - for the DataFrame HTML repr. -io.excel.xls.writer xlwt The default Excel writer engine for - 'xls' files. -io.excel.xlsm.writer openpyxl The default Excel writer engine for - 'xlsm' files. Available options: - 'openpyxl' (the default). -io.excel.xlsx.writer openpyxl The default Excel writer engine for - 'xlsx' files. -io.hdf.default_format None default format writing format, if - None, then put will default to - 'fixed' and append will default to - 'table' -io.hdf.dropna_table True drop ALL nan rows when appending - to a table -mode.chained_assignment warn Raise an exception, warn, or no - action if trying to use chained - assignment, The default is warn -mode.sim_interactive False Whether to simulate interactive mode - for purposes of testing -mode.use_inf_as_null False True means treat None, NaN, -INF, - INF as null (old way), False means - None and NaN are null, but INF, -INF - are not null (new way). -========================== ============ ================================== +=================================== ============ ================================== +Option Default Function +=================================== ============ ================================== +display.chop_threshold None If set to a float value, all float + values smaller then the given + threshold will be displayed as + exactly 0 by repr and friends. +display.colheader_justify right Controls the justification of + column headers. used by DataFrameFormatter. +display.column_space 12 No description available. 
+display.date_dayfirst False When True, prints and parses dates + with the day first, eg 20/01/2005 +display.date_yearfirst False When True, prints and parses dates + with the year first, eg 2005/01/20 +display.encoding UTF-8 Defaults to the detected encoding + of the console. Specifies the encoding + to be used for strings returned by + to_string, these are generally strings + meant to be displayed on the console. +display.expand_frame_repr True Whether to print out the full DataFrame + repr for wide DataFrames across + multiple lines, `max_columns` is + still respected, but the output will + wrap-around across multiple "pages" + if its width exceeds `display.width`. +display.float_format None The callable should accept a floating + point number and return a string with + the desired format of the number. + This is used in some places like + SeriesFormatter. + See core.format.EngFormatter for an example. +display.height 60 Deprecated. Use `display.max_rows` instead. +display.large_repr truncate For DataFrames exceeding max_rows/max_cols, + the repr (and HTML repr) can show + a truncated table (the default from 0.13), + or switch to the view from df.info() + (the behaviour in earlier versions of pandas). + allowable settings, ['truncate', 'info'] +display.latex.repr False Whether to produce a latex DataFrame + representation for jupyter frontends + that support it. +display.latex.escape True Escapes special caracters in Dataframes, when + using the to_latex method. +display.latex.longtable False Specifies if the to_latex method of a Dataframe + uses the longtable format. +display.latex.multicolumn True Combines columns when using a MultiIndex +display.latex.multicolumn_format 'l' Alignment of multicolumn labels +display.latex.multirow False Combines rows when using a MultiIndex. + Centered instead of top-aligned, + separated by clines. +display.line_width 80 Deprecated. Use `display.width` instead. +display.max_columns 20 max_rows and max_columns are used + in __repr__() methods to decide if + to_string() or info() is used to + render an object to a string. In + case python/IPython is running in + a terminal this can be set to 0 and + pandas will correctly auto-detect + the width the terminal and swap to + a smaller format in case all columns + would not fit vertically. The IPython + notebook, IPython qtconsole, or IDLE + do not run in a terminal and hence + it is not possible to do correct + auto-detection. 'None' value means + unlimited. +display.max_colwidth 50 The maximum width in characters of + a column in the repr of a pandas + data structure. When the column overflows, + a "..." placeholder is embedded in + the output. +display.max_info_columns 100 max_info_columns is used in DataFrame.info + method to decide if per column information + will be printed. +display.max_info_rows 1690785 df.info() will usually show null-counts + for each column. For large frames + this can be quite slow. max_info_rows + and max_info_cols limit this null + check only to frames with smaller + dimensions then specified. +display.max_rows 60 This sets the maximum number of rows + pandas should output when printing + out various output. For example, + this value determines whether the + repr() for a dataframe prints out + fully or just a summary repr. + 'None' value means unlimited. +display.max_seq_items 100 when pretty-printing a long sequence, + no more then `max_seq_items` will + be printed. If items are omitted, + they will be denoted by the addition + of "..." to the resulting string. 
+ If set to None, the number of items + to be printed is unlimited. +display.memory_usage True This specifies if the memory usage of + a DataFrame should be displayed when the + df.info() method is invoked. +display.multi_sparse True "Sparsify" MultiIndex display (don't + display repeated elements in outer + levels within groups) +display.notebook_repr_html True When True, IPython notebook will + use html representation for + pandas objects (if it is available). +display.pprint_nest_depth 3 Controls the number of nested levels + to process when pretty-printing +display.precision 6 Floating point output precision in + terms of number of places after the + decimal, for regular formatting as well + as scientific notation. Similar to + numpy's ``precision`` print option +display.show_dimensions truncate Whether to print out dimensions + at the end of DataFrame repr. + If 'truncate' is specified, only + print out the dimensions if the + frame is truncated (e.g. not display + all rows and/or columns) +display.width 80 Width of the display in characters. + In case python/IPython is running in + a terminal this can be set to None + and pandas will correctly auto-detect + the width. Note that the IPython notebook, + IPython qtconsole, or IDLE do not run in a + terminal and hence it is not possible + to correctly detect the width. +html.border 1 A ``border=value`` attribute is + inserted in the ``
`` tag + for the DataFrame HTML repr. +io.excel.xls.writer xlwt The default Excel writer engine for + 'xls' files. +io.excel.xlsm.writer openpyxl The default Excel writer engine for + 'xlsm' files. Available options: + 'openpyxl' (the default). +io.excel.xlsx.writer openpyxl The default Excel writer engine for + 'xlsx' files. +io.hdf.default_format None default format writing format, if + None, then put will default to + 'fixed' and append will default to + 'table' +io.hdf.dropna_table True drop ALL nan rows when appending + to a table +mode.chained_assignment warn Raise an exception, warn, or no + action if trying to use chained + assignment, The default is warn +mode.sim_interactive False Whether to simulate interactive mode + for purposes of testing +mode.use_inf_as_null False True means treat None, NaN, -INF, + INF as null (old way), False means + None and NaN are null, but INF, -INF + are not null (new way). +=================================== ============ ================================== .. _basics.console_output: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index dca4f890e496b..0991f3873b06f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -182,6 +182,7 @@ Other enhancements - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`) +- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`) - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index d3db633f3aa04..89616890e1de1 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -239,14 +239,35 @@ : bool This specifies if the to_latex method of a Dataframe uses escapes special characters. - method. Valid values: False,True + Valid values: False,True """ pc_latex_longtable = """ :bool This specifies if the to_latex method of a Dataframe uses the longtable format. - method. Valid values: False,True + Valid values: False,True +""" + +pc_latex_multicolumn = """ +: bool + This specifies if the to_latex method of a Dataframe uses multicolumns + to pretty-print MultiIndex columns. + Valid values: False,True +""" + +pc_latex_multicolumn_format = """ +: string + This specifies the format for multicolumn headers. + Can be surrounded with '|'. + Valid values: 'l', 'c', 'r', 'p{}' +""" + +pc_latex_multirow = """ +: bool + This specifies if the to_latex method of a Dataframe uses multirows + to pretty-print MultiIndex rows. 
+ Valid values: False,True """ style_backup = dict() @@ -339,6 +360,12 @@ def mpl_style_cb(key): validator=is_bool) cf.register_option('latex.longtable', False, pc_latex_longtable, validator=is_bool) + cf.register_option('latex.multicolumn', True, pc_latex_multicolumn, + validator=is_bool) + cf.register_option('latex.multicolumn_format', 'l', pc_latex_multicolumn, + validator=is_text) + cf.register_option('latex.multirow', False, pc_latex_multirow, + validator=is_bool) cf.deprecate_option('display.line_width', msg=pc_line_width_deprecation_warning, diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 26a0a91094e7d..b3e43edc3eb55 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1614,10 +1614,11 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, bold_rows=True, column_format=None, longtable=None, escape=None, - encoding=None, decimal='.'): - """ + encoding=None, decimal='.', multicolumn=None, + multicolumn_format=None, multirow=None): + r""" Render a DataFrame to a tabular environment table. You can splice - this into a LaTeX document. Requires \\usepackage{booktabs}. + this into a LaTeX document. Requires \usepackage{booktabs}. `to_latex`-specific options: @@ -1628,27 +1629,54 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, `__ e.g 'rcl' for 3 columns longtable : boolean, default will be read from the pandas config module - default: False + Default: False. Use a longtable environment instead of tabular. Requires adding - a \\usepackage{longtable} to your LaTeX preamble. + a \usepackage{longtable} to your LaTeX preamble. escape : boolean, default will be read from the pandas config module - default: True + Default: True. When set to False prevents from escaping latex special characters in column names. encoding : str, default None A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. decimal : string, default '.' - Character recognized as decimal separator, e.g. ',' in Europe + Character recognized as decimal separator, e.g. ',' in Europe. .. versionadded:: 0.18.0 + multicolumn : boolean, default True + Use \multicolumn to enhance MultiIndex columns. + The default will be read from the config module. + + .. versionadded:: 0.20.0 + + multicolumn_format : str, default 'l' + The alignment for multicolumns, similar to `column_format` + The default will be read from the config module. + + .. versionadded:: 0.20.0 + + multirow : boolean, default False + Use \multirow to enhance MultiIndex rows. + Requires adding a \usepackage{multirow} to your LaTeX preamble. + Will print centered labels (instead of top-aligned) + across the contained rows, separating groups via clines. + The default will be read from the pandas config module. + + .. 
versionadded:: 0.20.0 + """ # Get defaults from the pandas config if longtable is None: longtable = get_option("display.latex.longtable") if escape is None: escape = get_option("display.latex.escape") + if multicolumn is None: + multicolumn = get_option("display.latex.multicolumn") + if multicolumn_format is None: + multicolumn_format = get_option("display.latex.multicolumn_format") + if multirow is None: + multirow = get_option("display.latex.multirow") formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns, col_space=col_space, na_rep=na_rep, @@ -1660,7 +1688,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, index_names=index_names, escape=escape, decimal=decimal) formatter.to_latex(column_format=column_format, longtable=longtable, - encoding=encoding) + encoding=encoding, multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow) if buf is None: return formatter.buf.getvalue() diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 4c081770e0125..9dde3b0001c31 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -650,13 +650,17 @@ def _join_multiline(self, *strcols): st = ed return '\n\n'.join(str_lst) - def to_latex(self, column_format=None, longtable=False, encoding=None): + def to_latex(self, column_format=None, longtable=False, encoding=None, + multicolumn=False, multicolumn_format=None, multirow=False): """ Render a DataFrame to a LaTeX tabular/longtable environment output. """ latex_renderer = LatexFormatter(self, column_format=column_format, - longtable=longtable) + longtable=longtable, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow) if encoding is None: encoding = 'ascii' if compat.PY2 else 'utf-8' @@ -824,11 +828,15 @@ class LatexFormatter(TableFormatter): HTMLFormatter """ - def __init__(self, formatter, column_format=None, longtable=False): + def __init__(self, formatter, column_format=None, longtable=False, + multicolumn=False, multicolumn_format=None, multirow=False): self.fmt = formatter self.frame = self.fmt.frame self.column_format = column_format self.longtable = longtable + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow def write_result(self, buf): """ @@ -850,14 +858,21 @@ def get_col_type(dtype): else: return 'l' + # reestablish the MultiIndex that has been joined by _to_str_column if self.fmt.index and isinstance(self.frame.index, MultiIndex): clevels = self.frame.columns.nlevels strcols.pop(0) name = any(self.frame.index.names) + cname = any(self.frame.columns.names) + lastcol = self.frame.index.nlevels - 1 for i, lev in enumerate(self.frame.index.levels): lev2 = lev.format() blank = ' ' * len(lev2[0]) - lev3 = [blank] * clevels + # display column names in last index-column + if cname and i == lastcol: + lev3 = [x if x else '{}' for x in self.frame.columns.names] + else: + lev3 = [blank] * clevels if name: lev3.append(lev.name) for level_idx, group in itertools.groupby( @@ -885,10 +900,15 @@ def get_col_type(dtype): buf.write('\\begin{longtable}{%s}\n' % column_format) buf.write('\\toprule\n') - nlevels = self.frame.columns.nlevels + ilevels = self.frame.index.nlevels + clevels = self.frame.columns.nlevels + nlevels = clevels if any(self.frame.index.names): nlevels += 1 - for i, row in enumerate(zip(*strcols)): + strrows = list(zip(*strcols)) + self.clinebuf = [] + + for i, row in enumerate(strrows): if i == nlevels and self.fmt.header: buf.write('\\midrule\n') # End of 
header if self.longtable: @@ -910,8 +930,17 @@ def get_col_type(dtype): if x else '{}') for x in row] else: crow = [x if x else '{}' for x in row] + if i < clevels and self.fmt.header and self.multicolumn: + # sum up columns to multicolumns + crow = self._format_multicolumn(crow, ilevels) + if (i >= nlevels and self.fmt.index and self.multirow and + ilevels > 1): + # sum up rows to multirows + crow = self._format_multirow(crow, ilevels, i, strrows) buf.write(' & '.join(crow)) buf.write(' \\\\\n') + if self.multirow and i < len(strrows) - 1: + self._print_cline(buf, i, len(strcols)) if not self.longtable: buf.write('\\bottomrule\n') @@ -919,6 +948,80 @@ def get_col_type(dtype): else: buf.write('\\end{longtable}\n') + def _format_multicolumn(self, row, ilevels): + """ + Combine columns belonging to a group to a single multicolumn entry + according to self.multicolumn_format + + e.g.: + a & & & b & c & + will become + \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} + """ + row2 = list(row[:ilevels]) + ncol = 1 + coltext = '' + + def append_col(): + # write multicolumn if needed + if ncol > 1: + row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}' + .format(ncol, self.multicolumn_format, + coltext.strip())) + # don't modify where not needed + else: + row2.append(coltext) + for c in row[ilevels:]: + # if next col has text, write the previous + if c.strip(): + if coltext: + append_col() + coltext = c + ncol = 1 + # if not, add it to the previous multicolumn + else: + ncol += 1 + # write last column name + if coltext: + append_col() + return row2 + + def _format_multirow(self, row, ilevels, i, rows): + """ + Check following rows, whether row should be a multirow + + e.g.: becomes: + a & 0 & \multirow{2}{*}{a} & 0 & + & 1 & & 1 & + b & 0 & \cline{1-2} + b & 0 & + """ + for j in range(ilevels): + if row[j].strip(): + nrow = 1 + for r in rows[i + 1:]: + if not r[j].strip(): + nrow += 1 + else: + break + if nrow > 1: + # overwrite non-multirow entry + row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'.format( + nrow, row[j].strip()) + # save when to end the current block with \cline + self.clinebuf.append([i + nrow - 1, j + 1]) + return row + + def _print_cline(self, buf, i, icol): + """ + Print clines after multirow-blocks are finished + """ + for cl in self.clinebuf: + if cl[0] == i: + buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1], icol)) + # remove entries that have been written to buffer + self.clinebuf = [x for x in self.clinebuf if x[0] != i] + class HTMLFormatter(TableFormatter): diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py index 89e18e1cec06e..17e1e18f03dd6 100644 --- a/pandas/tests/formats/test_to_latex.py +++ b/pandas/tests/formats/test_to_latex.py @@ -168,6 +168,24 @@ def test_to_latex_multiindex(self): assert result == expected + # GH 14184 + df = df.T + df.columns.names = ['a', 'b'] + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +a & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ +b & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 4 & 0 & 4 & 0 \\ +1 & 1 & 5 & 1 & 5 & 1 \\ +2 & 2 & 6 & 2 & 6 & 2 \\ +3 & 3 & 7 & 3 & 7 & 3 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + # GH 10660 df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': list('abab'), @@ -189,16 +207,95 @@ def test_to_latex_multiindex(self): assert result == expected result = df.groupby('a').describe().to_latex() - expected = ('\\begin{tabular}{lrrrrrrrr}\n\\toprule\n{} & c & ' - ' & & & & & & ' - '\\\\\n{} & count & mean & std & min & 25\\% & 
' - '50\\% & 75\\% & max \\\\\na & & & ' - ' & & & & & \\\\\n\\midrule\n0 ' - '& 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 ' - '& 2.0 \\\\\n1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 ' - '& 3.5 & 3.75 & 4.0 ' - '\\\\\n\\bottomrule\n\\end{tabular}\n') + expected = r"""\begin{tabular}{lrrrrrrrr} +\toprule +{} & \multicolumn{8}{l}{c} \\ +{} & count & mean & std & min & 25\% & 50\% & 75\% & max \\ +a & & & & & & & & \\ +\midrule +0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\ +1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + def test_to_latex_multicolumnrow(self): + df = pd.DataFrame({ + ('c1', 0): dict((x, x) for x in range(5)), + ('c1', 1): dict((x, x + 5) for x in range(5)), + ('c2', 0): dict((x, x) for x in range(5)), + ('c2', 1): dict((x, x + 5) for x in range(5)), + ('c3', 0): dict((x, x) for x in range(5)) + }) + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + result = df.to_latex(multicolumn=False) + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & c1 & & c2 & & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.T.to_latex(multirow=True) + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & 0 & 1 & 2 & 3 & 4 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + df.index = df.T.index + result = df.T.to_latex(multirow=True, multicolumn=True, + multicolumn_format='c') + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\ + & & 0 & 1 & 0 & 1 & 0 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" assert result == expected def test_to_latex_escape(self): From 524a9a06566295eef1d43450ff42859fe81081bf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 3 Mar 2017 05:12:21 -0500 Subject: [PATCH 134/353] DEPR: deprecate some top-level non-used functions (#15538) closes #13790 pd.pnow pd.groupby pd.match pd.Term pd.Expr remove info.py --- doc/source/comparison_with_r.rst | 8 ----- doc/source/whatsnew/v0.20.0.txt | 6 ++++ pandas/__init__.py | 43 +++++++++++++++++++++++++- pandas/computation/api.py | 12 +++++++- pandas/core/api.py | 24 +++++++++++++-- pandas/info.py | 20 ------------ pandas/io/api.py | 14 ++++++++- pandas/tests/api/test_api.py | 49 ++++++++++++++++++++++-------- pandas/tests/scalar/test_period.py | 14 +++------ pandas/tseries/period.py | 8 ++++- 10 files changed, 141 insertions(+), 57 deletions(-) delete mode 100644 pandas/info.py diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index aa0cbab4df10b..194e022e34c7c 100644 --- 
a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -206,14 +206,6 @@ of its first argument in its second: s <- 0:4 match(s, c(2,4)) -The :meth:`~pandas.core.groupby.GroupBy.apply` method can be used to replicate -this: - -.. ipython:: python - - s = pd.Series(np.arange(5),dtype=np.float32) - pd.Series(pd.match(s,[2,4],np.nan)) - For more details and examples see :ref:`the reshaping documentation `. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0991f3873b06f..fa5974ee84d34 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -537,6 +537,12 @@ Deprecations - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) - ``FrozenList`` addition (new object and inplace) have been deprecated in favor of the ``.union()`` method. (:issue: `15475`) +- The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) + * ``pd.pnow()``, replaced by ``Period.now()`` + * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore + * ``pd.Expr``, is removed, as it is not applicable to user code. + * ``pd.match()``, is removed. + * ``pd.groupby()``, replaced by using the ``.groupby()`` method directly on a ``Series/DataFrame`` .. _whatsnew_0200.prior_deprecations: diff --git a/pandas/__init__.py b/pandas/__init__.py index 70c547010f623..3bded89e6644a 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -33,7 +33,6 @@ "the C extensions first.".format(module)) from datetime import datetime -from pandas.info import __doc__ # let init-time option registration happen import pandas.core.config_init @@ -63,3 +62,45 @@ v = get_versions() __version__ = v.get('closest-tag', v['version']) del get_versions, v + +# module level doc-string +__doc__ = """ +pandas - a powerful data analysis and manipulation library for Python +===================================================================== + +**pandas** is a Python package providing fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way toward this goal. + +Main Features +------------- +Here are just a few of the things that pandas does well: + + - Easy handling of missing data in floating point as well as non-floating + point data + - Size mutability: columns can be inserted and deleted from DataFrame and + higher dimensional objects + - Automatic and explicit data alignment: objects can be explicitly aligned + to a set of labels, or the user can simply ignore the labels and let + `Series`, `DataFrame`, etc. 
automatically align the data for you in + computations + - Powerful, flexible group by functionality to perform split-apply-combine + operations on data sets, for both aggregating and transforming data + - Make it easy to convert ragged, differently-indexed data in other Python + and NumPy data structures into DataFrame objects + - Intelligent label-based slicing, fancy indexing, and subsetting of large + data sets + - Intuitive merging and joining data sets + - Flexible reshaping and pivoting of data sets + - Hierarchical labeling of axes (possible to have multiple labels per tick) + - Robust IO tools for loading data from flat files (CSV and delimited), + Excel files, databases, and saving/loading data from the ultrafast HDF5 + format + - Time series-specific functionality: date range generation and frequency + conversion, moving window statistics, moving window linear regressions, + date shifting and lagging, etc. +""" diff --git a/pandas/computation/api.py b/pandas/computation/api.py index e5814e08c4bbe..fe3dad015048e 100644 --- a/pandas/computation/api.py +++ b/pandas/computation/api.py @@ -1,4 +1,14 @@ # flake8: noqa from pandas.computation.eval import eval -from pandas.computation.expr import Expr + + +# deprecation, xref #13790 +def Expr(*args, **kwargs): + import warnings + + warnings.warn("pd.Expr is deprecated as it is not " + "applicable to user code", + FutureWarning, stacklevel=2) + from pandas.computation.expr import Expr + return Expr(*args, **kwargs) diff --git a/pandas/core/api.py b/pandas/core/api.py index eaebf45a038a0..65253dedb8b53 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -4,7 +4,7 @@ import numpy as np -from pandas.core.algorithms import factorize, match, unique, value_counts +from pandas.core.algorithms import factorize, unique, value_counts from pandas.types.missing import isnull, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper @@ -17,7 +17,6 @@ from pandas.core.frame import DataFrame from pandas.core.panel import Panel, WidePanel from pandas.core.panel4d import Panel4D -from pandas.core.groupby import groupby from pandas.core.reshape import (pivot_simple as pivot, get_dummies, lreshape, wide_to_long) @@ -42,3 +41,24 @@ from pandas.core.config import (get_option, set_option, reset_option, describe_option, option_context, options) + + +# deprecation, xref #13790 +def match(*args, **kwargs): + import warnings + + warnings.warn("pd.match() is deprecated and will be removed " + "in a future version", + FutureWarning, stacklevel=2) + from pandas.core.algorithms import match + return match(*args, **kwargs) + + +def groupby(*args, **kwargs): + import warnings + + warnings.warn("pd.groupby() is deprecated and will be removed " + "Please use the Series.groupby() or " + "DataFrame.groupby() methods", + FutureWarning, stacklevel=2) + return args[0].groupby(*args[1:], **kwargs) diff --git a/pandas/info.py b/pandas/info.py deleted file mode 100644 index 57ecd91739eab..0000000000000 --- a/pandas/info.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -pandas - a powerful data analysis and manipulation library for Python -===================================================================== - -See http://pandas.pydata.org/ for full documentation. 
Otherwise, see the -docstrings of the various objects in the pandas namespace: - -Series -DataFrame -Panel -Index -DatetimeIndex -HDFStore -bdate_range -date_range -read_csv -read_fwf -read_table -ols -""" diff --git a/pandas/io/api.py b/pandas/io/api.py index 0bd86c85b4b8b..1284b3cb222d6 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -7,7 +7,7 @@ from pandas.io.parsers import read_csv, read_table, read_fwf from pandas.io.clipboard import read_clipboard from pandas.io.excel import ExcelFile, ExcelWriter, read_excel -from pandas.io.pytables import HDFStore, Term, get_store, read_hdf +from pandas.io.pytables import HDFStore, get_store, read_hdf from pandas.io.json import read_json from pandas.io.html import read_html from pandas.io.sql import read_sql, read_sql_table, read_sql_query @@ -17,3 +17,15 @@ from pandas.io.pickle import read_pickle, to_pickle from pandas.io.packers import read_msgpack, to_msgpack from pandas.io.gbq import read_gbq + +# deprecation, xref #13790 +def Term(*args, **kwargs): + import warnings + + warnings.warn("pd.Term is deprecated as it is not " + "applicable to user code. Instead use in-line " + "string expressions in the where clause when " + "searching in HDFStore", + FutureWarning, stacklevel=2) + from pandas.io.pytables import Term + return Term(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 8ca369f8df83a..f2f7a9c778e66 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -59,13 +59,10 @@ class TestPDApi(Base, tm.TestCase): # these are already deprecated; awaiting removal deprecated_classes = ['WidePanel', 'SparseTimeSeries', 'Panel4D', - 'SparseList'] + 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future - deprecated_classes_in_future = ['Term', 'Panel'] - - # these should be removed from top-level namespace - remove_classes_from_top_level_namespace = ['Expr'] + deprecated_classes_in_future = ['Panel'] # external modules exposed in pandas namespace modules = ['np', 'datetime'] @@ -75,7 +72,7 @@ class TestPDApi(Base, tm.TestCase): 'date_range', 'eval', 'factorize', 'get_dummies', 'get_store', 'infer_freq', 'isnull', 'lreshape', - 'match', 'melt', 'notnull', 'offsets', + 'melt', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', 'period_range', 'pivot', 'pivot_table', 'plot_params', 'qcut', @@ -99,9 +96,6 @@ class TestPDApi(Base, tm.TestCase): funcs_to = ['to_datetime', 'to_msgpack', 'to_numeric', 'to_pickle', 'to_timedelta'] - # these should be deprecated in the future - deprecated_funcs_in_future = ['pnow', 'groupby', 'info'] - # these are already deprecated; awaiting removal deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar', 'ewmvol', 'expanding_apply', 'expanding_corr', @@ -114,7 +108,8 @@ class TestPDApi(Base, tm.TestCase): 'rolling_kurt', 'rolling_max', 'rolling_mean', 'rolling_median', 'rolling_min', 'rolling_quantile', 'rolling_skew', 'rolling_std', 'rolling_sum', - 'rolling_var', 'rolling_window', 'ordered_merge'] + 'rolling_var', 'rolling_window', 'ordered_merge', + 'pnow', 'match', 'groupby'] def test_api(self): @@ -123,11 +118,9 @@ def test_api(self): self.modules + self.deprecated_modules + self.classes + self.deprecated_classes + self.deprecated_classes_in_future + - self.remove_classes_from_top_level_namespace + self.funcs + self.funcs_option + self.funcs_read + self.funcs_to + - self.deprecated_funcs + - self.deprecated_funcs_in_future, + self.deprecated_funcs, self.ignored) @@ -225,3 +218,33 @@ def 
test_deprecation_access_obj(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): pd.datetools.monthEnd + + +class TestTopLevelDeprecations(tm.TestCase): + # top-level API deprecations + # GH 13790 + + def test_pnow(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.pnow(freq='M') + + def test_term(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.Term('index>=date') + + def test_expr(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.Expr('2>1') + + def test_match(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.match([1, 2, 3], [1]) + + def test_groupby(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1]) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index ffe00a4a62a0a..49aa44492fe81 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -864,17 +864,11 @@ def test_properties_nat(self): self.assertTrue(np.isnan(getattr(t_nat, f))) def test_pnow(self): - dt = datetime.now() - val = period.pnow('D') - exp = Period(dt, freq='D') - self.assertEqual(val, exp) - - val2 = period.pnow('2D') - exp2 = Period(dt, freq='2D') - self.assertEqual(val2, exp2) - self.assertEqual(val.ordinal, val2.ordinal) - self.assertEqual(val.ordinal, exp2.ordinal) + # deprecation, xref #13790 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + period.pnow('D') def test_constructor_corner(self): expected = Period('2007-01', freq='2M') diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 8a6b0c153bb50..6e499924730b3 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -1144,7 +1144,13 @@ def _make_field_arrays(*fields): def pnow(freq=None): - return Period(datetime.now(), freq=freq) + # deprecation, xref #13790 + import warnings + + warnings.warn("pd.pnow() and pandas.tseries.period.pnow() " + "are deprecated. Please use Period.now()", + FutureWarning, stacklevel=2) + return Period.now(freq=freq) def period_range(start=None, end=None, periods=None, freq='D', name=None): From 0b07b07da7d5de06a414af467f9f5667835c150e Mon Sep 17 00:00:00 2001 From: Sahil Dua Date: Fri, 3 Mar 2017 08:04:43 -0500 Subject: [PATCH 135/353] BUG: Set frequency for empty Series closes #14320 Author: Sahil Dua Closes #14458 from sahildua2305/frequency-series-fix and squashes the following commits: 384e666 [Sahil Dua] BUG: Set frequency for empty Series --- doc/source/whatsnew/v0.20.0.txt | 3 +++ pandas/tests/series/test_timeseries.py | 8 ++++++++ pandas/tseries/resample.py | 18 ++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fa5974ee84d34..df259f4a42b86 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -605,6 +605,9 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) + +- Bug in ``.asfreq()``, where frequency was not set for empty ``Series` (:issue:`14320`) + - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 8c22b3f047210..d384460c3d030 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -260,6 +260,14 @@ def test_asfreq(self): index=[-1.0, 2.0, 1.0, 0.0]).sort_index() assert_series_equal(result, expected) + def test_asfreq_datetimeindex_empty_series(self): + # GH 14320 + expected = Series(index=pd.DatetimeIndex( + ["2016-09-29 11:00"])).asfreq('H') + result = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]), + data=[3]).asfreq('H') + self.assert_index_equal(expected.index, result.index) + def test_diff(self): # Just run the function self.ts.diff() diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 75e550a065fd2..21d7dc0c177b6 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1382,16 +1382,18 @@ def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): if how is None: how = 'E' - new_index = obj.index.asfreq(freq, how=how) new_obj = obj.copy() - new_obj.index = new_index - return new_obj + new_obj.index = obj.index.asfreq(freq, how=how) + + elif len(obj.index) == 0: + new_obj = obj.copy() + new_obj.index = obj.index._shallow_copy(freq=to_offset(freq)) + else: - if len(obj.index) == 0: - return obj.copy() dti = date_range(obj.index[0], obj.index[-1], freq=freq) dti.name = obj.index.name - rs = obj.reindex(dti, method=method, fill_value=fill_value) + new_obj = obj.reindex(dti, method=method, fill_value=fill_value) if normalize: - rs.index = rs.index.normalize() - return rs + new_obj.index = new_obj.index.normalize() + + return new_obj From 04e116851337cd852b4255f8221d9be44829e0e1 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 3 Mar 2017 10:04:26 -0500 Subject: [PATCH 136/353] BUG: syntax error in hdf query with ts closes #15492 Author: Chris Closes #15544 from chris-b1/hdf-dt-error and squashes the following commits: 8288dca [Chris] lint 7c7100d [Chris] expand test cases 946a48e [Chris] ERR: more strict HDFStore string comparison 213585f [Chris] CLN: remove timetuple type check cc977f0 [Chris] BUG: syntax error in hdf query with ts --- doc/source/whatsnew/v0.20.0.txt | 31 +++++++++++++++++++++- pandas/computation/pytables.py | 21 ++++++--------- pandas/tests/io/test_pytables.py | 44 ++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index df259f4a42b86..782ae6082c1cf 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -501,6 +501,35 @@ New Behavior: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) +.. _whatsnew_0200.api_breaking.hdfstore_where: + +HDFStore where string comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions most types could be compared to string column in a ``HDFStore`` +usually resulting in an invalid comparsion. These comparisions will now raise a +``TypeError`` (:issue:`15492`) + +New Behavior: + +.. code-block:: ipython + + In [15]: df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) + + In [16]: df.dtypes + Out[16]: + unparsed_date object + dtype: object + + In [17]: df.to_hdf('store.h5', 'key', format='table', data_columns=True) + + In [18]: ts = pd.Timestamp('2014-01-01') + + In [19]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + TypeError: Cannot compare 2014-01-01 00:00:00 of + type to string column + + .. 
_whatsnew_0200.api: Other API Changes @@ -671,7 +700,7 @@ Bug Fixes - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) - +- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 9dc18284ec22c..7c09ca8d38773 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -1,7 +1,6 @@ """ manage PyTables query interface via Expressions """ import ast -import time import warnings from functools import partial from datetime import datetime, timedelta @@ -188,10 +187,6 @@ def stringify(value): if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v, v.value, kind) - elif (isinstance(v, datetime) or hasattr(v, 'timetuple') or - kind == u('date')): - v = time.mktime(v.timetuple()) - return TermValue(v, pd.Timestamp(v), kind) elif kind == u('timedelta64') or kind == u('timedelta'): v = _coerce_scalar_to_timedelta_type(v, unit='s').value return TermValue(int(v), v, kind) @@ -218,12 +213,13 @@ def stringify(value): else: v = bool(v) return TermValue(v, v, kind) - elif not isinstance(v, string_types): - v = stringify(v) + elif isinstance(v, string_types): + # string quoting return TermValue(v, stringify(v), u('string')) - - # string quoting - return TermValue(v, stringify(v), u('string')) + else: + raise TypeError(("Cannot compare {v} of type {typ}" + " to {kind} column").format(v=v, typ=type(v), + kind=kind)) def convert_values(self): pass @@ -558,9 +554,8 @@ def parse_back_compat(self, w, op=None, value=None): # stringify with quotes these values def convert(v): - if (isinstance(v, (datetime, np.datetime64, - timedelta, np.timedelta64)) or - hasattr(v, 'timetuple')): + if isinstance(v, (datetime, np.datetime64, + timedelta, np.timedelta64)): return "'{0}'".format(v) return v diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 821d9956a2dfa..9f1dea2094bc6 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5071,6 +5071,50 @@ def test_query_long_float_literal(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) + def test_query_compare_column_type(self): + # GH 15492 + df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'], + 'real_date': date_range('2014-01-01', periods=2), + 'float': [1.1, 1.2], + 'int': [1, 2]}, + columns=['date', 'real_date', 'float', 'int']) + + with ensure_clean_store(self.path) as store: + store.append('test', df, format='table', data_columns=True) + + ts = pd.Timestamp('2014-01-01') # noqa + result = store.select('test', where='real_date > ts') + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + for op in ['<', '>', '==']: + # non strings to string column always fail + for v in [2.1, True, pd.Timestamp('2014-01-01'), + pd.Timedelta(1, 's')]: + query = 'date {op} v'.format(op=op) + with tm.assertRaises(TypeError): + result = store.select('test', where=query) + + # strings to other columns must be convertible to type + v = 'a' + for col in ['int', 'float', 'real_date']: + query = '{col} {op} v'.format(op=op, col=col) + with tm.assertRaises(ValueError): + result = store.select('test', where=query) + + for v, col in zip(['1', '1.1', '2014-01-01'], + ['int', 'float', 'real_date']): + query = '{col} 
{op} v'.format(op=op, col=col) + result = store.select('test', where=query) + + if op == '==': + expected = df.loc[[0], :] + elif op == '>': + expected = df.loc[[1], :] + else: + expected = df.loc[[], :] + tm.assert_frame_equal(expected, result) + class TestHDFComplexValues(Base): # GH10447 From 7ae4fd10b1581d77b4cfbf594e3b444af7456876 Mon Sep 17 00:00:00 2001 From: Kyle Kelley Date: Sat, 4 Mar 2017 03:09:45 -0800 Subject: [PATCH 137/353] BUG: handle empty lists in json_normalize (#15535) closes #15534 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/json/normalize.py | 3 +++ pandas/tests/io/json/test_normalize.py | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 782ae6082c1cf..8b6c53a159ad8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -192,6 +192,7 @@ Other enhancements - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. +- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index f29472155da17..0e7d025e81851 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -157,6 +157,9 @@ def _pull_field(js, spec): return result + if isinstance(data, list) and len(data) is 0: + return DataFrame() + # A bit of a hackjob if isinstance(data, dict): data = [data] diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index c60b81ffe504d..f881f4dafe0f3 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -62,6 +62,11 @@ def test_simple_normalize(self): tm.assert_frame_equal(result, expected) + def test_empty_array(self): + result = json_normalize([]) + expected = DataFrame() + tm.assert_frame_equal(result, expected) + def test_more_deeply_nested(self): data = [{'country': 'USA', 'states': [{'name': 'California', From 07ac39e9556538e02b3684bd7f4493c5301f409c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 4 Mar 2017 05:50:04 -0600 Subject: [PATCH 138/353] ENH: Added to_json_schema (#14904) Lays the groundwork for https://github.com/pandas-dev/pandas/issues/14386 This handles the schema part of the request there. We'll still need to do the work to publish the data to the frontend, but that can be done as a followup. 
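A minimal usage sketch of what this patch adds (all names are taken from the diff below; assumes a pandas build that includes this change, not a definitive illustration of the final API):

    import pandas as pd
    from pandas.io.json import build_table_schema

    df = pd.DataFrame({'A': [1, 2, 3],
                       'B': pd.date_range('2016-01-01', periods=3)},
                      index=pd.Index(range(3), name='idx'))

    # new 'table' orient: emits a JSON string with a "schema" (Table Schema)
    # field alongside the "data" records
    json_str = df.to_json(orient='table')

    # the schema can also be built on its own
    schema = build_table_schema(df)

    # opt-in display option for frontends that understand the schema repr
    pd.set_option('display.html.table_schema', True)
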
Added publish to dataframe repr --- ci/requirements-2.7.pip | 2 + ci/requirements-3.5.run | 1 + ci/requirements-3.6.run | 1 + doc/source/api.rst | 1 + doc/source/io.rst | 120 +++++ doc/source/options.rst | 21 + doc/source/whatsnew/v0.20.0.txt | 35 ++ pandas/core/config_init.py | 10 + pandas/core/generic.py | 86 +++- pandas/io/json/__init__.py | 3 +- pandas/io/json/json.py | 89 +++- pandas/io/json/table_schema.py | 177 +++++++ pandas/tests/formats/test_printing.py | 61 +++ .../tests/io/json/test_json_table_schema.py | 462 ++++++++++++++++++ pandas/util/testing.py | 19 + 15 files changed, 1072 insertions(+), 16 deletions(-) create mode 100644 pandas/io/json/table_schema.py create mode 100644 pandas/tests/io/json/test_json_table_schema.py diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index 08240184f2934..eb796368e7820 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -4,3 +4,5 @@ pathlib backports.lzma py PyCrypto +mock +ipython diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index b07ce611c79a2..43e6814ed6c8e 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -18,3 +18,4 @@ pymysql psycopg2 s3fs beautifulsoup4 +ipython diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 5d9cb05a7b402..9a6c1c7edbc5e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -18,3 +18,4 @@ pymysql beautifulsoup4 s3fs xarray +ipython diff --git a/doc/source/api.rst b/doc/source/api.rst index 6c4a3cff5b4cf..33ac5fde651d4 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -60,6 +60,7 @@ JSON :toctree: generated/ json_normalize + build_table_schema .. currentmodule:: pandas diff --git a/doc/source/io.rst b/doc/source/io.rst index b36ae8c2ed450..c34cc1ec17512 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2033,6 +2033,126 @@ using Hadoop or Spark. df df.to_json(orient='records', lines=True) + +.. _io.table_schema: + +Table Schema +'''''''''''' + +.. versionadded:: 0.20.0 + +`Table Schema`_ is a spec for describing tabular datasets as a JSON +object. The JSON includes information on the field names, types, and +other attributes. You can use the orient ``table`` to build +a JSON string with two fields, ``schema`` and ``data``. + +.. ipython:: python + + df = pd.DataFrame( + {'A': [1, 2, 3], + 'B': ['a', 'b', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=3), + }, index=pd.Index(range(3), name='idx')) + df + df.to_json(orient='table', date_format="iso") + +The ``schema`` field contains the ``fields`` key, which itself contains +a list of column name to type pairs, including the ``Index`` or ``MultiIndex`` +(see below for a list of types). +The ``schema`` field also contains a ``primaryKey`` field if the (Multi)index +is unique. + +The second field, ``data``, contains the serialized data with the ``records`` +orient. +The index is included, and any datetimes are ISO 8601 formatted, as required +by the Table Schema spec. + +The full list of types supported are described in the Table Schema +spec. This table shows the mapping from pandas types: + +============== ================= +Pandas type Table Schema type +============== ================= +int64 integer +float64 number +bool boolean +datetime64[ns] datetime +timedelta64[ns] duration +categorical any +object str +=============== ================= + +A few notes on the generated table schema: + +- The ``schema`` object contains a ``pandas_version`` field. 
This contains + the version of pandas' dialect of the schema, and will be incremented + with each revision. +- All dates are converted to UTC when serializing. Even timezone naïve values, + which are treated as UTC with an offset of 0. + + .. ipython:: python: + + from pandas.io.json import build_table_schema + s = pd.Series(pd.date_range('2016', periods=4)) + build_table_schema(s) + +- datetimes with a timezone (before serializing), include an additional field + ``tz`` with the time zone name (e.g. ``'US/Central'``). + + .. ipython:: python + + s_tz = pd.Series(pd.date_range('2016', periods=12, + tz='US/Central')) + build_table_schema(s_tz) + +- Periods are converted to timestamps before serialization, and so have the + same behavior of being converted to UTC. In addition, periods will contain + and additional field ``freq`` with the period's frequency, e.g. ``'A-DEC'`` + + .. ipython:: python + + s_per = pd.Series(1, index=pd.period_range('2016', freq='A-DEC', + periods=4)) + build_table_schema(s_per) + +- Categoricals use the ``any`` type and an ``enum`` constraint listing + the set of possible values. Additionally, an ``ordered`` field is included + + .. ipython:: python + + s_cat = pd.Series(pd.Categorical(['a', 'b', 'a'])) + build_table_schema(s_cat) + +- A ``primaryKey`` field, containing an array of labels, is included + *if the index is unique*: + + .. ipython:: python + + s_dupe = pd.Series([1, 2], index=[1, 1]) + build_table_schema(s_dupe) + +- The ``primaryKey`` behavior is the same with MultiIndexes, but in this + case the ``primaryKey`` is an array: + + .. ipython:: python + + s_multi = pd.Series(1, index=pd.MultiIndex.from_product([('a', 'b'), + (0, 1)])) + build_table_schema(s_multi) + +- The default naming roughly follows these rules: + + + For series, the ``object.name`` is used. If that's none, then the + name is ``values`` + + For DataFrames, the stringified version of the column name is used + + For ``Index`` (not ``MultiIndex``), ``index.name`` is used, with a + fallback to ``index`` if that is None. + + For ``MultiIndex``, ``mi.names`` is used. If any level has no name, + then ``level_`` is used. + + +_Table Schema: http://specs.frictionlessdata.io/json-table-schema/ + HTML ---- diff --git a/doc/source/options.rst b/doc/source/options.rst index 10a13ed36df8d..1a0e5cf6b7235 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -397,6 +397,9 @@ display.width 80 Width of the display in charact IPython qtconsole, or IDLE do not run in a terminal and hence it is not possible to correctly detect the width. +display.html.table_schema False Whether to publish a Table Schema + representation for frontends that + support it. html.border 1 A ``border=value`` attribute is inserted in the ``
`` tag for the DataFrame HTML repr. @@ -424,6 +427,7 @@ mode.use_inf_as_null False True means treat None, NaN, -IN are not null (new way). =================================== ============ ================================== + .. _basics.console_output: Number Formatting @@ -512,3 +516,20 @@ Enabling ``display.unicode.ambiguous_as_wide`` lets pandas to figure these chara pd.set_option('display.unicode.east_asian_width', False) pd.set_option('display.unicode.ambiguous_as_wide', False) + +.. _options.table_schema: + +Table Schema Display +-------------------- + +.. versionadded:: 0.20.0 + +``DataFrame`` and ``Series`` will publish a Table Schema representation +by default. False by default, this can be enabled globally with the +``display.html.table_schema`` option: + +.. ipython:: python + + pd.set_option('display.html.table_schema', True) + +Only ``'display.max_rows'`` are serialized and published. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8b6c53a159ad8..7b4538bd181d2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -12,6 +12,7 @@ Highlights include: - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - The ``.ix`` indexer has been deprecated, see :ref:`here ` - Switched the test framework to `pytest`_ (:issue:`13097`) +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref: `here ` .. _pytest: http://doc.pytest.org/en/latest/ @@ -154,6 +155,40 @@ New Behavior: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() +.. _whatsnew_0200.enhancements.table_schema + +Table Schema Output +^^^^^^^^^^^^^^^^^^^ + +The new orient ``'table'`` for :meth:`DataFrame.to_json` +will generate a `Table Schema`_ compatible string representation of +the data. + +.. ipython:: python + + df = pd.DataFrame( + {'A': [1, 2, 3], + 'B': ['a', 'b', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=3), + }, index=pd.Index(range(3), name='idx')) + df + df.to_json(orient='table') + + +See :ref:`IO: Table Schema for more`. + +Additionally, the repr for ``DataFrame`` and ``Series`` can now publish +this JSON Table schema representation of the Series or DataFrame if you are +using IPython (or another frontend like `nteract`_ using the Jupyter messaging +protocol). +This gives frontends like the Jupyter notebook and `nteract`_ +more flexiblity in how they display pandas objects, since they have +more information about the data. +You must enable this by setting the ``display.html.table_schema`` option to True. + +.. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/ +.. _nteract: http://nteract.io/ + .. _whatsnew_0200.enhancements.other: Other enhancements diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 89616890e1de1..931fe0661818d 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -164,6 +164,13 @@ (default: False) """ +pc_table_schema_doc = """ +: boolean + Whether to publish a Table Schema representation for frontends + that support it. 
+ (default: False) +""" + pc_line_width_deprecation_warning = """\ line_width has been deprecated, use display.width instead (currently both are identical) @@ -366,6 +373,9 @@ def mpl_style_cb(key): validator=is_text) cf.register_option('latex.multirow', False, pc_latex_multirow, validator=is_bool) + cf.register_option('html.table_schema', False, pc_table_schema_doc, + validator=is_bool) + cf.deprecate_option('display.line_width', msg=pc_line_width_deprecation_warning, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 127aac970fbc1..298fa75779420 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4,6 +4,7 @@ import operator import weakref import gc +import json import numpy as np import pandas.lib as lib @@ -129,6 +130,37 @@ def __init__(self, data, axes=None, copy=False, dtype=None, object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) + def _ipython_display_(self): + try: + from IPython.display import display + except ImportError: + return None + + # Series doesn't define _repr_html_ or _repr_latex_ + latex = self._repr_latex_() if hasattr(self, '_repr_latex_') else None + html = self._repr_html_() if hasattr(self, '_repr_html_') else None + table_schema = self._repr_table_schema_() + # We need the inital newline since we aren't going through the + # usual __repr__. See + # https://github.com/pandas-dev/pandas/pull/14904#issuecomment-277829277 + text = "\n" + repr(self) + + reprs = {"text/plain": text, "text/html": html, "text/latex": latex, + "application/vnd.dataresource+json": table_schema} + reprs = {k: v for k, v in reprs.items() if v} + display(reprs, raw=True) + + def _repr_table_schema_(self): + """ + Not a real Jupyter special repr method, but we use the same + naming convention. + """ + if config.get_option("display.html.table_schema"): + data = self.head(config.get_option('display.max_rows')) + payload = json.loads(data.to_json(orient='table'), + object_pairs_hook=collections.OrderedDict) + return payload + def _validate_dtype(self, dtype): """ validate the passed dtype """ @@ -1094,7 +1126,7 @@ def __setstate__(self, state): strings before writing. """ - def to_json(self, path_or_buf=None, orient=None, date_format='epoch', + def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False): """ @@ -1129,10 +1161,17 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch', - index : dict like {index -> {column -> value}} - columns : dict like {column -> {index -> value}} - values : just the values array + - table : dict like {'schema': {schema}, 'data': {data}} + describing the data, and the data component is + like ``orient='records'``. - date_format : {'epoch', 'iso'} + .. versionchanged:: 0.20.0 + + date_format : {None, 'epoch', 'iso'} Type of date conversion. `epoch` = epoch milliseconds, - `iso`` = ISO8601, default is epoch. + `iso` = ISO8601. The default depends on the `orient`. For + `orient='table'`, the default is `'iso'`. For all other orients, + the default is `'epoch'`. double_precision : The number of decimal places to use when encoding floating point values, default 10. force_ascii : force encoded string to be ASCII, default True. @@ -1151,14 +1190,53 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch', .. 
versionadded:: 0.19.0 - Returns ------- same type as input object with filtered info axis + See Also + -------- + pd.read_json + + Examples + -------- + + >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + >>> df.to_json(orient='split') + '{"columns":["col 1","col 2"], + "index":["row 1","row 2"], + "data":[["a","b"],["c","d"]]}' + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> df.to_json(orient='index') + '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. + + >>> df.to_json(orient='records') + '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '{"schema": {"fields": [{"name": "index", "type": "string"}, + {"name": "col 1", "type": "string"}, + {"name": "col 2", "type": "string"}], + "primaryKey": "index", + "pandas_version": "0.20.0"}, + "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, + {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ from pandas.io import json + if date_format is None and orient == 'table': + date_format = 'iso' + elif date_format is None: + date_format = 'epoch' return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient, date_format=date_format, double_precision=double_precision, diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py index a9390a04cc2cd..32d110b3404a9 100644 --- a/pandas/io/json/__init__.py +++ b/pandas/io/json/__init__.py @@ -1,4 +1,5 @@ from .json import to_json, read_json, loads, dumps # noqa from .normalize import json_normalize # noqa +from .table_schema import build_table_schema # noqa -del json, normalize # noqa +del json, normalize, table_schema # noqa diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 6fc766081eefe..a00d3492e8a37 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -1,5 +1,4 @@ # pylint: disable-msg=E1101,W0613,W0603 - import os import numpy as np @@ -12,10 +11,14 @@ from pandas.core.common import AbstractMethodError from pandas.formats.printing import pprint_thing from .normalize import _convert_to_line_delimits +from .table_schema import build_table_schema +from pandas.types.common import is_period_dtype loads = _json.loads dumps = _json.dumps +TABLE_SCHEMA_VERSION = '0.20.0' + # interface to/from def to_json(path_or_buf, obj, orient=None, date_format='epoch', @@ -26,19 +29,22 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', raise ValueError( "'lines' keyword only valid when 'orient' is records") - if isinstance(obj, Series): - s = SeriesWriter( - obj, orient=orient, date_format=date_format, - double_precision=double_precision, ensure_ascii=force_ascii, - date_unit=date_unit, default_handler=default_handler).write() + if orient == 'table' and isinstance(obj, Series): + obj = obj.to_frame(name=obj.name or 'values') + if orient == 'table' and isinstance(obj, DataFrame): + writer = JSONTableWriter + elif isinstance(obj, Series): + writer = SeriesWriter elif isinstance(obj, DataFrame): - s = FrameWriter( - obj, orient=orient, date_format=date_format, - double_precision=double_precision, ensure_ascii=force_ascii, - date_unit=date_unit, default_handler=default_handler).write() + writer = FrameWriter else: raise NotImplementedError("'obj' should be a Series or a DataFrame") + s = writer( + obj, orient=orient, date_format=date_format, 
+ double_precision=double_precision, ensure_ascii=force_ascii, + date_unit=date_unit, default_handler=default_handler).write() + if lines: s = _convert_to_line_delimits(s) @@ -81,7 +87,8 @@ def write(self): ensure_ascii=self.ensure_ascii, date_unit=self.date_unit, iso_dates=self.date_format == 'iso', - default_handler=self.default_handler) + default_handler=self.default_handler + ) class SeriesWriter(Writer): @@ -108,6 +115,55 @@ def _format_axes(self): "'%s'." % self.orient) +class JSONTableWriter(FrameWriter): + _default_orient = 'records' + + def __init__(self, obj, orient, date_format, double_precision, + ensure_ascii, date_unit, default_handler=None): + """ + Adds a `schema` attribut with the Table Schema, resets + the index (can't do in caller, because the schema inference needs + to know what the index is, forces orient to records, and forces + date_format to 'iso'. + """ + super(JSONTableWriter, self).__init__( + obj, orient, date_format, double_precision, ensure_ascii, + date_unit, default_handler=default_handler) + + if date_format != 'iso': + msg = ("Trying to write with `orient='table'` and " + "`date_format='%s'`. Table Schema requires dates " + "to be formatted with `date_format='iso'`" % date_format) + raise ValueError(msg) + + self.schema = build_table_schema(obj) + + # TODO: Do this timedelta properly in objToJSON.c See GH #15137 + if ((obj.ndim == 1) and (obj.name in set(obj.index.names)) or + len(obj.columns & obj.index.names)): + msg = "Overlapping names between the index and columns" + raise ValueError(msg) + + obj = obj.copy() + timedeltas = obj.select_dtypes(include=['timedelta']).columns + if len(timedeltas): + obj[timedeltas] = obj[timedeltas].applymap( + lambda x: x.isoformat()) + # Convert PeriodIndex to datetimes before serialzing + if is_period_dtype(obj.index): + obj.index = obj.index.to_timestamp() + + self.obj = obj.reset_index() + self.date_format = 'iso' + self.orient = 'records' + + def write(self): + data = super(JSONTableWriter, self).write() + serialized = '{{"schema": {}, "data": {}}}'.format( + dumps(self.schema), data) + return serialized + + def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, convert_axes=True, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, @@ -244,6 +300,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, col 1 col 2 0 a b 1 c d + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '{"schema": {"fields": [{"name": "index", "type": "string"}, + {"name": "col 1", "type": "string"}, + {"name": "col 2", "type": "string"}], + "primaryKey": "index", + "pandas_version": "0.20.0"}, + "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, + {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf, diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py new file mode 100644 index 0000000000000..48f92d28baf61 --- /dev/null +++ b/pandas/io/json/table_schema.py @@ -0,0 +1,177 @@ +""" +Table Schema builders + +http://specs.frictionlessdata.io/json-table-schema/ +""" +from pandas.types.common import ( + is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype, + is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_categorical_dtype, is_period_dtype, is_string_dtype +) + + +def as_json_table_type(x): + """ + Convert a NumPy / pandas type to its corresponding json_table. 
+ + Parameters + ---------- + x : array or dtype + + Returns + ------- + t : str + the Table Schema data types + + Notes + ----- + This table shows the relationship between NumPy / pandas dtypes, + and Table Schema dtypes. + + ============== ================= + Pandas type Table Schema type + ============== ================= + int64 integer + float64 number + bool boolean + datetime64[ns] datetime + timedelta64[ns] duration + object str + categorical any + =============== ================= + """ + if is_integer_dtype(x): + return 'integer' + elif is_bool_dtype(x): + return 'boolean' + elif is_numeric_dtype(x): + return 'number' + elif (is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or + is_period_dtype(x)): + return 'datetime' + elif is_timedelta64_dtype(x): + return 'duration' + elif is_categorical_dtype(x): + return 'any' + elif is_string_dtype(x): + return 'string' + else: + return 'any' + + +def set_default_names(data): + """Sets index names to 'index' for regular, or 'level_x' for Multi""" + if all(name is not None for name in data.index.names): + return data + + data = data.copy() + if data.index.nlevels > 1: + names = [name if name is not None else 'level_{}'.format(i) + for i, name in enumerate(data.index.names)] + data.index.names = names + else: + data.index.name = data.index.name or 'index' + return data + + +def make_field(arr, dtype=None): + dtype = dtype or arr.dtype + field = {'name': arr.name or 'values', + 'type': as_json_table_type(dtype)} + + if is_categorical_dtype(arr): + if hasattr(arr, 'categories'): + cats = arr.categories + ordered = arr.ordered + else: + cats = arr.cat.categories + ordered = arr.cat.ordered + field['constraints'] = {"enum": list(cats)} + field['ordered'] = ordered + elif is_period_dtype(arr): + field['freq'] = arr.freqstr + elif is_datetime64tz_dtype(arr): + if hasattr(arr, 'dt'): + field['tz'] = arr.dt.tz.zone + else: + field['tz'] = arr.tz.zone + return field + + +def build_table_schema(data, index=True, primary_key=None, version=True): + """ + Create a Table schema from ``data``. + + Parameters + ---------- + data : Series, DataFrame + index : bool, default True + Whether to include ``data.index`` in the schema. + primary_key : bool or None, default True + column names to designate as the primary key. + The default `None` will set `'primaryKey'` to the index + level or levels if the index is unique. + version : bool, default True + Whether to include a field `pandas_version` with the version + of pandas that generated the schema. + + Returns + ------- + schema : dict + + Examples + -------- + >>> df = pd.DataFrame( + ... {'A': [1, 2, 3], + ... 'B': ['a', 'b', 'c'], + ... 'C': pd.date_range('2016-01-01', freq='d', periods=3), + ... }, index=pd.Index(range(3), name='idx')) + >>> build_table_schema(df) + {'fields': [{'name': 'idx', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}], + 'pandas_version': '0.20.0', + 'primaryKey': ['idx']} + + Notes + ----- + See `_as_json_table_type` for conversion types. + Timedeltas as converted to ISO8601 duration format with + 9 decimal places after the secnods field for nanosecond precision. + + Categoricals are converted to the `any` dtype, and use the `enum` field + constraint to list the allowed values. The `ordered` attribute is included + in an `ordered` field. 
+ """ + if index is True: + data = set_default_names(data) + + schema = {} + fields = [] + + if index: + if data.index.nlevels > 1: + for level in data.index.levels: + fields.append(make_field(level)) + else: + fields.append(make_field(data.index)) + + if data.ndim > 1: + for column, s in data.iteritems(): + fields.append(make_field(s)) + else: + fields.append(make_field(data)) + + schema['fields'] = fields + if index and data.index.is_unique and primary_key is None: + if data.index.nlevels == 1: + schema['primaryKey'] = [data.index.name] + else: + schema['primaryKey'] = data.index.names + elif primary_key is not None: + schema['primaryKey'] = primary_key + + if version: + schema['pandas_version'] = '0.20.0' + return schema diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py index 52f3e06c6cbd0..cacba2ad3f3ba 100644 --- a/pandas/tests/formats/test_printing.py +++ b/pandas/tests/formats/test_printing.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import pytest from pandas import compat +import pandas as pd import pandas.formats.printing as printing import pandas.formats.format as fmt import pandas.util.testing as tm @@ -118,6 +120,65 @@ def test_ambiguous_width(self): self.assertEqual(adjoined, expected) +class TestTableSchemaRepr(tm.TestCase): + + @classmethod + def setUpClass(cls): + pytest.importorskip('IPython') + try: + import mock + except ImportError: + try: + from unittest import mock + except ImportError: + pytest.skip("Mock is not installed") + cls.mock = mock + + def test_publishes(self): + df = pd.DataFrame({"A": [1, 2]}) + objects = [df['A'], df, df] # dataframe / series + expected_keys = [ + {'text/plain', 'application/vnd.dataresource+json'}, + {'text/plain', 'text/html', 'application/vnd.dataresource+json'}, + ] + + make_patch = self.mock.patch('IPython.display.display') + opt = pd.option_context('display.html.table_schema', True) + for obj, expected in zip(objects, expected_keys): + with opt, make_patch as mock_display: + handle = obj._ipython_display_() + self.assertEqual(mock_display.call_count, 1) + self.assertIsNone(handle) + args, kwargs = mock_display.call_args + arg, = args # just one argument + + self.assertEqual(kwargs, {"raw": True}) + self.assertEqual(set(arg.keys()), expected) + + with_latex = pd.option_context('display.latex.repr', True) + + with opt, with_latex, make_patch as mock_display: + handle = obj._ipython_display_() + args, kwargs = mock_display.call_args + arg, = args + + expected = {'text/plain', 'text/html', 'text/latex', + 'application/vnd.dataresource+json'} + self.assertEqual(set(arg.keys()), expected) + + def test_config_on(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", True): + result = df._repr_table_schema_() + self.assertIsNotNone(result) + + def test_config_default_off(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", False): + result = df._repr_table_schema_() + self.assertIsNone(result) + + # TODO: fix this broken test # def test_console_encode(): diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py new file mode 100644 index 0000000000000..d1795f2816817 --- /dev/null +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -0,0 +1,462 @@ +"""Tests for Table Schema integration.""" +import json +from collections import OrderedDict + +import numpy as np +import pandas as pd +import pytest + +from pandas import DataFrame +from pandas.types.dtypes import 
PeriodDtype, CategoricalDtype, DatetimeTZDtype +import pandas.util.testing as tm +from pandas.io.json.table_schema import ( + as_json_table_type, build_table_schema, make_field, set_default_names +) + + +class TestBuildSchema(tm.TestCase): + + def setUp(self): + self.df = DataFrame( + {'A': [1, 2, 3, 4], + 'B': ['a', 'b', 'c', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=4), + 'D': pd.timedelta_range('1H', periods=4, freq='T'), + }, + index=pd.Index(range(4), name='idx')) + + def test_build_table_schema(self): + result = build_table_schema(self.df, version=False) + expected = { + 'fields': [{'name': 'idx', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}, + {'name': 'D', 'type': 'duration'}, + ], + 'primaryKey': ['idx'] + } + self.assertEqual(result, expected) + result = build_table_schema(self.df) + self.assertTrue("pandas_version" in result) + + def test_series(self): + s = pd.Series([1, 2, 3], name='foo') + result = build_table_schema(s, version=False) + expected = {'fields': [{'name': 'index', 'type': 'integer'}, + {'name': 'foo', 'type': 'integer'}], + 'primaryKey': ['index']} + self.assertEqual(result, expected) + result = build_table_schema(s) + self.assertTrue('pandas_version' in result) + + def tets_series_unnamed(self): + result = build_table_schema(pd.Series([1, 2, 3]), version=False) + expected = {'fields': [{'name': 'index', 'type': 'integer'}, + {'name': 'values', 'type': 'integer'}], + 'primaryKey': ['index']} + self.assertEqual(result, expected) + + def test_multiindex(self): + df = self.df.copy() + idx = pd.MultiIndex.from_product([('a', 'b'), (1, 2)]) + df.index = idx + + result = build_table_schema(df, version=False) + expected = { + 'fields': [{'name': 'level_0', 'type': 'string'}, + {'name': 'level_1', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}, + {'name': 'D', 'type': 'duration'}, + ], + 'primaryKey': ['level_0', 'level_1'] + } + self.assertEqual(result, expected) + + df.index.names = ['idx0', None] + expected['fields'][0]['name'] = 'idx0' + expected['primaryKey'] = ['idx0', 'level_1'] + result = build_table_schema(df, version=False) + self.assertEqual(result, expected) + + +class TestTableSchemaType(tm.TestCase): + + def test_as_json_table_type_int_data(self): + int_data = [1, 2, 3] + int_types = [np.int, np.int16, np.int32, np.int64] + for t in int_types: + self.assertEqual(as_json_table_type(np.array(int_data, dtype=t)), + 'integer') + + def test_as_json_table_type_float_data(self): + float_data = [1., 2., 3.] 
+ float_types = [np.float, np.float16, np.float32, np.float64] + for t in float_types: + self.assertEqual(as_json_table_type(np.array(float_data, + dtype=t)), + 'number') + + def test_as_json_table_type_bool_data(self): + bool_data = [True, False] + bool_types = [bool, np.bool] + for t in bool_types: + self.assertEqual(as_json_table_type(np.array(bool_data, dtype=t)), + 'boolean') + + def test_as_json_table_type_date_data(self): + date_data = [pd.to_datetime(['2016']), + pd.to_datetime(['2016'], utc=True), + pd.Series(pd.to_datetime(['2016'])), + pd.Series(pd.to_datetime(['2016'], utc=True)), + pd.period_range('2016', freq='A', periods=3)] + for arr in date_data: + self.assertEqual(as_json_table_type(arr), 'datetime') + + def test_as_json_table_type_string_data(self): + strings = [pd.Series(['a', 'b']), pd.Index(['a', 'b'])] + for t in strings: + self.assertEqual(as_json_table_type(t), 'string') + + def test_as_json_table_type_categorical_data(self): + self.assertEqual(as_json_table_type(pd.Categorical(['a'])), 'any') + self.assertEqual(as_json_table_type(pd.Categorical([1])), 'any') + self.assertEqual(as_json_table_type( + pd.Series(pd.Categorical([1]))), 'any') + self.assertEqual(as_json_table_type(pd.CategoricalIndex([1])), 'any') + self.assertEqual(as_json_table_type(pd.Categorical([1])), 'any') + + # ------ + # dtypes + # ------ + def test_as_json_table_type_int_dtypes(self): + integers = [np.int, np.int16, np.int32, np.int64] + for t in integers: + self.assertEqual(as_json_table_type(t), 'integer') + + def test_as_json_table_type_float_dtypes(self): + floats = [np.float, np.float16, np.float32, np.float64] + for t in floats: + self.assertEqual(as_json_table_type(t), 'number') + + def test_as_json_table_type_bool_dtypes(self): + bools = [bool, np.bool] + for t in bools: + self.assertEqual(as_json_table_type(t), 'boolean') + + def test_as_json_table_type_date_dtypes(self): + # TODO: datedate.date? datetime.time? + dates = [np.datetime64, np.dtype(" Date: Sat, 4 Mar 2017 15:14:36 +0100 Subject: [PATCH 139/353] DEPR/CLN: remove SparseTimeSeries class (follow-up GH15098) (#15567) --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/compat/pickle_compat.py | 3 ++- pandas/sparse/api.py | 2 +- pandas/sparse/series.py | 11 ----------- pandas/tests/api/test_api.py | 3 +-- pandas/tests/sparse/test_series.py | 6 ------ 6 files changed, 6 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7b4538bd181d2..eac187b52f65d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -621,6 +621,8 @@ Removal of prior version deprecations/changes Similar functionality can be found in the `Google2Pandas `__ package. - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) +- The ``TimeSeries`` and ``SparseTimeSeries`` classes, aliases of ``Series`` + and ``SparseSeries``, are removed (:issue:`10890`, :issue:`15098`). - ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``) - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). 
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index b8ccd13c153d4..25a170c3eb121 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -61,7 +61,8 @@ def load_reduce(self): ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList'), # 10890 - ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series') + ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), + ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries') } diff --git a/pandas/sparse/api.py b/pandas/sparse/api.py index 55841fbeffa2d..90be0a216535f 100644 --- a/pandas/sparse/api.py +++ b/pandas/sparse/api.py @@ -2,5 +2,5 @@ # flake8: noqa from pandas.sparse.array import SparseArray from pandas.sparse.list import SparseList -from pandas.sparse.series import SparseSeries, SparseTimeSeries +from pandas.sparse.series import SparseSeries from pandas.sparse.frame import SparseDataFrame diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index dfdbb3c89814a..a3b701169ce91 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -844,14 +844,3 @@ def from_coo(cls, A, dense_index=False): comp_method=_arith_method, bool_method=None, use_numexpr=False, force=True) - - -# backwards compatiblity -class SparseTimeSeries(SparseSeries): - - def __init__(self, *args, **kwargs): - # deprecation TimeSeries, #10890 - warnings.warn("SparseTimeSeries is deprecated. Please use " - "SparseSeries", FutureWarning, stacklevel=2) - - super(SparseTimeSeries, self).__init__(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index f2f7a9c778e66..2f8ebc4cc1df4 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -57,8 +57,7 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['WidePanel', - 'SparseTimeSeries', 'Panel4D', + deprecated_classes = ['WidePanel', 'Panel4D', 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index d4543b97af4dd..de6636162ff05 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -112,12 +112,6 @@ def test_iteration_and_str(self): [x for x in self.bseries] str(self.bseries) - def test_TimeSeries_deprecation(self): - - # deprecation TimeSeries, #10890 - with tm.assert_produces_warning(FutureWarning): - pd.SparseTimeSeries(1, index=pd.date_range('20130101', periods=3)) - def test_construct_DataFrame_with_sp_series(self): # it works! df = DataFrame({'col': self.bseries}) From d6524850c8dea36ab37536e439999fd121b95429 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 4 Mar 2017 10:37:58 -0500 Subject: [PATCH 140/353] Revert FrozenList changes (doc build slowdown, #15559) See #15559. This temporarily reverts #15506, to see if this fixes the doc build slowdown. 
Author: Joris Van den Bossche Closes #15566 from jorisvandenbossche/revert and squashes the following commits: befd858 [Joris Van den Bossche] Revert "ENH: Added FrozenList difference setop" 527ded9 [Joris Van den Bossche] Revert "TST: remove deprecated usages of FrozenList.__add__ from test code" --- doc/source/groupby.rst | 10 ------- doc/source/whatsnew/v0.20.0.txt | 2 -- pandas/core/panel.py | 6 ++--- pandas/core/reshape.py | 6 ++--- pandas/core/strings.py | 2 +- pandas/indexes/frozen.py | 24 +++-------------- pandas/tests/groupby/test_value_counts.py | 2 +- pandas/tests/indexes/test_frozen.py | 33 +++++++---------------- pandas/tools/concat.py | 2 +- test_fast.sh | 2 +- 10 files changed, 22 insertions(+), 67 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 2d406de7c0c9b..8484ccd69a983 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -126,16 +126,6 @@ We could naturally group by either the ``A`` or ``B`` columns or both: grouped = df.groupby('A') grouped = df.groupby(['A', 'B']) -.. versionadded:: 0.20 - -If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all -but the specified columns. - -.. ipython:: python - - df2 = df.set_index(['A', 'B']) - grouped = df2.groupby(level=df2.index.names.difference(['B']) - These will split the DataFrame on its index (rows). We could also split by the columns: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index eac187b52f65d..1ba327a4ea50c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -29,7 +29,6 @@ New features - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``.str.replace`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) -- ``FrozenList`` has gained the ``.difference()`` setop method (:issue:`15475`) @@ -601,7 +600,6 @@ Deprecations - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) -- ``FrozenList`` addition (new object and inplace) have been deprecated in favor of the ``.union()`` method. (:issue: `15475`) - The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) * ``pd.pnow()``, replaced by ``Period.now()`` * ``pd.Term``, is removed, as it is not applicable to user code. 
Instead use in-line string expressions in the where clause when searching in HDFStore diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c5ea513223dce..4a6c6cf291316 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -940,9 +940,9 @@ def construct_index_parts(idx, major=True): minor_labels, minor_levels, minor_names = construct_index_parts( self.minor_axis, major=False) - levels = list(major_levels) + list(minor_levels) - labels = list(major_labels) + list(minor_labels) - names = list(major_names) + list(minor_names) + levels = major_levels + minor_levels + labels = major_labels + minor_labels + names = major_names + minor_names index = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index faad6c500a21f..87cb088c2e91e 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -216,8 +216,8 @@ def get_new_columns(self): width = len(self.value_columns) propagator = np.repeat(np.arange(width), stride) if isinstance(self.value_columns, MultiIndex): - new_levels = self.value_columns.levels.union((self.removed_level,)) - new_names = self.value_columns.names.union((self.removed_name,)) + new_levels = self.value_columns.levels + (self.removed_level,) + new_names = self.value_columns.names + (self.removed_name,) new_labels = [lab.take(propagator) for lab in self.value_columns.labels] @@ -806,7 +806,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, for col in id_vars: mdata[col] = np.tile(frame.pop(col).values, K) - mcolumns = list(id_vars) + list(var_name) + list([value_name]) + mcolumns = id_vars + var_name + [value_name] mdata[value_name] = frame.values.ravel('F') for i, col in enumerate(var_name): diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 51016926d6909..ac8d1db6a0bf3 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -787,7 +787,7 @@ def str_extractall(arr, pat, flags=0): if 0 < len(index_list): from pandas import MultiIndex index = MultiIndex.from_tuples( - index_list, names=arr.index.names.union(["match"])) + index_list, names=arr.index.names + ["match"]) else: index = None result = arr._constructor_expanddim(match_list, index=index, diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index 47e2557333ec7..e043ba64bbad7 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -13,8 +13,6 @@ from pandas.types.cast import _coerce_indexer_dtype from pandas.formats.printing import pprint_thing -import warnings - class FrozenList(PandasObject, list): @@ -27,14 +25,11 @@ class FrozenList(PandasObject, list): # typechecks def __add__(self, other): - warnings.warn("__add__ is deprecated, use union(...)", FutureWarning) - return self.union(other) - - def __iadd__(self, other): - warnings.warn("__iadd__ is deprecated, use union(...)", FutureWarning) if isinstance(other, tuple): other = list(other) - return super(FrozenList, self).__iadd__(other) + return self.__class__(super(FrozenList, self).__add__(other)) + + __iadd__ = __add__ # Python 2 compat def __getslice__(self, i, j): @@ -85,19 +80,6 @@ def __repr__(self): __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled - def union(self, other): - """Returns a FrozenList with other concatenated to the end of self""" - if isinstance(other, tuple): - other = list(other) - return self.__class__(super(FrozenList, self).__add__(other)) - - def difference(self, other): - """Returns 
a FrozenList with the same elements as self, but with elements - that are also in other removed.""" - other = set(other) - temp = [x for x in self if x not in other] - return self.__class__(temp) - class FrozenNDArray(PandasObject, np.ndarray): diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index ff01df2693c7c..801d0da070112 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -28,7 +28,7 @@ def check_value_counts(df, keys, bins): gr = df.groupby(keys, sort=isort) right = gr['3rd'].apply(Series.value_counts, **kwargs) - right.index.names = right.index.names[:-1].union(['3rd']) + right.index.names = right.index.names[:-1] + ['3rd'] # have to sort on index because of unstable sort on values left, right = map(rebuild_index, (left, right)) # xref GH9212 diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index a5fbf066adc83..a82409fbf9513 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -15,35 +15,20 @@ def setUp(self): self.klass = FrozenList def test_add(self): - q = FrozenList([1]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - q = q + [2, 3] - expected = FrozenList([1, 2, 3]) - self.check_result(q, expected) - - def test_iadd(self): - q = FrozenList([1]) - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - q += [2, 3] - expected = FrozenList([1, 2, 3]) - self.check_result(q, expected) - - def test_union(self): - result = self.container.union((1, 2, 3)) + result = self.container + (1, 2, 3) expected = FrozenList(self.lst + [1, 2, 3]) self.check_result(result, expected) - def test_difference(self): - result = self.container.difference([2]) - expected = FrozenList([1, 3, 4, 5]) + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) self.check_result(result, expected) - def test_difference_dupe(self): - result = FrozenList([1, 2, 3, 2]).difference([2]) - expected = FrozenList([1, 3]) - self.check_result(result, expected) + def test_inplace(self): + q = r = self.container + q += [5] + self.check_result(q, self.lst + [5]) + # other shouldn't be mutated + self.check_result(r, self.lst) class TestFrozenNDArray(CheckImmutable, CheckStringMixin, tm.TestCase): diff --git a/pandas/tools/concat.py b/pandas/tools/concat.py index ae9d7af9d98ff..6405106118472 100644 --- a/pandas/tools/concat.py +++ b/pandas/tools/concat.py @@ -574,7 +574,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): " not have the same number of levels") # also copies - names = list(names) + list(_get_consensus_names(indexes)) + names = names + _get_consensus_names(indexes) return MultiIndex(levels=levels, labels=label_list, names=names, verify_integrity=False) diff --git a/test_fast.sh b/test_fast.sh index f22ab73277e8b..30ac7f84cbe8b 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -5,4 +5,4 @@ # https://github.com/pytest-dev/pytest/issues/1075 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -pytest pandas --skip-slow --skip-network -m "not single" -n 4 $@ +pytest pandas --skip-slow --skip-network -m "not single" -n 4 From 5f0b69aee3622eed9392cef163e4b31ba742498e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Mar 2017 13:10:27 -0500 Subject: [PATCH 141/353] DEPR: silence some deprecation warnings --- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_panel.py | 4 ++-- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0f36af2c8c4e7..c809b39bb566e 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1646,7 +1646,7 @@ def test_multilevel_consolidate(self): 'bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) - df = df.consolidate() + df = df._consolidate() def test_ix_preserve_names(self): result = self.ymd.loc[2000] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 2f329f241a5b8..373f590cbf9eb 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -688,7 +688,7 @@ def test_ix_setitem_slice_dataframe(self): def test_ix_align(self): from pandas import Series b = Series(np.random.randn(10), name=0) - b.sort() + b.sort_values() df_orig = Panel(np.random.randn(3, 10, 2)) df = df_orig.copy() @@ -1001,7 +1001,7 @@ def test_consolidate(self): self.panel['foo'] = 1. self.assertFalse(self.panel._data.is_consolidated()) - panel = self.panel.consolidate() + panel = self.panel._consolidate() self.assertTrue(panel._data.is_consolidated()) def test_ctor_dict(self): From ca6d88b7367de415770bf2c171887c5bece38d9f Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 4 Mar 2017 16:04:15 -0500 Subject: [PATCH 142/353] CLN: clean up PeriodIndex constructor closes #13232 Material clean up of PeriodIndex constructor, which was doing a few weird things (https://github.com/pydata/pandas/issues/13232#issuecomme nt-220788816), and generally getting messy. Author: Maximilian Roos Closes #13277 from MaximilianR/period-float and squashes the following commits: 5cae7aa [Maximilian Roos] @jreback changes 75ff54d [Maximilian Roos] _new_PeriodIndex for unpickling 240172f [Maximilian Roos] coerce freq object earlier for perf ba5133b [Maximilian Roos] documentation b0fc0a7 [Maximilian Roos] final changes fa0fa9d [Maximilian Roos] clean up PeriodIndex constructor --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 4 +- pandas/indexes/base.py | 5 + pandas/io/packers.py | 2 +- .../tests/indexes/period/test_construction.py | 9 +- pandas/tests/indexes/period/test_period.py | 6 + pandas/tseries/period.py | 156 +++++++++--------- setup.cfg | 1 + 8 files changed, 98 insertions(+), 86 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1ba327a4ea50c..ca093eca30511 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -657,6 +657,7 @@ Bug Fixes - Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) - Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) - Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) +- Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`) - Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) - Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 55d404f05dd1d..d37c98c9b9b90 100644 
--- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -471,8 +471,8 @@ def _value_counts_arraylike(values, dropna=True): # dtype handling if is_datetimetz_type: keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz) - if is_period_type: - keys = PeriodIndex._simple_new(keys, freq=freq) + elif is_period_type: + keys = PeriodIndex._from_ordinals(keys, freq=freq) elif is_signed_integer_dtype(dtype): values = _ensure_int64(values) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5d43d2d32af67..e441d9a88690d 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -88,6 +88,11 @@ def _new_Index(cls, d): """ This is called upon unpickling, rather than the default which doesn't have arguments and breaks __new__ """ + # required for backward compat, because PI can't be instantiated with + # ordinals through __new__ GH #13277 + if issubclass(cls, ABCPeriodIndex): + from pandas.tseries.period import _new_PeriodIndex + return _new_PeriodIndex(cls, **d) return cls.__new__(cls, **d) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 7afe8a06b6af1..39bc1a4ecf225 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -573,7 +573,7 @@ def decode(obj): elif typ == u'period_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq']) - return globals()[obj[u'klass']](data, **d) + return globals()[obj[u'klass']]._from_ordinals(data, **d) elif typ == u'datetime_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 228615829b5b8..f13a84f4f0e92 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -120,7 +120,7 @@ def test_constructor_fromarraylike(self): self.assertRaises(ValueError, PeriodIndex, idx._values) self.assertRaises(ValueError, PeriodIndex, list(idx._values)) - self.assertRaises(ValueError, PeriodIndex, + self.assertRaises(TypeError, PeriodIndex, data=Period('2007', freq='A')) result = PeriodIndex(iter(idx)) @@ -285,12 +285,15 @@ def test_constructor_simple_new_empty(self): result = idx._simple_new(idx, name='p', freq='M') tm.assert_index_equal(result, idx) - def test_constructor_simple_new_floats(self): + def test_constructor_floats(self): # GH13079 - for floats in [[1.1], np.array([1.1])]: + for floats in [[1.1, 2.1], np.array([1.1, 2.1])]: with self.assertRaises(TypeError): pd.PeriodIndex._simple_new(floats, freq='M') + with self.assertRaises(TypeError): + pd.PeriodIndex(floats, freq='M') + def test_constructor_nat(self): self.assertRaises(ValueError, period_range, start='NaT', end='2011-01-01', freq='M') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index b80ab6feeeb23..1739211982b10 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -53,6 +53,12 @@ def test_astype_raises(self): def test_pickle_compat_construction(self): pass + def test_pickle_round_trip(self): + for freq in ['D', 'M', 'Y']: + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + result = self.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) + def test_get_loc(self): idx = pd.period_range('2000-01-01', periods=3) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 
6e499924730b3..bfe7724a1cfaa 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -17,7 +17,6 @@ is_period_dtype, is_bool_dtype, pandas_dtype, - _ensure_int64, _ensure_object) from pandas.types.dtypes import PeriodDtype from pandas.types.generic import ABCSeries @@ -114,6 +113,13 @@ def wrapper(self, other): return wrapper +def _new_PeriodIndex(cls, **d): + # GH13277 for unpickling + if d['data'].dtype == 'int64': + values = d.pop('data') + return cls._from_ordinals(values=values, **d) + + class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in @@ -209,17 +215,57 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, msg = 'specified freq and dtype are different' raise IncompatibleFrequency(msg) + # coerce freq to freq object, otherwise it can be coerced elementwise + # which is slow + if freq: + freq = Period._maybe_convert_freq(freq) + if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) else: data, freq = cls._generate_range(start, end, periods, freq, kwargs) - else: - ordinal, freq = cls._from_arraylike(data, freq, tz) - data = np.array(ordinal, dtype=np.int64, copy=copy) + return cls._from_ordinals(data, name=name, freq=freq) - return cls._simple_new(data, name=name, freq=freq) + if isinstance(data, PeriodIndex): + if freq is None or freq == data.freq: # no freq change + freq = data.freq + data = data._values + else: + base1, _ = _gfc(data.freq) + base2, _ = _gfc(freq) + data = period.period_asfreq_arr(data._values, + base1, base2, 1) + return cls._simple_new(data, name=name, freq=freq) + + # not array / index + if not isinstance(data, (np.ndarray, PeriodIndex, + DatetimeIndex, Int64Index)): + if is_scalar(data) or isinstance(data, Period): + cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (list, tuple)): + data = list(data) + + data = np.asarray(data) + + # datetime other than period + if is_datetime64_dtype(data.dtype): + data = dt64arr_to_periodarr(data, freq, tz) + return cls._from_ordinals(data, name=name, freq=freq) + + # check not floats + if infer_dtype(data) == 'floating' and len(data) > 0: + raise TypeError("PeriodIndex does not allow " + "floating point in construction") + + # anything else, likely an array of strings or periods + data = _ensure_object(data) + freq = freq or period.extract_freq(data) + data = period.extract_ordinals(data, freq) + return cls._from_ordinals(data, name=name, freq=freq) @classmethod def _generate_range(cls, start, end, periods, freq, fields): @@ -240,77 +286,26 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq - @classmethod - def _from_arraylike(cls, data, freq, tz): - if freq is not None: - freq = Period._maybe_convert_freq(freq) - - if not isinstance(data, (np.ndarray, PeriodIndex, - DatetimeIndex, Int64Index)): - if is_scalar(data) or isinstance(data, Period): - raise ValueError('PeriodIndex() must be called with a ' - 'collection of some kind, %s was passed' - % repr(data)) - - # other iterable of some kind - if not isinstance(data, (list, tuple)): - data = list(data) - - try: - data = _ensure_int64(data) - if freq is None: - raise ValueError('freq not specified') - data = np.array([Period(x, freq=freq) for x in data], - dtype=np.int64) - except (TypeError, ValueError): - data = _ensure_object(data) - - if freq is None: - freq = period.extract_freq(data) - data = period.extract_ordinals(data, freq) - else: - 
if isinstance(data, PeriodIndex): - if freq is None or freq == data.freq: - freq = data.freq - data = data._values - else: - base1, _ = _gfc(data.freq) - base2, _ = _gfc(freq) - data = period.period_asfreq_arr(data._values, - base1, base2, 1) - else: - if is_object_dtype(data): - inferred = infer_dtype(data) - if inferred == 'integer': - data = data.astype(np.int64) - - if freq is None and is_object_dtype(data): - # must contain Period instance and thus extract ordinals - freq = period.extract_freq(data) - data = period.extract_ordinals(data, freq) - - if freq is None: - msg = 'freq not specified and cannot be inferred' - raise ValueError(msg) - - if data.dtype != np.int64: - if np.issubdtype(data.dtype, np.datetime64): - data = dt64arr_to_periodarr(data, freq, tz) - else: - data = _ensure_object(data) - data = period.extract_ordinals(data, freq) - - return data, freq - @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): - + """ + Values can be any type that can be coerced to Periods. + Ordinals in an ndarray are fastpath-ed to `_from_ordinals` + """ if not is_integer_dtype(values): values = np.array(values, copy=False) - if (len(values) > 0 and is_float_dtype(values)): + if len(values) > 0 and is_float_dtype(values): raise TypeError("PeriodIndex can't take floats") - else: - return cls(values, name=name, freq=freq, **kwargs) + return cls(values, name=name, freq=freq, **kwargs) + + return cls._from_ordinals(values, name, freq, **kwargs) + + @classmethod + def _from_ordinals(cls, values, name=None, freq=None, **kwargs): + """ + Values should be int ordinals + `__new__` & `_simple_new` cooerce to ordinals and call this method + """ values = np.array(values, dtype='int64', copy=False) @@ -318,7 +313,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result._data = values result.name = name if freq is None: - raise ValueError('freq is not specified') + raise ValueError('freq is not specified and cannot be inferred') result.freq = Period._maybe_convert_freq(freq) result._reset_identity() return result @@ -327,13 +322,13 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): """ we always want to return a PeriodIndex """ return self._shallow_copy(values=values, **kwargs) - def _shallow_copy(self, values=None, **kwargs): - if kwargs.get('freq') is None: - # freq must be provided - kwargs['freq'] = self.freq + def _shallow_copy(self, values=None, freq=None, **kwargs): + if freq is None: + freq = self.freq if values is None: values = self._values - return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs) + return super(PeriodIndex, self)._shallow_copy(values=values, + freq=freq, **kwargs) def _coerce_scalar_to_index(self, item): """ @@ -413,7 +408,7 @@ def __array_wrap__(self, result, context=None): return result # the result is object dtype array of Period # cannot pass _simple_new as it is - return PeriodIndex(result, freq=self.freq, name=self.name) + return self._shallow_copy(result, freq=self.freq, name=self.name) @property def _box_func(self): @@ -708,7 +703,7 @@ def shift(self, n): values = self._values + n * self.freq.n if self.hasnans: values[self._isnan] = tslib.iNaT - return PeriodIndex(data=values, name=self.name, freq=self.freq) + return self._shallow_copy(values=values) @cache_readonly def dtype(self): @@ -945,7 +940,8 @@ def _wrap_union_result(self, other, result): def _apply_meta(self, rawarr): if not isinstance(rawarr, PeriodIndex): - rawarr = PeriodIndex(rawarr, freq=self.freq) + rawarr = PeriodIndex._from_ordinals(rawarr, 
freq=self.freq, + name=self.name) return rawarr def _format_native_types(self, na_rep=u('NaT'), date_format=None, diff --git a/setup.cfg b/setup.cfg index b9de7a3532209..8de4fc955bd50 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,7 @@ parentdir_prefix = pandas- [flake8] ignore = E731,E402 +max-line-length = 79 [yapf] based_on_style = pep8 From ed2a2e49945478a170b97466bb94444a3353da21 Mon Sep 17 00:00:00 2001 From: Nicholas Ver Halen Date: Sat, 4 Mar 2017 16:38:35 -0500 Subject: [PATCH 143/353] BUG: pivot_table over Categorical Columns closes #15193 Author: Nicholas Ver Halen Closes #15511 from verhalenn/issue15193 and squashes the following commits: bf0fdeb [Nicholas Ver Halen] Added description to code change. adf8616 [Nicholas Ver Halen] Added whatsnew for issue 15193 a643267 [Nicholas Ver Halen] Added test for issue 15193 d605251 [Nicholas Ver Halen] Made sure pivot_table propped na columns --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/tools/test_pivot.py | 33 ++++++++++++++++++++++++++++++++ pandas/tools/pivot.py | 4 ++++ 3 files changed, 38 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ca093eca30511..f51ff4cd0c908 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -735,6 +735,7 @@ Bug Fixes - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) +- Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/tools/test_pivot.py index f5d91d0088306..62863372dbd02 100644 --- a/pandas/tests/tools/test_pivot.py +++ b/pandas/tests/tools/test_pivot.py @@ -86,6 +86,39 @@ def test_pivot_table_dropna(self): tm.assert_index_equal(pv_col.columns, m) tm.assert_index_equal(pv_ind.index, m) + def test_pivot_table_dropna_categoricals(self): + # GH 15193 + categories = ['a', 'b', 'c', 'd'] + + df = DataFrame({'A': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'], + 'B': [1, 2, 3, 1, 2, 3, 1, 2, 3], + 'C': range(0, 9)}) + + df['A'] = df['A'].astype('category', ordered=False, + categories=categories) + result_true = df.pivot_table(index='B', columns='A', values='C', + dropna=True) + expected_columns = Series(['a', 'b', 'c'], name='A') + expected_columns = expected_columns.astype('category', ordered=False, + categories=categories) + expected_index = Series([1, 2, 3], name='B') + expected_true = DataFrame([[0.0, 3.0, 6.0], + [1.0, 4.0, 7.0], + [2.0, 5.0, 8.0]], + index=expected_index, + columns=expected_columns,) + tm.assert_frame_equal(expected_true, result_true) + + result_false = df.pivot_table(index='B', columns='A', values='C', + dropna=False) + expected_columns = Series(['a', 'b', 'c', 'd'], name='A') + expected_false = DataFrame([[0.0, 3.0, 6.0, np.NaN], + [1.0, 4.0, 7.0, np.NaN], + [2.0, 5.0, 8.0, np.NaN]], + index=expected_index, + columns=expected_columns,) + tm.assert_frame_equal(expected_false, result_false) + def test_pass_array(self): result = self.data.pivot_table( 'D', index=self.data.A, columns=self.data.C) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 41fc705691a96..e23beb8332fd4 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -175,6 +175,10 @@ def pivot_table(data, values=None, index=None, columns=None, 
aggfunc='mean', if len(index) == 0 and len(columns) > 0: table = table.T + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, DataFrame) and dropna: + table = table.dropna(how='all', axis=1) + return table From 0b776806b78421e377bb6c305d1e3a752f24e358 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 4 Mar 2017 22:58:32 +0100 Subject: [PATCH 144/353] DOC: fix build_table_schema docs (#15571) --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index c34cc1ec17512..c7a68a0fe9fbb 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2090,7 +2090,7 @@ A few notes on the generated table schema: - All dates are converted to UTC when serializing. Even timezone naïve values, which are treated as UTC with an offset of 0. - .. ipython:: python: + .. ipython:: python from pandas.io.json import build_table_schema s = pd.Series(pd.date_range('2016', periods=4)) From c198e28e1cd187523d77386d607c5536bce024c5 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Sat, 4 Mar 2017 19:45:20 -0600 Subject: [PATCH 145/353] BUG: DataFrame.isin empty datetimelike (#15570) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/ops.py | 2 +- pandas/tests/frame/test_analytics.py | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f51ff4cd0c908..c29dfaba2604a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -677,7 +677,7 @@ Bug Fixes - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes.
(:issue:`14956`) - +- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 697a99f63f62f..6cc43cd9228f6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1249,7 +1249,7 @@ def na_op(x, y): result = op(x, y) except TypeError: xrav = x.ravel() - result = np.empty(x.size, dtype=x.dtype) + result = np.empty(x.size, dtype=bool) if isinstance(y, (np.ndarray, ABCSeries)): yrav = y.ravel() mask = notnull(xrav) & notnull(yrav) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 111195363beb2..4758ee1323ca0 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1502,6 +1502,27 @@ def test_isin_multiIndex(self): result = df1.isin(df2) tm.assert_frame_equal(result, expected) + def test_isin_empty_datetimelike(self): + # GH 15473 + df1_ts = DataFrame({'date': + pd.to_datetime(['2014-01-01', '2014-01-02'])}) + df1_td = DataFrame({'date': + [pd.Timedelta(1, 's'), pd.Timedelta(2, 's')]}) + df2 = DataFrame({'date': []}) + df3 = DataFrame() + + expected = DataFrame({'date': [False, False]}) + + result = df1_ts.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_ts.isin(df3) + tm.assert_frame_equal(result, expected) + + result = df1_td.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_td.isin(df3) + tm.assert_frame_equal(result, expected) + # ---------------------------------------------------------------------- # Row deduplication From f5b7bcb4d6ccbc85450a8de3d443eeef11c57d93 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 4 Mar 2017 18:02:41 -0800 Subject: [PATCH 146/353] BUG: Groupby.cummin/max DataError on datetimes (#15561) (#15569) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/groupby.py | 4 ++-- pandas/tests/groupby/test_groupby.py | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c29dfaba2604a..4e528daa6e876 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -635,7 +635,7 @@ Performance Improvements - Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). -- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) +- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. 
(:issue:`14947`) - Improved performance of `rank()` for categorical data (:issue:`15498`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 578c334781d15..43c57a88b4d19 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1442,7 +1442,7 @@ def cummin(self, axis=0, **kwargs): if axis != 0: return self.apply(lambda x: np.minimum.accumulate(x, axis)) - return self._cython_transform('cummin', **kwargs) + return self._cython_transform('cummin', numeric_only=False) @Substitution(name='groupby') @Appender(_doc_template) @@ -1451,7 +1451,7 @@ def cummax(self, axis=0, **kwargs): if axis != 0: return self.apply(lambda x: np.maximum.accumulate(x, axis)) - return self._cython_transform('cummax', **kwargs) + return self._cython_transform('cummax', numeric_only=False) @Substitution(name='groupby') @Appender(_doc_template) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 74e8c6c45946f..e846963732883 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1954,7 +1954,8 @@ def test_arg_passthru(self): for attr in ['cummin', 'cummax']: f = getattr(df.groupby('group'), attr) result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) + # GH 15561: numeric_only=False set by default like min/max + tm.assert_index_equal(result.columns, expected_columns) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) @@ -4295,6 +4296,13 @@ def test_cummin_cummax(self): result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(expected, result) + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001']))) + expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b') + for method in ['cummax', 'cummin']: + result = getattr(df.groupby('a')['b'], method)() + tm.assert_series_equal(expected, result) + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From 0159dc2fa9ea6a6b4c17e01712d61dc4772cc965 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 5 Mar 2017 11:28:57 +0100 Subject: [PATCH 147/353] DOC: reset table_schema option after example (#15572) --- doc/source/options.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/options.rst b/doc/source/options.rst index 1a0e5cf6b7235..1b219f640cc87 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -533,3 +533,9 @@ by default. False by default, this can be enabled globally with the pd.set_option('display.html.table_schema', True) Only ``'display.max_rows'`` are serialized and published. + + +.. ipython:: python + :suppress: + + pd.reset_option('display.html.table_schema') \ No newline at end of file From a00ad37c3c8c29f4dd13802e93af9fc4c2cd73a7 Mon Sep 17 00:00:00 2001 From: Rouz Azari Date: Sun, 5 Mar 2017 03:23:57 -0800 Subject: [PATCH 148/353] ENH: str.replace accepts a compiled expression (#15456) - Series.str.replace now accepts a compiled regular expression for `pat`. - Signature for .str.replace changed, but remains backwards compatible. 
See #15446 --- doc/source/text.rst | 21 +++++++++++ doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/core/strings.py | 65 +++++++++++++++++++++++++-------- pandas/tests/test_strings.py | 59 ++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+), 16 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 52e05c5d511bc..2b2520cb6100f 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -164,6 +164,27 @@ positional argument (a regex object) and return a string. repl = lambda m: m.group('two').swapcase() pd.Series(['Foo Bar Baz', np.nan]).str.replace(pat, repl) +The ``replace`` method also accepts a compiled regular expression object +from :func:`re.compile` as a pattern. All flags should be included in the +compiled regular expression object. + +.. versionadded:: 0.20.0 + +.. ipython:: python + + import re + regex_pat = re.compile(r'^.a|dog', flags=re.IGNORECASE) + s3.str.replace(regex_pat, 'XX-XX ') + +Including a ``flags`` argument when calling ``replace`` with a compiled +regular expression object will raise a ``ValueError``. + +.. ipython:: + + @verbatim + In [1]: s3.str.replace(regex_pat, 'XX-XX ', flags=re.IGNORECASE) + --------------------------------------------------------------------------- + ValueError: case and flags cannot be set when pat is a compiled regex Indexing with ``.str`` ---------------------- diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4e528daa6e876..fe9035106e4af 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -28,7 +28,8 @@ New features ~~~~~~~~~~~~ - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. -- ``.str.replace`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index ac8d1db6a0bf3..46ba48b4cd846 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -9,7 +9,8 @@ is_string_like, is_list_like, is_scalar, - is_integer) + is_integer, + is_re) from pandas.core.common import _values_from_object from pandas.core.algorithms import take_1d @@ -303,7 +304,7 @@ def str_endswith(arr, pat, na=np.nan): return _na_map(f, arr, na, dtype=bool) -def str_replace(arr, pat, repl, n=-1, case=True, flags=0): +def str_replace(arr, pat, repl, n=-1, case=None, flags=0): """ Replace occurrences of pattern/regex in the Series/Index with some other string. Equivalent to :meth:`str.replace` or @@ -311,8 +312,12 @@ def str_replace(arr, pat, repl, n=-1, case=True, flags=0): Parameters ---------- - pat : string - Character sequence or regular expression + pat : string or compiled regex + String can be a character sequence or regular expression. + + .. versionadded:: 0.20.0 + `pat` also accepts a compiled regex. + repl : string or callable Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. 
@@ -323,15 +328,24 @@ def str_replace(arr, pat, repl, n=-1, case=True, flags=0): n : int, default -1 (all) Number of replacements to make from start - case : boolean, default True - If True, case sensitive + case : boolean, default None + - If True, case sensitive (the default if `pat` is a string) + - Set to False for case insensitive + - Cannot be set if `pat` is a compiled regex flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE + - re module flags, e.g. re.IGNORECASE + - Cannot be set if `pat` is a compiled regex Returns ------- replaced : Series/Index of objects + Notes + ----- + When `pat` is a compiled regex, all flags should be included in the + compiled regex. Use of `case` or `flags` with a compiled regex will + raise an error. + Examples -------- When `repl` is a string, every `pat` is replaced as with @@ -372,21 +386,42 @@ def str_replace(arr, pat, repl, n=-1, case=True, flags=0): 0 tWO 1 bAR dtype: object + + Using a compiled regex with flags + + >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar') + 0 foo + 1 bar + 2 NaN + dtype: object """ # Check whether repl is valid (GH 13438, GH 15055) if not (is_string_like(repl) or callable(repl)): raise TypeError("repl must be a string or callable") - use_re = not case or len(pat) > 1 or flags or callable(repl) - if use_re: - if not case: + is_compiled_re = is_re(pat) + if is_compiled_re: + if (case is not None) or (flags != 0): + raise ValueError("case and flags cannot be set" + " when pat is a compiled regex") + else: + # not a compiled regex + # set default case + if case is None: + case = True + + # add case flag, if provided + if case is False: flags |= re.IGNORECASE - regex = re.compile(pat, flags=flags) - n = n if n >= 0 else 0 - def f(x): - return regex.sub(repl, x, count=n) + use_re = is_compiled_re or len(pat) > 1 or flags or callable(repl) + + if use_re: + n = n if n >= 0 else 0 + regex = re.compile(pat, flags=flags) + f = lambda x: regex.sub(repl=repl, string=x, count=n) else: f = lambda x: x.replace(pat, repl, n) @@ -1558,7 +1593,7 @@ def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False): return self._wrap_result(result) @copy(str_replace) - def replace(self, pat, repl, n=-1, case=True, flags=0): + def replace(self, pat, repl, n=-1, case=None, flags=0): result = str_replace(self._data, pat, repl, n=n, case=case, flags=flags) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index ce97b09b7e3ca..f98cabbb70477 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -469,6 +469,65 @@ def test_replace_callable(self): exp = Series(['bAR', NA]) tm.assert_series_equal(result, exp) + def test_replace_compiled_regex(self): + # GH 15446 + values = Series(['fooBAD__barBAD', NA]) + + # test with compiled regex + pat = re.compile(r'BAD[_]*') + result = values.str.replace(pat, '') + exp = Series(['foobar', NA]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD', + None, 1, 2.]) + + rs = Series(mixed).str.replace(pat, '') + xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA]) + tm.assertIsInstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + # unicode + values = Series([u('fooBAD__barBAD'), NA]) + + result = values.str.replace(pat, '') + exp = Series([u('foobar'), NA]) + tm.assert_series_equal(result, exp) + + result = values.str.replace(pat, '', n=1) + exp = 
Series([u('foobarBAD'), NA]) + tm.assert_series_equal(result, exp) + + # flags + unicode + values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) + exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) + pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE) + result = values.str.replace(pat, ", ") + tm.assert_series_equal(result, exp) + + # case and flags provided to str.replace will have no effect + # and will produce warnings + values = Series(['fooBAD__barBAD__bad', NA]) + pat = re.compile(r'BAD[_]*') + + with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + result = values.str.replace(pat, '', flags=re.IGNORECASE) + + with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + result = values.str.replace(pat, '', case=False) + + with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + result = values.str.replace(pat, '', case=True) + + # test with callable + values = Series(['fooBAD__barBAD', NA]) + repl = lambda m: m.group(0).swapcase() + pat = re.compile('[a-z][A-Z]{2}') + result = values.str.replace(pat, repl, n=2) + exp = Series(['foObaD__baRbaD', NA]) + tm.assert_series_equal(result, exp) + def test_repeat(self): values = Series(['a', 'b', NA, 'c', NA, 'd']) From 84bbeae9f10d63fcd546c632649828621a80f64d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 5 Mar 2017 13:01:53 +0100 Subject: [PATCH 149/353] TST: fix test str_replace error messge (#15456) --- pandas/tests/test_strings.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f98cabbb70477..f8ce0070b2c78 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -511,13 +511,13 @@ def test_replace_compiled_regex(self): values = Series(['fooBAD__barBAD__bad', NA]) pat = re.compile(r'BAD[_]*') - with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): result = values.str.replace(pat, '', flags=re.IGNORECASE) - with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): result = values.str.replace(pat, '', case=False) - with tm.assertRaisesRegexp(ValueError, "case and flags must be"): + with tm.assertRaisesRegexp(ValueError, "case and flags cannot be"): result = values.str.replace(pat, '', case=True) # test with callable From f4a03d97fd4e9af69cbd480df01f0172057a0ef1 Mon Sep 17 00:00:00 2001 From: Petio Petrov Date: Sun, 5 Mar 2017 11:21:14 -0500 Subject: [PATCH 150/353] Update dtypes.py (#15577) --- pandas/types/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/types/dtypes.py b/pandas/types/dtypes.py index 5b6d7905d4095..43135ba94ab46 100644 --- a/pandas/types/dtypes.py +++ b/pandas/types/dtypes.py @@ -73,7 +73,7 @@ def __ne__(self, other): @classmethod def is_dtype(cls, dtype): - """ Return a boolean if we if the passed type is an actual dtype that + """ Return a boolean if the passed type is an actual dtype that we can match (via string or type) """ if hasattr(dtype, 'dtype'): From 5067708f0199a0b614586dbbc1a1536fa4442b65 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 5 Mar 2017 11:25:46 -0500 Subject: [PATCH 151/353] BUG: Floating point accuracy with DatetimeIndex.round (#14440) closes #14440 Employs @eoincondron's fix for float point inaccuracies when rounding by milliseconds for `DatetimeIndex.round` and `Timestamp.round` Author: Matt Roeschke Closes #15568 from mroeschke/fix_14440 
and squashes the following commits: c5a7cbc [Matt Roeschke] BUG:Floating point accuracy with DatetimeIndex.round (#14440) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/indexes/datetimes/test_ops.py | 11 +++++++++++ pandas/tests/scalar/test_timestamp.py | 9 +++++++++ pandas/tseries/base.py | 2 +- pandas/tslib.pyx | 3 ++- 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fe9035106e4af..db803e6e7856b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -652,6 +652,7 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds (:issue: `14440`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 8eb9128d8d1c8..3a6402ae83ae2 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -175,6 +175,17 @@ def test_round(self): tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + # GH 14440 + index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) + result = index.round('ms') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) + tm.assert_index_equal(result, expected) + + index = pd.DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) + result = index.round('ms') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) + tm.assert_index_equal(result, expected) + def test_repeat_range(self): rng = date_range('1/1/2000', '1/1/2001') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 2abc83ca6109c..ae278ebfa2533 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -732,6 +732,15 @@ def test_round(self): for freq in ['Y', 'M', 'foobar']: self.assertRaises(ValueError, lambda: dti.round(freq)) + # GH 14440 + result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms') + expected = pd.Timestamp('2016-10-17 12:00:00.002000') + self.assertEqual(result, expected) + + result = pd.Timestamp('2016-10-17 12:00:00.00149').round('ms') + expected = pd.Timestamp('2016-10-17 12:00:00.001000') + self.assertEqual(result, expected) + def test_class_ops_pytz(self): tm._skip_if_no_pytz() from pytz import timezone diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index ee9234d6c8237..5891481677ed2 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -83,7 +83,7 @@ def _round(self, freq, rounder): # round the local times values = _ensure_datetimelike_to_i8(self) - result = (unit * rounder(values / float(unit))).astype('i8') + result = (unit * rounder(values / float(unit)).astype('i8')) result = self._maybe_mask_results(result, 
fill_value=tslib.NaT) attribs = self._get_attributes_dict() if 'freq' in attribs: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index fc6e689a35d81..b96e9434e617a 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -421,7 +421,8 @@ class Timestamp(_Timestamp): value = self.tz_localize(None).value else: value = self.value - result = Timestamp(unit * rounder(value / float(unit)), unit='ns') + result = (unit * rounder(value / float(unit)).astype('i8')) + result = Timestamp(result, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) return result From 09360d80da730008a6a89f38f3780bb1d55f9e25 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 5 Mar 2017 17:09:17 -0500 Subject: [PATCH 152/353] PERF: faster unstacking closes #15503 Author: Jeff Reback Closes #15510 from jreback/reshape3 and squashes the following commits: ec29226 [Jeff Reback] PERF: faster unstacking --- asv_bench/benchmarks/reshape.py | 21 ++++++++ doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/reshape.py | 56 +++++++++++++++++---- pandas/src/reshape.pyx | 35 +++++++++++++ pandas/src/reshape_helper.pxi.in | 81 ++++++++++++++++++++++++++++++ pandas/tests/frame/test_reshape.py | 13 +++-- setup.py | 3 ++ 7 files changed, 196 insertions(+), 15 deletions(-) create mode 100644 pandas/src/reshape.pyx create mode 100644 pandas/src/reshape_helper.pxi.in diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index a3ecfff52c794..b9346c497b9ef 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -59,6 +59,27 @@ def time_reshape_unstack_simple(self): self.df.unstack(1) +class reshape_unstack_large_single_dtype(object): + goal_time = 0.2 + + def setup(self): + m = 100 + n = 1000 + + levels = np.arange(m) + index = pd.MultiIndex.from_product([levels]*2) + columns = np.arange(n) + values = np.arange(m*m*n).reshape(m*m, n) + self.df = pd.DataFrame(values, index, columns) + self.df2 = self.df.iloc[:-1] + + def time_unstack_full_product(self): + self.df.unstack() + + def time_unstack_with_mask(self): + self.df2.unstack() + + class unstack_sparse_keyspace(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index db803e6e7856b..725dc7fc52ed0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -640,7 +640,7 @@ Performance Improvements - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) - Improved performance of `rank()` for categorical data (:issue:`15498`) - +- Improved performance when using ``.unstack()`` (:issue:`15503`) .. 
_whatsnew_0200.bug_fixes: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 87cb088c2e91e..7bcd9f2d30b79 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -7,7 +7,9 @@ import numpy as np -from pandas.types.common import _ensure_platform_int, is_list_like +from pandas.types.common import (_ensure_platform_int, + is_list_like, is_bool_dtype, + needs_i8_conversion) from pandas.types.cast import _maybe_promote from pandas.types.missing import notnull import pandas.types.concat as _concat @@ -25,6 +27,7 @@ import pandas.core.algorithms as algos import pandas.algos as _algos +import pandas._reshape as _reshape from pandas.core.index import MultiIndex, _get_na_value @@ -182,9 +185,21 @@ def get_new_values(self): stride = values.shape[1] result_width = width * stride result_shape = (length, result_width) + mask = self.mask + mask_all = mask.all() + + # we can simply reshape if we don't have a mask + if mask_all and len(values): + new_values = (self.sorted_values + .reshape(length, width, stride) + .swapaxes(1, 2) + .reshape(result_shape) + ) + new_mask = np.ones(result_shape, dtype=bool) + return new_values, new_mask # if our mask is all True, then we can use our existing dtype - if self.mask.all(): + if mask_all: dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: @@ -194,13 +209,36 @@ def get_new_values(self): new_mask = np.zeros(result_shape, dtype=bool) - # is there a simpler / faster way of doing this? - for i in range(values.shape[1]): - chunk = new_values[:, i * width:(i + 1) * width] - mask_chunk = new_mask[:, i * width:(i + 1) * width] - - chunk.flat[self.mask] = self.sorted_values[:, i] - mask_chunk.flat[self.mask] = True + name = np.dtype(dtype).name + sorted_values = self.sorted_values + + # we need to convert to a basic dtype + # and possibly coerce an input to our output dtype + # e.g. 
ints -> floats + if needs_i8_conversion(values): + sorted_values = sorted_values.view('i8') + new_values = new_values.view('i8') + name = 'int64' + elif is_bool_dtype(values): + sorted_values = sorted_values.astype('object') + new_values = new_values.astype('object') + name = 'object' + else: + sorted_values = sorted_values.astype(name, copy=False) + + # fill in our values & mask + f = getattr(_reshape, "unstack_{}".format(name)) + f(sorted_values, + mask.view('u1'), + stride, + length, + width, + new_values, + new_mask.view('u1')) + + # reconstruct dtype if needed + if needs_i8_conversion(values): + new_values = new_values.view(values.dtype) return new_values, new_mask diff --git a/pandas/src/reshape.pyx b/pandas/src/reshape.pyx new file mode 100644 index 0000000000000..82851b7e80994 --- /dev/null +++ b/pandas/src/reshape.pyx @@ -0,0 +1,35 @@ +# cython: profile=False + +from numpy cimport * +cimport numpy as np +import numpy as np + +cimport cython + +import_array() + +cimport util + +from numpy cimport NPY_INT8 as NPY_int8 +from numpy cimport NPY_INT16 as NPY_int16 +from numpy cimport NPY_INT32 as NPY_int32 +from numpy cimport NPY_INT64 as NPY_int64 +from numpy cimport NPY_FLOAT16 as NPY_float16 +from numpy cimport NPY_FLOAT32 as NPY_float32 +from numpy cimport NPY_FLOAT64 as NPY_float64 + +from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float16_t, float32_t, float64_t) + +int8 = np.dtype(np.int8) +int16 = np.dtype(np.int16) +int32 = np.dtype(np.int32) +int64 = np.dtype(np.int64) +float16 = np.dtype(np.float16) +float32 = np.dtype(np.float32) +float64 = np.dtype(np.float64) + +cdef double NaN = np.NaN +cdef double nan = NaN + +include "reshape_helper.pxi" diff --git a/pandas/src/reshape_helper.pxi.in b/pandas/src/reshape_helper.pxi.in new file mode 100644 index 0000000000000..bb9a5977f8b45 --- /dev/null +++ b/pandas/src/reshape_helper.pxi.in @@ -0,0 +1,81 @@ +""" +Template for each `dtype` helper function for take + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# reshape +# ---------------------------------------------------------------------- + +{{py: + +# name, c_type +dtypes = [('uint8', 'uint8_t'), + ('uint16', 'uint16_t'), + ('uint32', 'uint32_t'), + ('uint64', 'uint64_t'), + ('int8', 'int8_t'), + ('int16', 'int16_t'), + ('int32', 'int32_t'), + ('int64', 'int64_t'), + ('float32', 'float32_t'), + ('float64', 'float64_t'), + ('object', 'object')] +}} + +{{for dtype, c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values, + ndarray[uint8_t, ndim=1] mask, + Py_ssize_t stride, + Py_ssize_t length, + Py_ssize_t width, + ndarray[{{c_type}}, ndim=2] new_values, + ndarray[uint8_t, ndim=2] new_mask): + """ + transform long sorted_values to wide new_values + + Parameters + ---------- + values : typed ndarray + mask : boolean ndarray + stride : int + length : int + width : int + new_values : typed ndarray + result array + new_mask : boolean ndarray + result mask + + """ + + cdef: + Py_ssize_t i, j, w, nulls, s, offset + + {{if dtype == 'object'}} + if True: + {{else}} + with nogil: + {{endif}} + + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + +{{endfor}} diff 
--git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 1890b33e3dbaa..c8c7313ddd071 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -121,19 +121,22 @@ def test_pivot_index_none(self): assert_frame_equal(result, expected) def test_stack_unstack(self): - stacked = self.frame.stack() + f = self.frame.copy() + f[:] = np.arange(np.prod(f.shape)).reshape(f.shape) + + stacked = f.stack() stacked_df = DataFrame({'foo': stacked, 'bar': stacked}) unstacked = stacked.unstack() unstacked_df = stacked_df.unstack() - assert_frame_equal(unstacked, self.frame) - assert_frame_equal(unstacked_df['bar'], self.frame) + assert_frame_equal(unstacked, f) + assert_frame_equal(unstacked_df['bar'], f) unstacked_cols = stacked.unstack(0) unstacked_cols_df = stacked_df.unstack(0) - assert_frame_equal(unstacked_cols.T, self.frame) - assert_frame_equal(unstacked_cols_df['bar'].T, self.frame) + assert_frame_equal(unstacked_cols.T, f) + assert_frame_equal(unstacked_cols_df['bar'].T, f) def test_unstack_fill(self): diff --git a/setup.py b/setup.py index cbcadce459c67..525cbdf600c78 100755 --- a/setup.py +++ b/setup.py @@ -113,6 +113,7 @@ def is_platform_mac(): _pxi_dep_template = { 'algos': ['algos_common_helper.pxi.in', 'algos_groupby_helper.pxi.in', 'algos_take_helper.pxi.in', 'algos_rank_helper.pxi.in'], + '_reshape': ['reshape_helper.pxi.in'], '_join': ['join_helper.pxi.in', 'joins_func_helper.pxi.in'], 'hashtable': ['hashtable_class_helper.pxi.in', 'hashtable_func_helper.pxi.in'], @@ -496,6 +497,8 @@ def pxd(name): algos={'pyxfile': 'algos', 'pxdfiles': ['src/util', 'hashtable'], 'depends': _pxi_dep['algos']}, + _reshape={'pyxfile': 'src/reshape', + 'depends': _pxi_dep['_reshape']}, _join={'pyxfile': 'src/join', 'pxdfiles': ['src/util', 'hashtable'], 'depends': _pxi_dep['_join']}, From dc323507672ec0ceb4b2b0366445a794f3e92ee7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Mar 2017 10:32:57 +0100 Subject: [PATCH 153/353] API: return Index instead of array from datetime field accessors (GH15022) --- pandas/tests/indexes/datetimes/test_misc.py | 4 ++-- pandas/tests/indexes/period/test_construction.py | 4 ++-- pandas/tests/indexes/period/test_period.py | 8 ++++---- pandas/tseries/index.py | 15 +++++++++------ pandas/tseries/period.py | 5 +++-- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 6b0191edbda5a..8fcb26ab517bf 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -307,5 +307,5 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) + self.assert_index_equal(dti.nanosecond, + pd.Index(np.arange(10, dtype=np.int64))) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index f13a84f4f0e92..ab70ad59846e8 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) + self.assert_index_equal(pindex.year, pd.Index(years)) + self.assert_index_equal(pindex.quarter, pd.Index(quarters)) def 
test_constructor_invalid_quarters(self): self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 1739211982b10..16b8ce6569802 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -653,10 +653,10 @@ def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2012-03', '2012-04'], freq='D') - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64) + self.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64) + self.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 5f00e8b648689..f9821c4b799e6 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -77,16 +77,19 @@ def f(self): result = tslib.get_start_end_field(values, field, self.freqstr, month_kw) + result = self._maybe_mask_results(result, convert='float64') + elif field in ['weekday_name']: result = tslib.get_date_name_field(values, field) - return self._maybe_mask_results(result) + result = self._maybe_mask_results(result) elif field in ['is_leap_year']: # no need to mask NaT - return tslib.get_date_field(values, field) + result = tslib.get_date_field(values, field) else: result = tslib.get_date_field(values, field) + result = self._maybe_mask_results(result, convert='float64') - return self._maybe_mask_results(result, convert='float64') + return Index(result) f.__name__ = name f.__doc__ = docstring @@ -1913,9 +1916,9 @@ def to_julian_date(self): """ # http://mysite.verizon.net/aesir_research/date/jdalg2.htm - year = self.year - month = self.month - day = self.day + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index bfe7724a1cfaa..56f88b7ed800c 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -52,7 +52,8 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - return get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._values, base) + return Index(result) f.__name__ = name f.__doc__ = docstring return property(f) @@ -585,7 +586,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(self.year) + return tslib._isleapyear_arr(np.asarray(self.year)) @property def start_time(self): From 0bf45320440e0d477f6b31ac8825f34e8212f152 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 6 Mar 2017 10:38:22 +0100 Subject: [PATCH 154/353] Revert "API: return Index instead of array from datetime field accessors (GH15022)" This reverts commit dc323507672ec0ceb4b2b0366445a794f3e92ee7. 
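For reference, a minimal sketch of the behaviour difference that the original GH15022 commit introduced and that this revert undoes (illustrative only; ``pd``/``np`` are the usual import aliases, and the exact return type depends on which of the two commits is checked out):

    import numpy as np
    import pandas as pd

    dti = pd.DatetimeIndex(['2016-01-01', '2016-01-02'])

    # With the GH15022 commit applied, datetime field accessors wrap their
    # result in an Index; after this revert they return the raw numpy array.
    print(type(dti.year))        # pd.Index with the commit, np.ndarray after the revert
    print(np.asarray(dti.year))  # np.asarray() yields a plain ndarray either way
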
--- pandas/tests/indexes/datetimes/test_misc.py | 4 ++-- pandas/tests/indexes/period/test_construction.py | 4 ++-- pandas/tests/indexes/period/test_period.py | 8 ++++---- pandas/tseries/index.py | 15 ++++++--------- pandas/tseries/period.py | 5 ++--- 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 8fcb26ab517bf..6b0191edbda5a 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -307,5 +307,5 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_index_equal(dti.nanosecond, - pd.Index(np.arange(10, dtype=np.int64))) + self.assert_numpy_array_equal(dti.nanosecond, + np.arange(10, dtype=np.int32)) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index ab70ad59846e8..f13a84f4f0e92 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - self.assert_index_equal(pindex.year, pd.Index(years)) - self.assert_index_equal(pindex.quarter, pd.Index(quarters)) + self.assert_numpy_array_equal(pindex.year, years) + self.assert_numpy_array_equal(pindex.quarter, quarters) def test_constructor_invalid_quarters(self): self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 16b8ce6569802..1739211982b10 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -653,10 +653,10 @@ def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2012-03', '2012-04'], freq='D') - exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64) - self.assert_index_equal(idx.year, exp) - exp = Index([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_index_equal(idx.month, exp) + exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) + self.assert_numpy_array_equal(idx.year, exp) + exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) + self.assert_numpy_array_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index f9821c4b799e6..5f00e8b648689 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -77,19 +77,16 @@ def f(self): result = tslib.get_start_end_field(values, field, self.freqstr, month_kw) - result = self._maybe_mask_results(result, convert='float64') - elif field in ['weekday_name']: result = tslib.get_date_name_field(values, field) - result = self._maybe_mask_results(result) + return self._maybe_mask_results(result) elif field in ['is_leap_year']: # no need to mask NaT - result = tslib.get_date_field(values, field) + return tslib.get_date_field(values, field) else: result = tslib.get_date_field(values, field) - result = self._maybe_mask_results(result, convert='float64') - return Index(result) + return self._maybe_mask_results(result, convert='float64') f.__name__ = name f.__doc__ = docstring @@ -1916,9 +1913,9 @@ def to_julian_date(self): """ # http://mysite.verizon.net/aesir_research/date/jdalg2.htm - year = np.asarray(self.year) - month = np.asarray(self.month) - day = np.asarray(self.day) + year = 
self.year + month = self.month + day = self.day testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 56f88b7ed800c..bfe7724a1cfaa 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -52,8 +52,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - result = get_period_field_arr(alias, self._values, base) - return Index(result) + return get_period_field_arr(alias, self._values, base) f.__name__ = name f.__doc__ = docstring return property(f) @@ -586,7 +585,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(np.asarray(self.year)) + return tslib._isleapyear_arr(self.year) @property def start_time(self): From c61b350c999f515c199bf9701c4bba3d610bc384 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 6 Mar 2017 17:12:44 -0500 Subject: [PATCH 155/353] DOC: updated badges --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7bc350d1c6675..8595043cf68c3 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,15 @@ + + + + From 7740231bbc0b92db55479cd02400b37c9470a4d2 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Mon, 6 Mar 2017 16:15:10 -0600 Subject: [PATCH 156/353] DOC: remove wakari.io section (#15596) --- doc/source/install.rst | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 8b0fec6a3dac3..fe2a9fa4ba509 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -23,18 +23,6 @@ Officially Python 2.7, 3.4, 3.5, and 3.6 Installing pandas ----------------- -Trying out pandas, no installation required! -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The easiest way to start experimenting with pandas doesn't involve installing -pandas at all. - -`Wakari `__ is a free service that provides a hosted -`IPython Notebook `__ service in the cloud. - -Simply create an account, and have access to pandas from within your brower via -an `IPython Notebook `__ in a few minutes. - .. _install.anaconda: Installing pandas with Anaconda From e097bf596509779294b2ebf320a4b271deaec6ec Mon Sep 17 00:00:00 2001 From: Leon Yin Date: Mon, 6 Mar 2017 23:41:30 -0800 Subject: [PATCH 157/353] DOC: remove Panel4D from the API docs #15579 (#15598) --- doc/source/api.rst | 53 +------------------------------------ scripts/api_rst_coverage.py | 4 +-- 2 files changed, 3 insertions(+), 54 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 33ac5fde651d4..fbce64df84859 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1237,58 +1237,7 @@ Serialization / IO / Conversion Panel.to_frame Panel.to_xarray Panel.to_clipboard - -.. _api.panel4d: - -Panel4D -------- - -Constructor -~~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Panel4D - -Serialization / IO / Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Panel4D.to_xarray - -Attributes and underlying data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -**Axes** - - * **labels**: axis 1; each label corresponds to a Panel contained inside - * **items**: axis 2; each item corresponds to a DataFrame contained inside - * **major_axis**: axis 3; the index (rows) of each of the DataFrames - * **minor_axis**: axis 4; the columns of each of the DataFrames - -.. 
autosummary:: - :toctree: generated/ - - Panel4D.values - Panel4D.axes - Panel4D.ndim - Panel4D.size - Panel4D.shape - Panel4D.dtypes - Panel4D.ftypes - Panel4D.get_dtype_counts - Panel4D.get_ftype_counts - -Conversion -~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Panel4D.astype - Panel4D.copy - Panel4D.isnull - Panel4D.notnull - + .. _api.index: Index diff --git a/scripts/api_rst_coverage.py b/scripts/api_rst_coverage.py index cc456f03c02ec..6bb5383509be6 100644 --- a/scripts/api_rst_coverage.py +++ b/scripts/api_rst_coverage.py @@ -4,11 +4,11 @@ def main(): # classes whose members to check - classes = [pd.Series, pd.DataFrame, pd.Panel, pd.Panel4D] + classes = [pd.Series, pd.DataFrame, pd.Panel] def class_name_sort_key(x): if x.startswith('Series'): - # make sure Series precedes DataFrame, Panel, and Panel4D + # make sure Series precedes DataFrame, and Panel. return ' ' + x else: return x From 11239822b3ced16c28831f08d1ef62ed0c5c28ca Mon Sep 17 00:00:00 2001 From: Jeff Carey Date: Tue, 7 Mar 2017 08:21:32 -0500 Subject: [PATCH 158/353] CLN: Moved freeze_panes validation to io/excel.py (#15160) follow up to #15160 Author: Jeff Carey Closes #15592 from jeffcarey/enh-15160-touchup2 and squashes the following commits: 81cb86f [Jeff Carey] Cleaned up freeze_panes validation code a802fc7 [Jeff Carey] Moved freeze_panes validation to io/excel.py --- pandas/core/frame.py | 12 ------------ pandas/io/excel.py | 22 +++++++++++++++++++--- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b3e43edc3eb55..15179ac321076 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1431,24 +1431,12 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', inf_rep=inf_rep) formatted_cells = formatter.get_formatted_cells() - freeze_panes = self._validate_freeze_panes(freeze_panes) excel_writer.write_cells(formatted_cells, sheet_name, startrow=startrow, startcol=startcol, freeze_panes=freeze_panes) if need_save: excel_writer.save() - def _validate_freeze_panes(self, freeze_panes): - if freeze_panes is not None: - if ( - len(freeze_panes) == 2 and - all(isinstance(item, int) for item in freeze_panes) - ): - return freeze_panes - - raise ValueError("freeze_panes must be of form (row, column)" - " where row and column are integers") - def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, data_label=None, variable_labels=None): diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 37a61b7dc9ab5..00ec8bcf060ef 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -543,6 +543,22 @@ def __exit__(self, exc_type, exc_value, traceback): self.close() +def _validate_freeze_panes(freeze_panes): + if freeze_panes is not None: + if ( + len(freeze_panes) == 2 and + all(isinstance(item, int) for item in freeze_panes) + ): + return True + + raise ValueError("freeze_panes must be of form (row, column)" + " where row and column are integers") + + # freeze_panes wasn't specified, return False so it won't be applied + # to output sheet + return False + + def _trim_excel_header(row): # trim header row so auto-index inference works # xlrd uses '' , openpyxl None @@ -1330,7 +1346,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, wks.title = sheet_name self.sheets[sheet_name] = wks - if freeze_panes is not None: + if _validate_freeze_panes(freeze_panes): wks.freeze_panes = wks.cell(row=freeze_panes[0] + 1, column=freeze_panes[1] + 1) @@ -1418,7 
+1434,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, wks = self.book.add_sheet(sheet_name) self.sheets[sheet_name] = wks - if freeze_panes is not None: + if _validate_freeze_panes(freeze_panes): wks.set_panes_frozen(True) wks.set_horz_split_pos(freeze_panes[0]) wks.set_vert_split_pos(freeze_panes[1]) @@ -1550,7 +1566,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, style_dict = {} - if freeze_panes is not None: + if _validate_freeze_panes(freeze_panes): wks.freeze_panes(*(freeze_panes)) for cell in cells: From fdee92214dedf87f351f1ae0613d9f25061359b0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 7 Mar 2017 08:23:18 -0500 Subject: [PATCH 159/353] BUG: Timestamp.round precision error for ns (#15578) closes #15578 Author: Matt Roeschke Closes #15588 from mroeschke/fix_15578 and squashes the following commits: af95baa [Matt Roeschke] BUG: Timestamp.round precision error for ns (#15578) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 14 +++++++++++++- pandas/tests/scalar/test_timestamp.py | 13 ++++++++++++- pandas/tseries/base.py | 16 +++++++++++++--- pandas/tslib.pyx | 13 ++++++++++++- 5 files changed, 51 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 725dc7fc52ed0..f1df8f456159a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -652,7 +652,7 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) -- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds (:issue: `14440`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue: `14440`, :issue:`15578`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. 
(:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 3a6402ae83ae2..312017eef3446 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -175,17 +175,29 @@ def test_round(self): tm.assertRaisesRegexp(ValueError, msg, rng.round, freq='M') tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - # GH 14440 + # GH 14440 & 15578 index = pd.DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) result = index.round('ms') expected = pd.DatetimeIndex(['2016-10-17 12:00:00.002000'], tz=tz) tm.assert_index_equal(result, expected) + for freq in ['us', 'ns']: + tm.assert_index_equal(index, index.round(freq)) + index = pd.DatetimeIndex(['2016-10-17 12:00:00.00149'], tz=tz) result = index.round('ms') expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001000'], tz=tz) tm.assert_index_equal(result, expected) + index = pd.DatetimeIndex(['2016-10-17 12:00:00.001501031']) + result = index.round('10ns') + expected = pd.DatetimeIndex(['2016-10-17 12:00:00.001501030']) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(): + ts = '2016-10-17 12:00:00.001501031' + pd.DatetimeIndex([ts]).round('1010ns') + def test_repeat_range(self): rng = date_range('1/1/2000', '1/1/2001') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index ae278ebfa2533..bbcdce922f58a 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -732,7 +732,7 @@ def test_round(self): for freq in ['Y', 'M', 'foobar']: self.assertRaises(ValueError, lambda: dti.round(freq)) - # GH 14440 + # GH 14440 & 15578 result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms') expected = pd.Timestamp('2016-10-17 12:00:00.002000') self.assertEqual(result, expected) @@ -741,6 +741,17 @@ def test_round(self): expected = pd.Timestamp('2016-10-17 12:00:00.001000') self.assertEqual(result, expected) + ts = pd.Timestamp('2016-10-17 12:00:00.0015') + for freq in ['us', 'ns']: + self.assertEqual(ts, ts.round(freq)) + + result = pd.Timestamp('2016-10-17 12:00:00.001501031').round('10ns') + expected = pd.Timestamp('2016-10-17 12:00:00.001501030') + self.assertEqual(result, expected) + + with tm.assert_produces_warning(): + pd.Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') + def test_class_ops_pytz(self): tm._skip_if_no_pytz() from pytz import timezone diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 5891481677ed2..2e22c35868cb3 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -1,6 +1,7 @@ """ Base and utility classes for tseries type pandas objects. 
""" +import warnings from datetime import datetime, timedelta @@ -79,11 +80,20 @@ def _round(self, freq, rounder): from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos - # round the local times values = _ensure_datetimelike_to_i8(self) - - result = (unit * rounder(values / float(unit)).astype('i8')) + if unit < 1000 and unit % 1000 != 0: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + result = (buff * (values // buff) + unit * + (rounder((values % buff) / float(unit))).astype('i8')) + elif unit >= 1000 and unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + result = (unit * rounder(values / float(unit)).astype('i8')) + else: + result = (unit * rounder(values / float(unit)).astype('i8')) result = self._maybe_mask_results(result, fill_value=tslib.NaT) attribs = self._get_attributes_dict() if 'freq' in attribs: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index b96e9434e617a..8ee92e9fb900d 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -421,7 +421,18 @@ class Timestamp(_Timestamp): value = self.tz_localize(None).value else: value = self.value - result = (unit * rounder(value / float(unit)).astype('i8')) + if unit < 1000 and unit % 1000 != 0: + # for nano rounding, work with the last 6 digits separately + # due to float precision + buff = 1000000 + result = (buff * (value // buff) + unit * + (rounder((value % buff) / float(unit))).astype('i8')) + elif unit >= 1000 and unit % 1000 != 0: + msg = 'Precision will be lost using frequency: {}' + warnings.warn(msg.format(freq)) + result = (unit * rounder(value / float(unit)).astype('i8')) + else: + result = (unit * rounder(value / float(unit)).astype('i8')) result = Timestamp(result, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) From 38a34be9108fc76b68e57860506f428d8d67e002 Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 7 Mar 2017 08:27:38 -0500 Subject: [PATCH 160/353] BUG: repr SparseDataFrame after setting a value closes #15488 Author: Kernc Closes #15489 from kernc/sdf-repr and squashes the following commits: 2dc145c [Kernc] BUG: repr SparseDataFrame after setting a value --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/formats/format.py | 3 --- pandas/tests/sparse/test_format.py | 12 ++++++++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f1df8f456159a..e459c854dfab9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -698,7 +698,7 @@ Bug Fixes - Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) - +- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) - Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 9dde3b0001c31..622c4cd3bbcc7 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -716,9 +716,6 @@ def to_html(self, classes=None, notebook=False, border=None): def _get_formatted_column_labels(self, frame): from pandas.core.index import _sparsify - def is_numeric_dtype(dtype): - return issubclass(dtype.type, np.number) - columns = frame.columns if isinstance(columns, MultiIndex): diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/sparse/test_format.py index 0c0e773d19bb9..ba870a2c33801 100644 --- a/pandas/tests/sparse/test_format.py +++ b/pandas/tests/sparse/test_format.py @@ -116,3 +116,15 @@ def test_sparse_frame(self): with option_context("display.max_rows", 3): self.assertEqual(repr(sparse), repr(df)) + + def test_sparse_repr_after_set(self): + # GH 15488 + sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]]) + res = sdf.copy() + + # Ignore the warning + with pd.option_context('mode.chained_assignment', None): + sdf[0][1] = 2 # This line triggers the bug + + repr(sdf) + tm.assert_sp_frame_equal(sdf, res) From a347ecb574f4e53f43400ad50b507c481ce12edb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Mar 2017 09:30:06 -0500 Subject: [PATCH 161/353] DOC/BUILD: Parallelize doc build closes #15591 a couple of minutes faster with -j 2. fixes some deprecated use of pd.Term Author: Jeff Reback Closes #15600 from jreback/docs and squashes the following commits: c19303d [Jeff Reback] DOC: parallel build for docs --- doc/make.py | 6 +++--- doc/source/io.rst | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/make.py b/doc/make.py index 8a6d4e5df24f0..a2f5be5594e44 100755 --- a/doc/make.py +++ b/doc/make.py @@ -197,7 +197,7 @@ def html(): print(e) print("Failed to convert %s" % nb) - if os.system('sphinx-build -P -b html -d build/doctrees ' + if os.system('sphinx-build -j 2 -P -b html -d build/doctrees ' 'source build/html'): raise SystemExit("Building HTML failed.") try: @@ -222,7 +222,7 @@ def latex(): check_build() if sys.platform != 'win32': # LaTeX format. - if os.system('sphinx-build -b latex -d build/doctrees ' + if os.system('sphinx-build -j 2 -b latex -d build/doctrees ' 'source build/latex'): raise SystemExit("Building LaTeX failed.") # Produce pdf. @@ -245,7 +245,7 @@ def latex_forced(): check_build() if sys.platform != 'win32': # LaTeX format. - if os.system('sphinx-build -b latex -d build/doctrees ' + if os.system('sphinx-build -j 2 -b latex -d build/doctrees ' 'source build/latex'): raise SystemExit("Building LaTeX failed.") # Produce pdf. diff --git a/doc/source/io.rst b/doc/source/io.rst index c7a68a0fe9fbb..fa57d6d692152 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3758,7 +3758,7 @@ be data_columns # on-disk operations store.append('df_dc', df_dc, data_columns = ['B', 'C', 'string', 'string2']) - store.select('df_dc', [ pd.Term('B>0') ]) + store.select('df_dc', where='B>0') # getting creative store.select('df_dc', 'B > 0 & C > 0 & string == foo') @@ -4352,6 +4352,9 @@ HDFStore supports ``Panel4D`` storage. .. 
ipython:: python :okwarning: + wp = pd.Panel(randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=pd.date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) p4d = pd.Panel4D({ 'l1' : wp }) p4d store.append('p4d', p4d) @@ -4368,8 +4371,7 @@ object). This cannot be changed after table creation. :okwarning: store.append('p4d2', p4d, axes=['labels', 'major_axis', 'minor_axis']) - store - store.select('p4d2', [ pd.Term('labels=l1'), pd.Term('items=Item1'), pd.Term('minor_axis=A_big_strings') ]) + store.select('p4d2', where='labels=l1 and items=Item1 and minor_axis=A') .. ipython:: python :suppress: From c52ff68a536fafc0204c5afea57abb943a6c37ce Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 7 Mar 2017 16:14:53 -0500 Subject: [PATCH 162/353] BUG: fix SparseSeries reindex by using Series implementation closes #15447 Author: Pietro Battiston Closes #15461 from toobaz/drop_sparse_reindex and squashes the following commits: 9084246 [Pietro Battiston] Test SparseSeries.reindex with fill_value and nearest d6a46da [Pietro Battiston] Use _shared_docs for documentation 922c7b0 [Pietro Battiston] Test "copy" argument af99190 [Pietro Battiston] Whatsnew 7945cb4 [Pietro Battiston] Tests for .loc() and .reindex() on sparse series with MultiIndex 55b99f8 [Pietro Battiston] BUG: Drop faulty and redundant reindex() for SparseSeries --- doc/source/whatsnew/v0.20.0.txt | 4 +++ pandas/sparse/series.py | 24 +++---------- pandas/tests/sparse/test_indexing.py | 53 +++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e459c854dfab9..ece9ff4a1adff 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -672,6 +672,10 @@ Bug Fixes - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) + + + - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series` (:issue:`14320`) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index a3b701169ce91..c3dd089e8409a 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -32,7 +32,7 @@ _coo_to_sparse_series) -_shared_doc_kwargs = dict(klass='SparseSeries', +_shared_doc_kwargs = dict(axes='index', klass='SparseSeries', axes_single_arg="{0, 'index'}") # ----------------------------------------------------------------------------- @@ -570,27 +570,13 @@ def copy(self, deep=True): return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value).__finalize__(self) + @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs) def reindex(self, index=None, method=None, copy=True, limit=None, **kwargs): - """ - Conform SparseSeries to new Index - - See Series.reindex docstring for general behavior - Returns - ------- - reindexed : SparseSeries - """ - new_index = _ensure_index(index) - - if self.index.equals(new_index): - if copy: - return self.copy() - else: - return self - return self._constructor(self._data.reindex(new_index, method=method, - limit=limit, copy=copy), - index=new_index).__finalize__(self) + return super(SparseSeries, self).reindex(index=index, method=method, + copy=copy, limit=limit, + **kwargs) def 
sparse_reindex(self, new_index): """ diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 357a7103f4027..1a0782c0a3db9 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -366,7 +366,7 @@ def test_reindex(self): exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse() tm.assert_sp_series_equal(res, exp) - def test_reindex_fill_value(self): + def test_fill_value_reindex(self): orig = pd.Series([1, np.nan, 0, 3, 0], index=list('ABCDE')) sparse = orig.to_sparse(fill_value=0) @@ -397,6 +397,23 @@ def test_reindex_fill_value(self): exp = orig.reindex(['A', 'E', 'C', 'D']).to_sparse(fill_value=0) tm.assert_sp_series_equal(res, exp) + def test_reindex_fill_value(self): + floats = pd.Series([1., 2., 3.]).to_sparse() + result = floats.reindex([1, 2, 3], fill_value=0) + expected = pd.Series([2., 3., 0], index=[1, 2, 3]).to_sparse() + tm.assert_sp_series_equal(result, expected) + + def test_reindex_nearest(self): + s = pd.Series(np.arange(10, dtype='float64')).to_sparse() + target = [0.1, 0.9, 1.5, 2.0] + actual = s.reindex(target, method='nearest') + expected = pd.Series(np.around(target), target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + + actual = s.reindex(target, method='nearest', tolerance=0.2) + expected = pd.Series([0, 1, np.nan, 2], target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + def tests_indexing_with_sparse(self): # GH 13985 @@ -504,6 +521,11 @@ def test_loc(self): exp = orig.loc[[1, 3, 4, 5]].to_sparse() tm.assert_sp_series_equal(result, exp) + # single element list (GH 15447) + result = sparse.loc[['A']] + exp = orig.loc[['A']].to_sparse() + tm.assert_sp_series_equal(result, exp) + # dense array result = sparse.loc[orig % 2 == 1] exp = orig.loc[orig % 2 == 1].to_sparse() @@ -537,6 +559,35 @@ def test_loc_slice(self): orig.loc['A':'B'].to_sparse()) tm.assert_sp_series_equal(sparse.loc[:'B'], orig.loc[:'B'].to_sparse()) + def test_reindex(self): + # GH 15447 + orig = self.orig + sparse = self.sparse + + res = sparse.reindex([('A', 0), ('C', 1)]) + exp = orig.reindex([('A', 0), ('C', 1)]).to_sparse() + tm.assert_sp_series_equal(res, exp) + + # On specific level: + res = sparse.reindex(['A', 'C', 'B'], level=0) + exp = orig.reindex(['A', 'C', 'B'], level=0).to_sparse() + tm.assert_sp_series_equal(res, exp) + + # single element list (GH 15447) + res = sparse.reindex(['A'], level=0) + exp = orig.reindex(['A'], level=0).to_sparse() + tm.assert_sp_series_equal(res, exp) + + with tm.assertRaises(TypeError): + # Incomplete keys are not accepted for reindexing: + sparse.reindex(['A', 'C']) + + # "copy" argument: + res = sparse.reindex(sparse.index, copy=True) + exp = orig.reindex(orig.index, copy=True).to_sparse() + tm.assert_sp_series_equal(res, exp) + self.assertIsNot(sparse, res) + class TestSparseDataFrameIndexing(tm.TestCase): From 648ae4f03622d8eafe1ca3b833bd6a99f56bece4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Mar 2017 18:21:18 -0500 Subject: [PATCH 163/353] BLD: consolidate remaining extensions moves extensions to pandas/_libs, which holds the extension code and also the generated builds (as its importable). pandas/_libs/src is now almost an includes dir, holding low-frequency changing code. This consolidates the import process making it more uniform and consistent throughout the codebase. Finally this cleans up the remaining top-level namespace (with some deprecations in place for example pandas.lib, pandas.tslib, pandas.json, pandas.parser. 
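As a rough illustration of those deprecation shims (a sketch only, assuming a build with this patch applied; the shims are ``_DeprecatedModule`` instances wired up in ``pandas/__init__.py``), the old spellings keep working but warn and forward to the new locations:

    import warnings
    import pandas as pd

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        # old top-level spelling goes through the shim and still resolves
        print(pd.lib.infer_dtype(['a', 'b']))

    print([str(w.message) for w in caught])   # deprecation message pointing at pandas._libs.lib

    # new, non-deprecated spelling
    from pandas._libs.lib import infer_dtype
    print(infer_dtype(['a', 'b']))
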
I listed all of the changes in the whatsnew, but I don't think worthwhile deprecating anything else. Author: Jeff Reback Closes #15537 from jreback/extensions3 and squashes the following commits: a6d6cfa [Jeff Reback] BLD: rename / move some extensions --- Makefile | 2 +- asv_bench/benchmarks/binary_ops.py | 2 +- asv_bench/benchmarks/pandas_vb_common.py | 14 +- asv_bench/benchmarks/panel_methods.py | 2 +- doc/source/whatsnew/v0.20.0.txt | 29 +++ pandas/__init__.py | 14 +- pandas/_libs/__init__.py | 8 + pandas/{ => _libs}/algos.pyx | 4 +- .../{src => _libs}/algos_common_helper.pxi.in | 2 +- .../algos_groupby_helper.pxi.in | 0 .../{src => _libs}/algos_rank_helper.pxi.in | 0 .../{src => _libs}/algos_take_helper.pxi.in | 0 pandas/{ => _libs}/hashtable.pxd | 0 pandas/{ => _libs}/hashtable.pyx | 2 +- .../hashtable_class_helper.pxi.in | 0 .../hashtable_func_helper.pxi.in | 0 pandas/{ => _libs}/index.pyx | 4 +- .../{src => _libs}/index_class_helper.pxi.in | 0 pandas/{src => _libs}/join.pyx | 4 +- .../join_func_helper.pxi.in} | 0 pandas/{src => _libs}/join_helper.pxi.in | 0 pandas/{ => _libs}/lib.pxd | 0 pandas/{ => _libs}/lib.pyx | 0 pandas/{src => _libs}/period.pyx | 44 ++-- pandas/{src => _libs}/reshape.pyx | 0 pandas/{src => _libs}/reshape_helper.pxi.in | 0 pandas/{ => _libs}/src/datetime.pxd | 0 pandas/{ => _libs}/src/datetime/np_datetime.c | 0 pandas/{ => _libs}/src/datetime/np_datetime.h | 0 .../src/datetime/np_datetime_strings.c | 0 .../src/datetime/np_datetime_strings.h | 0 pandas/{ => _libs}/src/datetime_helper.h | 0 pandas/{ => _libs}/src/headers/math.h | 0 pandas/{ => _libs}/src/headers/ms_inttypes.h | 0 pandas/{ => _libs}/src/headers/ms_stdint.h | 0 pandas/{ => _libs}/src/headers/portable.h | 0 pandas/{ => _libs}/src/headers/stdint.h | 0 pandas/{ => _libs}/src/helper.h | 0 pandas/{ => _libs}/src/inference.pyx | 0 pandas/{ => _libs}/src/khash.pxd | 0 pandas/{ => _libs}/src/klib/khash.h | 0 pandas/{ => _libs}/src/klib/khash_python.h | 0 pandas/{ => _libs}/src/klib/ktypes.h | 0 pandas/{ => _libs}/src/klib/kvec.h | 0 pandas/{ => _libs}/src/msgpack/pack.h | 0 .../{ => _libs}/src/msgpack/pack_template.h | 0 pandas/{ => _libs}/src/msgpack/sysdep.h | 0 pandas/{ => _libs}/src/msgpack/unpack.h | 0 .../{ => _libs}/src/msgpack/unpack_define.h | 0 .../{ => _libs}/src/msgpack/unpack_template.h | 0 pandas/{ => _libs}/src/numpy.pxd | 0 pandas/{ => _libs}/src/numpy_helper.h | 0 pandas/{ => _libs}/src/offsets.pyx | 0 pandas/{ => _libs}/src/parse_helper.h | 0 pandas/{ => _libs}/src/parser/.gitignore | 0 pandas/{ => _libs}/src/parser/Makefile | 0 pandas/{ => _libs}/src/parser/io.c | 0 pandas/{ => _libs}/src/parser/io.h | 0 pandas/{ => _libs}/src/parser/tokenizer.c | 0 pandas/{ => _libs}/src/parser/tokenizer.h | 0 pandas/{ => _libs}/src/period_helper.c | 0 pandas/{ => _libs}/src/period_helper.h | 0 pandas/{ => _libs}/src/properties.pyx | 0 pandas/{ => _libs}/src/reduce.pyx | 0 pandas/{ => _libs}/src/skiplist.h | 0 pandas/{ => _libs}/src/skiplist.pxd | 0 pandas/{ => _libs}/src/skiplist.pyx | 0 pandas/{ => _libs}/src/ujson/lib/ultrajson.h | 0 .../{ => _libs}/src/ujson/lib/ultrajsondec.c | 0 .../{ => _libs}/src/ujson/lib/ultrajsonenc.c | 0 .../{ => _libs}/src/ujson/python/JSONtoObj.c | 0 .../{ => _libs}/src/ujson/python/objToJSON.c | 2 +- .../{ => _libs}/src/ujson/python/py_defines.h | 0 pandas/{ => _libs}/src/ujson/python/ujson.c | 8 +- pandas/{ => _libs}/src/ujson/python/version.h | 0 pandas/{ => _libs}/src/util.pxd | 0 pandas/{ => _libs}/tslib.pxd | 0 pandas/{ => _libs}/tslib.pyx | 0 
pandas/compat/pickle_compat.py | 8 +- pandas/computation/scope.py | 8 +- pandas/core/algorithms.py | 9 +- pandas/core/base.py | 2 +- pandas/core/categorical.py | 10 +- pandas/core/common.py | 5 +- pandas/core/frame.py | 32 ++- pandas/core/generic.py | 6 +- pandas/core/groupby.py | 48 ++-- pandas/core/internals.py | 9 +- pandas/core/missing.py | 4 +- pandas/core/nanops.py | 3 +- pandas/core/ops.py | 39 +-- pandas/core/reshape.py | 5 +- pandas/core/series.py | 31 ++- pandas/core/sorting.py | 25 +- pandas/core/strings.py | 2 +- pandas/core/window.py | 9 +- pandas/{ => core}/window.pyx | 2 +- pandas/formats/format.py | 5 +- pandas/indexes/api.py | 2 +- pandas/indexes/base.py | 38 ++- pandas/indexes/category.py | 6 +- pandas/indexes/multi.py | 10 +- pandas/indexes/numeric.py | 49 ++-- pandas/indexes/range.py | 4 +- pandas/io/api.py | 2 +- pandas/io/date_converters.py | 2 +- pandas/io/excel.py | 6 +- pandas/io/json/json.py | 8 +- pandas/io/json/normalize.py | 2 +- pandas/{ => io}/msgpack/__init__.py | 8 +- pandas/{ => io}/msgpack/_packer.pyx | 6 +- pandas/{ => io}/msgpack/_unpacker.pyx | 8 +- pandas/{ => io}/msgpack/_version.py | 0 pandas/{ => io}/msgpack/exceptions.py | 0 pandas/io/packers.py | 4 +- pandas/io/parsers.py | 8 +- pandas/{parser.pyx => io/parsers.pyx} | 7 +- pandas/io/pytables.py | 4 +- pandas/io/sas/__init__.py | 1 + pandas/io/sas/{saslib.pyx => sas.pyx} | 0 pandas/io/sas/sas7bdat.py | 2 +- pandas/io/sql.py | 2 +- pandas/io/stata.py | 4 +- pandas/json.py | 7 + pandas/lib.py | 7 + pandas/parser.py | 8 + pandas/sparse/array.py | 8 +- pandas/sparse/list.py | 2 +- pandas/sparse/series.py | 6 +- pandas/{src => sparse}/sparse.pyx | 0 .../{src => sparse}/sparse_op_helper.pxi.in | 0 pandas/tests/api/test_api.py | 48 +++- pandas/tests/computation/test_eval.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_indexing.py | 4 +- pandas/tests/frame/test_to_csv.py | 2 +- pandas/tests/groupby/test_bin_groupby.py | 4 +- pandas/tests/groupby/test_transform.py | 13 +- pandas/tests/indexes/common.py | 9 +- .../indexes/datetimes/test_construction.py | 5 +- .../indexes/datetimes/test_date_range.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 4 +- pandas/tests/indexes/period/test_indexing.py | 3 +- pandas/tests/indexes/period/test_ops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_multi.py | 9 +- pandas/tests/indexes/test_numeric.py | 2 +- .../indexes/timedeltas/test_construction.py | 4 +- pandas/tests/indexes/timedeltas/test_ops.py | 6 +- pandas/tests/indexes/timedeltas/test_tools.py | 5 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/io/json/test_pandas.py | 36 +-- pandas/tests/io/json/test_ujson.py | 4 +- pandas/tests/{ => io}/msgpack/__init__.py | 0 pandas/tests/{ => io}/msgpack/test_buffer.py | 2 +- pandas/tests/{ => io}/msgpack/test_case.py | 2 +- pandas/tests/{ => io}/msgpack/test_except.py | 2 +- .../tests/{ => io}/msgpack/test_extension.py | 4 +- pandas/tests/{ => io}/msgpack/test_format.py | 2 +- pandas/tests/{ => io}/msgpack/test_limits.py | 2 +- pandas/tests/{ => io}/msgpack/test_newspec.py | 2 +- pandas/tests/{ => io}/msgpack/test_obj.py | 2 +- pandas/tests/{ => io}/msgpack/test_pack.py | 2 +- .../tests/{ => io}/msgpack/test_read_size.py | 2 +- pandas/tests/{ => io}/msgpack/test_seq.py | 2 +- .../tests/{ => 
io}/msgpack/test_sequnpack.py | 4 +- pandas/tests/{ => io}/msgpack/test_subtype.py | 2 +- pandas/tests/{ => io}/msgpack/test_unpack.py | 2 +- .../tests/{ => io}/msgpack/test_unpack_raw.py | 2 +- pandas/tests/io/parser/common.py | 2 +- pandas/tests/io/parser/converters.py | 2 +- pandas/tests/io/parser/parse_dates.py | 4 +- pandas/tests/io/parser/test_textreader.py | 4 +- pandas/tests/io/parser/usecols.py | 2 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/test_packers.py | 5 +- pandas/tests/io/test_pytables.py | 2 +- pandas/tests/io/test_stata.py | 2 +- pandas/tests/scalar/test_period.py | 14 +- pandas/tests/scalar/test_timedelta.py | 13 +- pandas/tests/scalar/test_timestamp.py | 10 +- pandas/tests/series/test_constructors.py | 23 +- pandas/tests/series/test_dtypes.py | 2 +- pandas/tests/series/test_indexing.py | 6 +- pandas/tests/series/test_internals.py | 2 +- pandas/tests/series/test_missing.py | 27 +-- pandas/tests/series/test_replace.py | 2 +- pandas/tests/series/test_timeseries.py | 2 +- pandas/tests/sparse/test_array.py | 2 +- pandas/tests/sparse/test_frame.py | 2 +- pandas/tests/sparse/test_libsparse.py | 2 +- pandas/tests/sparse/test_series.py | 2 +- pandas/tests/test_algos.py | 41 ++-- pandas/tests/test_base.py | 7 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_join.py | 2 +- pandas/tests/test_lib.py | 2 +- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_take.py | 2 +- pandas/tests/tools/test_join.py | 8 +- pandas/tests/tseries/test_offsets.py | 4 +- pandas/tests/tseries/test_resample.py | 2 +- pandas/tests/tseries/test_timezones.py | 5 +- pandas/tests/types/test_inference.py | 14 +- pandas/tests/types/test_io.py | 4 +- pandas/tests/types/test_missing.py | 2 +- pandas/tools/hashing.py | 5 +- pandas/{src/hash.pyx => tools/hashing.pyx} | 0 pandas/tools/merge.py | 28 +-- pandas/tools/tile.py | 2 +- pandas/tools/util.py | 2 +- pandas/tseries/api.py | 2 +- pandas/tseries/base.py | 44 ++-- pandas/tseries/common.py | 3 +- pandas/tseries/converter.py | 2 +- pandas/tseries/frequencies.py | 6 +- pandas/tseries/index.py | 106 ++++----- pandas/tseries/offsets.py | 3 +- pandas/tseries/period.py | 12 +- pandas/tseries/resample.py | 7 +- pandas/tseries/tdi.py | 41 ++-- pandas/tseries/timedeltas.py | 2 +- pandas/tseries/tools.py | 3 +- pandas/tslib.py | 8 + pandas/types/cast.py | 10 +- pandas/types/common.py | 2 +- pandas/types/concat.py | 2 +- pandas/types/inference.py | 2 +- pandas/types/missing.py | 4 +- pandas/util/decorators.py | 2 +- pandas/util/depr_module.py | 30 ++- pandas/util/testing.py | 28 ++- pandas/{src => util}/testing.pyx | 0 scripts/bench_join.py | 2 +- scripts/bench_join_multi.py | 2 +- scripts/groupby_test.py | 2 +- scripts/roll_median_leak.py | 2 +- setup.py | 223 +++++++++--------- vb_suite/pandas_vb_common.py | 2 +- 243 files changed, 885 insertions(+), 771 deletions(-) create mode 100644 pandas/_libs/__init__.py rename pandas/{ => _libs}/algos.pyx (99%) rename pandas/{src => _libs}/algos_common_helper.pxi.in (99%) rename pandas/{src => _libs}/algos_groupby_helper.pxi.in (100%) rename pandas/{src => _libs}/algos_rank_helper.pxi.in (100%) rename pandas/{src => _libs}/algos_take_helper.pxi.in (100%) rename pandas/{ => _libs}/hashtable.pxd (100%) rename pandas/{ => _libs}/hashtable.pyx (99%) rename pandas/{src => _libs}/hashtable_class_helper.pxi.in (100%) rename pandas/{src => _libs}/hashtable_func_helper.pxi.in (100%) rename pandas/{ => _libs}/index.pyx (99%) rename pandas/{src => _libs}/index_class_helper.pxi.in (100%) rename pandas/{src => 
_libs}/join.pyx (98%) rename pandas/{src/joins_func_helper.pxi.in => _libs/join_func_helper.pxi.in} (100%) rename pandas/{src => _libs}/join_helper.pxi.in (100%) rename pandas/{ => _libs}/lib.pxd (100%) rename pandas/{ => _libs}/lib.pyx (100%) rename pandas/{src => _libs}/period.pyx (98%) rename pandas/{src => _libs}/reshape.pyx (100%) rename pandas/{src => _libs}/reshape_helper.pxi.in (100%) rename pandas/{ => _libs}/src/datetime.pxd (100%) rename pandas/{ => _libs}/src/datetime/np_datetime.c (100%) rename pandas/{ => _libs}/src/datetime/np_datetime.h (100%) rename pandas/{ => _libs}/src/datetime/np_datetime_strings.c (100%) rename pandas/{ => _libs}/src/datetime/np_datetime_strings.h (100%) rename pandas/{ => _libs}/src/datetime_helper.h (100%) rename pandas/{ => _libs}/src/headers/math.h (100%) rename pandas/{ => _libs}/src/headers/ms_inttypes.h (100%) rename pandas/{ => _libs}/src/headers/ms_stdint.h (100%) rename pandas/{ => _libs}/src/headers/portable.h (100%) rename pandas/{ => _libs}/src/headers/stdint.h (100%) rename pandas/{ => _libs}/src/helper.h (100%) rename pandas/{ => _libs}/src/inference.pyx (100%) rename pandas/{ => _libs}/src/khash.pxd (100%) rename pandas/{ => _libs}/src/klib/khash.h (100%) rename pandas/{ => _libs}/src/klib/khash_python.h (100%) rename pandas/{ => _libs}/src/klib/ktypes.h (100%) rename pandas/{ => _libs}/src/klib/kvec.h (100%) rename pandas/{ => _libs}/src/msgpack/pack.h (100%) rename pandas/{ => _libs}/src/msgpack/pack_template.h (100%) rename pandas/{ => _libs}/src/msgpack/sysdep.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack_define.h (100%) rename pandas/{ => _libs}/src/msgpack/unpack_template.h (100%) rename pandas/{ => _libs}/src/numpy.pxd (100%) rename pandas/{ => _libs}/src/numpy_helper.h (100%) rename pandas/{ => _libs}/src/offsets.pyx (100%) rename pandas/{ => _libs}/src/parse_helper.h (100%) rename pandas/{ => _libs}/src/parser/.gitignore (100%) rename pandas/{ => _libs}/src/parser/Makefile (100%) rename pandas/{ => _libs}/src/parser/io.c (100%) rename pandas/{ => _libs}/src/parser/io.h (100%) rename pandas/{ => _libs}/src/parser/tokenizer.c (100%) rename pandas/{ => _libs}/src/parser/tokenizer.h (100%) rename pandas/{ => _libs}/src/period_helper.c (100%) rename pandas/{ => _libs}/src/period_helper.h (100%) rename pandas/{ => _libs}/src/properties.pyx (100%) rename pandas/{ => _libs}/src/reduce.pyx (100%) rename pandas/{ => _libs}/src/skiplist.h (100%) rename pandas/{ => _libs}/src/skiplist.pxd (100%) rename pandas/{ => _libs}/src/skiplist.pyx (100%) rename pandas/{ => _libs}/src/ujson/lib/ultrajson.h (100%) rename pandas/{ => _libs}/src/ujson/lib/ultrajsondec.c (100%) rename pandas/{ => _libs}/src/ujson/lib/ultrajsonenc.c (100%) rename pandas/{ => _libs}/src/ujson/python/JSONtoObj.c (100%) rename pandas/{ => _libs}/src/ujson/python/objToJSON.c (99%) rename pandas/{ => _libs}/src/ujson/python/py_defines.h (100%) rename pandas/{ => _libs}/src/ujson/python/ujson.c (95%) rename pandas/{ => _libs}/src/ujson/python/version.h (100%) rename pandas/{ => _libs}/src/util.pxd (100%) rename pandas/{ => _libs}/tslib.pxd (100%) rename pandas/{ => _libs}/tslib.pyx (100%) rename pandas/{ => core}/window.pyx (99%) rename pandas/{ => io}/msgpack/__init__.py (81%) rename pandas/{ => io}/msgpack/_packer.pyx (98%) rename pandas/{ => io}/msgpack/_unpacker.pyx (98%) rename pandas/{ => io}/msgpack/_version.py (100%) rename pandas/{ => io}/msgpack/exceptions.py (100%) rename pandas/{parser.pyx => 
io/parsers.pyx} (99%) rename pandas/io/sas/{saslib.pyx => sas.pyx} (100%) create mode 100644 pandas/json.py create mode 100644 pandas/lib.py create mode 100644 pandas/parser.py rename pandas/{src => sparse}/sparse.pyx (100%) rename pandas/{src => sparse}/sparse_op_helper.pxi.in (100%) rename pandas/tests/{ => io}/msgpack/__init__.py (100%) rename pandas/tests/{ => io}/msgpack/test_buffer.py (90%) rename pandas/tests/{ => io}/msgpack/test_case.py (98%) rename pandas/tests/{ => io}/msgpack/test_except.py (96%) rename pandas/tests/{ => io}/msgpack/test_extension.py (96%) rename pandas/tests/{ => io}/msgpack/test_format.py (98%) rename pandas/tests/{ => io}/msgpack/test_limits.py (97%) rename pandas/tests/{ => io}/msgpack/test_newspec.py (97%) rename pandas/tests/{ => io}/msgpack/test_obj.py (98%) rename pandas/tests/{ => io}/msgpack/test_pack.py (98%) rename pandas/tests/{ => io}/msgpack/test_read_size.py (96%) rename pandas/tests/{ => io}/msgpack/test_seq.py (96%) rename pandas/tests/{ => io}/msgpack/test_sequnpack.py (97%) rename pandas/tests/{ => io}/msgpack/test_subtype.py (90%) rename pandas/tests/{ => io}/msgpack/test_unpack.py (96%) rename pandas/tests/{ => io}/msgpack/test_unpack_raw.py (94%) rename pandas/{src/hash.pyx => tools/hashing.pyx} (100%) create mode 100644 pandas/tslib.py rename pandas/{src => util}/testing.pyx (100%) diff --git a/Makefile b/Makefile index 9a768932b8bea..90dcd16d955d6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -tseries: pandas/lib.pyx pandas/tslib.pyx pandas/hashtable.pyx +tseries: pandas/_libs/lib.pyx pandas/_libs/tslib.pyx pandas/_libs/hashtable.pyx python setup.py build_ext --inplace .PHONY : develop build clean clean_pyc tseries doc diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 53cb1cf465698..72700c3de282e 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -107,4 +107,4 @@ def setup(self): self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern')) self.ts = self.s[self.halfway] - self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern')) \ No newline at end of file + self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern')) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 25b0b5dd4d1b0..56ccc94c414fb 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -8,16 +8,22 @@ import random import numpy as np import threading +from importlib import import_module + try: from pandas.compat import range except ImportError: pass np.random.seed(1234) -try: - import pandas._tseries as lib -except: - import pandas.lib as lib + +# try em until it works! 
+for imp in ['pandas_tseries', 'pandas.lib', 'pandas._libs.lib']: + try: + lib = import_module(imp) + break + except: + pass try: Panel = Panel diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index ebe278f6e68b5..6609305502011 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -21,4 +21,4 @@ def time_shift(self): self.panel.shift(1) def time_shift_minor(self): - self.panel.shift(1, axis='minor') \ No newline at end of file + self.panel.shift(1, axis='minor') diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ece9ff4a1adff..8f2033de6c77f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -484,6 +484,35 @@ New Behavior: In [11]: index.memory_usage(deep=True) Out[11]: 260 +.. _whatsnew_0200.api_breaking.extensions: + +Extension Modules Moved +^^^^^^^^^^^^^^^^^^^^^^^ + +Some formerly public c/c++/cython extension modules have been moved and/or renamed. These are all removed from the public API. +If indicated, a deprecation warning will be issued if you reference that module. (:issue:`12588`) + +.. csv-table:: + :header: "Previous Location", "New Location", "Deprecated" + :widths: 30, 30, 4 + + "pandas.lib", "pandas._libs.lib", "X" + "pandas.tslib", "pandas._libs.tslib", "X" + "pandas._join", "pandas._libs.join", "" + "pandas._period", "pandas._libs.period", "" + "pandas.msgpack", "pandas.io.msgpack", "" + "pandas.index", "pandas._libs.index", "" + "pandas.algos", "pandas._libs.algos", "" + "pandas.hashtable", "pandas._libs.hashtable", "" + "pandas.json", "pandas.io.json.libjson", "X" + "pandas.parser", "pandas.io.libparsers", "X" + "pandas.io.sas.saslib", "pandas.io.sas.libsas", "" + "pandas._testing", "pandas.util.libtesting", "" + "pandas._sparse", "pandas.sparse.libsparse", "" + "pandas._hash", "pandas.tools.libhash", "" + "pandas._window", "pandas.core.libwindow", "" + + .. 
_whatsnew_0200.api_breaking.groupby_describe: Groupby Describe Formatting diff --git a/pandas/__init__.py b/pandas/__init__.py index 3bded89e6644a..5c7c9d44c5d10 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -23,7 +23,9 @@ from pandas.compat.numpy import * try: - from pandas import hashtable, tslib, lib + from pandas._libs import (hashtable as _hashtable, + lib as _lib, + tslib as _tslib) except ImportError as e: # pragma: no cover # hack but overkill to use re module = str(e).lstrip('cannot import name ') @@ -52,11 +54,17 @@ from pandas.tools.util import to_numeric from pandas.core.reshape import melt from pandas.util.print_versions import show_versions - from pandas.io.api import * - from pandas.util._tester import test +# extension module deprecations +from pandas.util.depr_module import _DeprecatedModule + +json = _DeprecatedModule(deprmod='pandas.json', deprmodto='pandas.io.json.libjson') +parser = _DeprecatedModule(deprmod='pandas.parser', deprmodto='pandas.io.libparsers') +lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib') +tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib') + # use the closest tagged version if possible from ._version import get_versions v = get_versions() diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py new file mode 100644 index 0000000000000..ab3832d0292ba --- /dev/null +++ b/pandas/_libs/__init__.py @@ -0,0 +1,8 @@ +# flake8: noqa + +from .tslib import iNaT, NaT, Timestamp, Timedelta, OutOfBoundsDatetime + +# TODO +# period is directly dependent on tslib and imports python +# modules, so exposing Period as an alias is currently not possible +# from period import Period diff --git a/pandas/algos.pyx b/pandas/_libs/algos.pyx similarity index 99% rename from pandas/algos.pyx rename to pandas/_libs/algos.pyx index 32955fd0f465b..7d3ce3280ec1e 100644 --- a/pandas/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -37,7 +37,7 @@ float64 = np.dtype(np.float64) cdef double NaN = np.NaN cdef double nan = NaN -cdef extern from "src/headers/math.h": +cdef extern from "../src/headers/math.h": double sqrt(double x) nogil double fabs(double) nogil @@ -46,7 +46,7 @@ from util cimport numeric, get_nat cimport lib from lib cimport is_null_datetimelike -from pandas import lib +from pandas._libs import lib cdef int64_t iNaT = get_nat() diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in similarity index 99% rename from pandas/src/algos_common_helper.pxi.in rename to pandas/_libs/algos_common_helper.pxi.in index b83dec1d26242..336dd77ea9a89 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -433,7 +433,7 @@ def arrmap_{{name}}(ndarray[{{c_type}}] index, object func): cdef ndarray[object] result = np.empty(length, dtype=np.object_) - from pandas.lib import maybe_convert_objects + from pandas._libs.lib import maybe_convert_objects for i in range(length): result[i] = func(index[i]) diff --git a/pandas/src/algos_groupby_helper.pxi.in b/pandas/_libs/algos_groupby_helper.pxi.in similarity index 100% rename from pandas/src/algos_groupby_helper.pxi.in rename to pandas/_libs/algos_groupby_helper.pxi.in diff --git a/pandas/src/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in similarity index 100% rename from pandas/src/algos_rank_helper.pxi.in rename to pandas/_libs/algos_rank_helper.pxi.in diff --git a/pandas/src/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in similarity index 100% rename from 
pandas/src/algos_take_helper.pxi.in rename to pandas/_libs/algos_take_helper.pxi.in diff --git a/pandas/hashtable.pxd b/pandas/_libs/hashtable.pxd similarity index 100% rename from pandas/hashtable.pxd rename to pandas/_libs/hashtable.pxd diff --git a/pandas/hashtable.pyx b/pandas/_libs/hashtable.pyx similarity index 99% rename from pandas/hashtable.pyx rename to pandas/_libs/hashtable.pyx index 276b0679070dc..eee287b2c157b 100644 --- a/pandas/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -22,7 +22,7 @@ cdef extern from "numpy/npy_math.h": cimport cython cimport numpy as cnp -from pandas.lib import checknull +from pandas._libs.lib import checknull cnp.import_array() cnp.import_ufunc() diff --git a/pandas/src/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in similarity index 100% rename from pandas/src/hashtable_class_helper.pxi.in rename to pandas/_libs/hashtable_class_helper.pxi.in diff --git a/pandas/src/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in similarity index 100% rename from pandas/src/hashtable_func_helper.pxi.in rename to pandas/_libs/hashtable_func_helper.pxi.in diff --git a/pandas/index.pyx b/pandas/_libs/index.pyx similarity index 99% rename from pandas/index.pyx rename to pandas/_libs/index.pyx index 37fe7d90bebe0..c7a537acf5d6f 100644 --- a/pandas/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,8 +17,8 @@ import numpy as np cimport tslib from hashtable cimport * -from pandas import algos, tslib, hashtable as _hash -from pandas.tslib import Timestamp, Timedelta +from pandas._libs import tslib, algos, hashtable as _hash +from pandas._libs.tslib import Timestamp, Timedelta from datetime cimport (get_datetime64_value, _pydatetime_to_dts, pandas_datetimestruct) diff --git a/pandas/src/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in similarity index 100% rename from pandas/src/index_class_helper.pxi.in rename to pandas/_libs/index_class_helper.pxi.in diff --git a/pandas/src/join.pyx b/pandas/_libs/join.pyx similarity index 98% rename from pandas/src/join.pyx rename to pandas/_libs/join.pyx index 65c790beb5dbf..385a9762ed90d 100644 --- a/pandas/src/join.pyx +++ b/pandas/_libs/join.pyx @@ -32,10 +32,10 @@ float64 = np.dtype(np.float64) cdef double NaN = np.NaN cdef double nan = NaN -from pandas.algos import groupsort_indexer, ensure_platform_int +from pandas._libs.algos import groupsort_indexer, ensure_platform_int from pandas.core.algorithms import take_nd -include "joins_func_helper.pxi" +include "join_func_helper.pxi" def inner_join(ndarray[int64_t] left, ndarray[int64_t] right, diff --git a/pandas/src/joins_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in similarity index 100% rename from pandas/src/joins_func_helper.pxi.in rename to pandas/_libs/join_func_helper.pxi.in diff --git a/pandas/src/join_helper.pxi.in b/pandas/_libs/join_helper.pxi.in similarity index 100% rename from pandas/src/join_helper.pxi.in rename to pandas/_libs/join_helper.pxi.in diff --git a/pandas/lib.pxd b/pandas/_libs/lib.pxd similarity index 100% rename from pandas/lib.pxd rename to pandas/_libs/lib.pxd diff --git a/pandas/lib.pyx b/pandas/_libs/lib.pyx similarity index 100% rename from pandas/lib.pyx rename to pandas/_libs/lib.pyx diff --git a/pandas/src/period.pyx b/pandas/_libs/period.pyx similarity index 98% rename from pandas/src/period.pyx rename to pandas/_libs/period.pyx index 2d92b9f192328..f30035910a62f 100644 --- a/pandas/src/period.pyx +++ b/pandas/_libs/period.pyx @@ -16,19 +16,15 @@ cdef extern from 
"datetime_helper.h": from libc.stdlib cimport free from pandas import compat - -from pandas.tseries import offsets -from pandas.tseries.tools import parse_time_string +from pandas.compat import PY2 cimport cython from datetime cimport * -cimport util -cimport lib +cimport util, lib from lib cimport is_null_datetimelike, is_period -import lib -from pandas import tslib -from tslib import Timedelta, Timestamp, iNaT, NaT -from tslib import have_pytz, _get_utcoffset +from pandas._libs import tslib, lib +from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, + NaT, have_pytz, _get_utcoffset) from tslib cimport ( maybe_get_tz, _is_utc, @@ -37,12 +33,10 @@ from tslib cimport ( _nat_scalar_rules, ) +from pandas.tseries import offsets +from pandas.tseries.tools import parse_time_string from pandas.tseries import frequencies -from sys import version_info - -cdef bint PY2 = version_info[0] == 2 - cdef int64_t NPY_NAT = util.get_nat() cdef int RESO_US = frequencies.RESO_US @@ -474,7 +468,7 @@ def extract_ordinals(ndarray[object] values, freq): p = values[i] if is_null_datetimelike(p): - ordinals[i] = tslib.iNaT + ordinals[i] = iNaT else: try: ordinals[i] = p.ordinal @@ -485,9 +479,9 @@ def extract_ordinals(ndarray[object] values, freq): except AttributeError: p = Period(p, freq=freq) - if p is tslib.NaT: + if p is NaT: # input may contain NaT-like string - ordinals[i] = tslib.iNaT + ordinals[i] = iNaT else: ordinals[i] = p.ordinal @@ -716,8 +710,8 @@ cdef class _Period(object): """ Fast creation from an ordinal and freq that are already validated! """ - if ordinal == tslib.iNaT: - return tslib.NaT + if ordinal == iNaT: + return NaT else: self = _Period.__new__(cls) self.ordinal = ordinal @@ -730,7 +724,7 @@ cdef class _Period(object): msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) - elif other is tslib.NaT: + elif other is NaT: return _nat_scalar_rules[op] # index/series like elif hasattr(other, '_typ'): @@ -776,8 +770,8 @@ cdef class _Period(object): offsets.Tick, offsets.DateOffset, Timedelta)): return self._add_delta(other) - elif other is tslib.NaT: - return tslib.NaT + elif other is NaT: + return NaT elif lib.is_integer(other): ordinal = self.ordinal + other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) @@ -808,8 +802,8 @@ cdef class _Period(object): else: # pragma: no cover return NotImplemented elif isinstance(other, Period): - if self is tslib.NaT: - return tslib.NaT + if self is NaT: + return NaT return NotImplemented else: return NotImplemented @@ -1164,7 +1158,7 @@ class Period(_Period): if (year is None and month is None and quarter is None and day is None and hour is None and minute is None and second is None): - ordinal = tslib.iNaT + ordinal = iNaT else: if freq is None: raise ValueError("If value is None, freq cannot be None") @@ -1190,7 +1184,7 @@ class Period(_Period): ordinal = converted.ordinal elif is_null_datetimelike(value) or value in tslib._nat_strings: - ordinal = tslib.iNaT + ordinal = iNaT elif isinstance(value, compat.string_types) or lib.is_integer(value): if lib.is_integer(value): diff --git a/pandas/src/reshape.pyx b/pandas/_libs/reshape.pyx similarity index 100% rename from pandas/src/reshape.pyx rename to pandas/_libs/reshape.pyx diff --git a/pandas/src/reshape_helper.pxi.in b/pandas/_libs/reshape_helper.pxi.in similarity index 100% rename from pandas/src/reshape_helper.pxi.in rename to pandas/_libs/reshape_helper.pxi.in diff --git 
a/pandas/src/datetime.pxd b/pandas/_libs/src/datetime.pxd
similarity index 100%
rename from pandas/src/datetime.pxd
rename to pandas/_libs/src/datetime.pxd
diff --git a/pandas/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c
similarity index 100%
rename from pandas/src/datetime/np_datetime.c
rename to pandas/_libs/src/datetime/np_datetime.c
diff --git a/pandas/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h
similarity index 100%
rename from pandas/src/datetime/np_datetime.h
rename to pandas/_libs/src/datetime/np_datetime.h
diff --git a/pandas/src/datetime/np_datetime_strings.c b/pandas/_libs/src/datetime/np_datetime_strings.c
similarity index 100%
rename from pandas/src/datetime/np_datetime_strings.c
rename to pandas/_libs/src/datetime/np_datetime_strings.c
diff --git a/pandas/src/datetime/np_datetime_strings.h b/pandas/_libs/src/datetime/np_datetime_strings.h
similarity index 100%
rename from pandas/src/datetime/np_datetime_strings.h
rename to pandas/_libs/src/datetime/np_datetime_strings.h
diff --git a/pandas/src/datetime_helper.h b/pandas/_libs/src/datetime_helper.h
similarity index 100%
rename from pandas/src/datetime_helper.h
rename to pandas/_libs/src/datetime_helper.h
diff --git a/pandas/src/headers/math.h b/pandas/_libs/src/headers/math.h
similarity index 100%
rename from pandas/src/headers/math.h
rename to pandas/_libs/src/headers/math.h
diff --git a/pandas/src/headers/ms_inttypes.h b/pandas/_libs/src/headers/ms_inttypes.h
similarity index 100%
rename from pandas/src/headers/ms_inttypes.h
rename to pandas/_libs/src/headers/ms_inttypes.h
diff --git a/pandas/src/headers/ms_stdint.h b/pandas/_libs/src/headers/ms_stdint.h
similarity index 100%
rename from pandas/src/headers/ms_stdint.h
rename to pandas/_libs/src/headers/ms_stdint.h
diff --git a/pandas/src/headers/portable.h b/pandas/_libs/src/headers/portable.h
similarity index 100%
rename from pandas/src/headers/portable.h
rename to pandas/_libs/src/headers/portable.h
diff --git a/pandas/src/headers/stdint.h b/pandas/_libs/src/headers/stdint.h
similarity index 100%
rename from pandas/src/headers/stdint.h
rename to pandas/_libs/src/headers/stdint.h
diff --git a/pandas/src/helper.h b/pandas/_libs/src/helper.h
similarity index 100%
rename from pandas/src/helper.h
rename to pandas/_libs/src/helper.h
diff --git a/pandas/src/inference.pyx b/pandas/_libs/src/inference.pyx
similarity index 100%
rename from pandas/src/inference.pyx
rename to pandas/_libs/src/inference.pyx
diff --git a/pandas/src/khash.pxd b/pandas/_libs/src/khash.pxd
similarity index 100%
rename from pandas/src/khash.pxd
rename to pandas/_libs/src/khash.pxd
diff --git a/pandas/src/klib/khash.h b/pandas/_libs/src/klib/khash.h
similarity index 100%
rename from pandas/src/klib/khash.h
rename to pandas/_libs/src/klib/khash.h
diff --git a/pandas/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h
similarity index 100%
rename from pandas/src/klib/khash_python.h
rename to pandas/_libs/src/klib/khash_python.h
diff --git a/pandas/src/klib/ktypes.h b/pandas/_libs/src/klib/ktypes.h
similarity index 100%
rename from pandas/src/klib/ktypes.h
rename to pandas/_libs/src/klib/ktypes.h
diff --git a/pandas/src/klib/kvec.h b/pandas/_libs/src/klib/kvec.h
similarity index 100%
rename from pandas/src/klib/kvec.h
rename to pandas/_libs/src/klib/kvec.h
diff --git a/pandas/src/msgpack/pack.h b/pandas/_libs/src/msgpack/pack.h
similarity index 100%
rename from pandas/src/msgpack/pack.h
rename to pandas/_libs/src/msgpack/pack.h
diff --git
a/pandas/src/msgpack/pack_template.h b/pandas/_libs/src/msgpack/pack_template.h similarity index 100% rename from pandas/src/msgpack/pack_template.h rename to pandas/_libs/src/msgpack/pack_template.h diff --git a/pandas/src/msgpack/sysdep.h b/pandas/_libs/src/msgpack/sysdep.h similarity index 100% rename from pandas/src/msgpack/sysdep.h rename to pandas/_libs/src/msgpack/sysdep.h diff --git a/pandas/src/msgpack/unpack.h b/pandas/_libs/src/msgpack/unpack.h similarity index 100% rename from pandas/src/msgpack/unpack.h rename to pandas/_libs/src/msgpack/unpack.h diff --git a/pandas/src/msgpack/unpack_define.h b/pandas/_libs/src/msgpack/unpack_define.h similarity index 100% rename from pandas/src/msgpack/unpack_define.h rename to pandas/_libs/src/msgpack/unpack_define.h diff --git a/pandas/src/msgpack/unpack_template.h b/pandas/_libs/src/msgpack/unpack_template.h similarity index 100% rename from pandas/src/msgpack/unpack_template.h rename to pandas/_libs/src/msgpack/unpack_template.h diff --git a/pandas/src/numpy.pxd b/pandas/_libs/src/numpy.pxd similarity index 100% rename from pandas/src/numpy.pxd rename to pandas/_libs/src/numpy.pxd diff --git a/pandas/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h similarity index 100% rename from pandas/src/numpy_helper.h rename to pandas/_libs/src/numpy_helper.h diff --git a/pandas/src/offsets.pyx b/pandas/_libs/src/offsets.pyx similarity index 100% rename from pandas/src/offsets.pyx rename to pandas/_libs/src/offsets.pyx diff --git a/pandas/src/parse_helper.h b/pandas/_libs/src/parse_helper.h similarity index 100% rename from pandas/src/parse_helper.h rename to pandas/_libs/src/parse_helper.h diff --git a/pandas/src/parser/.gitignore b/pandas/_libs/src/parser/.gitignore similarity index 100% rename from pandas/src/parser/.gitignore rename to pandas/_libs/src/parser/.gitignore diff --git a/pandas/src/parser/Makefile b/pandas/_libs/src/parser/Makefile similarity index 100% rename from pandas/src/parser/Makefile rename to pandas/_libs/src/parser/Makefile diff --git a/pandas/src/parser/io.c b/pandas/_libs/src/parser/io.c similarity index 100% rename from pandas/src/parser/io.c rename to pandas/_libs/src/parser/io.c diff --git a/pandas/src/parser/io.h b/pandas/_libs/src/parser/io.h similarity index 100% rename from pandas/src/parser/io.h rename to pandas/_libs/src/parser/io.h diff --git a/pandas/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c similarity index 100% rename from pandas/src/parser/tokenizer.c rename to pandas/_libs/src/parser/tokenizer.c diff --git a/pandas/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h similarity index 100% rename from pandas/src/parser/tokenizer.h rename to pandas/_libs/src/parser/tokenizer.h diff --git a/pandas/src/period_helper.c b/pandas/_libs/src/period_helper.c similarity index 100% rename from pandas/src/period_helper.c rename to pandas/_libs/src/period_helper.c diff --git a/pandas/src/period_helper.h b/pandas/_libs/src/period_helper.h similarity index 100% rename from pandas/src/period_helper.h rename to pandas/_libs/src/period_helper.h diff --git a/pandas/src/properties.pyx b/pandas/_libs/src/properties.pyx similarity index 100% rename from pandas/src/properties.pyx rename to pandas/_libs/src/properties.pyx diff --git a/pandas/src/reduce.pyx b/pandas/_libs/src/reduce.pyx similarity index 100% rename from pandas/src/reduce.pyx rename to pandas/_libs/src/reduce.pyx diff --git a/pandas/src/skiplist.h b/pandas/_libs/src/skiplist.h similarity index 100% rename from pandas/src/skiplist.h 
rename to pandas/_libs/src/skiplist.h diff --git a/pandas/src/skiplist.pxd b/pandas/_libs/src/skiplist.pxd similarity index 100% rename from pandas/src/skiplist.pxd rename to pandas/_libs/src/skiplist.pxd diff --git a/pandas/src/skiplist.pyx b/pandas/_libs/src/skiplist.pyx similarity index 100% rename from pandas/src/skiplist.pyx rename to pandas/_libs/src/skiplist.pyx diff --git a/pandas/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h similarity index 100% rename from pandas/src/ujson/lib/ultrajson.h rename to pandas/_libs/src/ujson/lib/ultrajson.h diff --git a/pandas/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c similarity index 100% rename from pandas/src/ujson/lib/ultrajsondec.c rename to pandas/_libs/src/ujson/lib/ultrajsondec.c diff --git a/pandas/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c similarity index 100% rename from pandas/src/ujson/lib/ultrajsonenc.c rename to pandas/_libs/src/ujson/lib/ultrajsonenc.c diff --git a/pandas/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c similarity index 100% rename from pandas/src/ujson/python/JSONtoObj.c rename to pandas/_libs/src/ujson/python/JSONtoObj.c diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c similarity index 99% rename from pandas/src/ujson/python/objToJSON.c rename to pandas/_libs/src/ujson/python/objToJSON.c index e3c75d3b6e081..26a68b8a9ae3a 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -180,7 +180,7 @@ void initObjToJSON(void) Py_DECREF(mod_pandas); } - mod_tslib = PyImport_ImportModule("pandas.tslib"); + mod_tslib = PyImport_ImportModule("pandas._libs.tslib"); if (mod_tslib) { cls_nat = (PyTypeObject *)PyObject_GetAttrString(mod_tslib, "NaTType"); Py_DECREF(mod_tslib); diff --git a/pandas/src/ujson/python/py_defines.h b/pandas/_libs/src/ujson/python/py_defines.h similarity index 100% rename from pandas/src/ujson/python/py_defines.h rename to pandas/_libs/src/ujson/python/py_defines.h diff --git a/pandas/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c similarity index 95% rename from pandas/src/ujson/python/ujson.c rename to pandas/_libs/src/ujson/python/ujson.c index 8c25975f12409..ec6720f16bc77 100644 --- a/pandas/src/ujson/python/ujson.c +++ b/pandas/_libs/src/ujson/python/ujson.c @@ -80,7 +80,7 @@ static PyMethodDef ujsonMethods[] = { static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, - "_pandasujson", + "_libjson", 0, /* m_doc */ -1, /* m_size */ ujsonMethods, /* m_methods */ @@ -90,14 +90,14 @@ static struct PyModuleDef moduledef = { NULL /* m_free */ }; -#define PYMODINITFUNC PyMODINIT_FUNC PyInit_json(void) +#define PYMODINITFUNC PyMODINIT_FUNC PyInit_libjson(void) #define PYMODULE_CREATE() PyModule_Create(&moduledef) #define MODINITERROR return NULL #else -#define PYMODINITFUNC PyMODINIT_FUNC initjson(void) -#define PYMODULE_CREATE() Py_InitModule("json", ujsonMethods) +#define PYMODINITFUNC PyMODINIT_FUNC initlibjson(void) +#define PYMODULE_CREATE() Py_InitModule("libjson", ujsonMethods) #define MODINITERROR return #endif diff --git a/pandas/src/ujson/python/version.h b/pandas/_libs/src/ujson/python/version.h similarity index 100% rename from pandas/src/ujson/python/version.h rename to pandas/_libs/src/ujson/python/version.h diff --git a/pandas/src/util.pxd b/pandas/_libs/src/util.pxd similarity index 100% rename from pandas/src/util.pxd rename to pandas/_libs/src/util.pxd diff --git 
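
The ujson.c and objToJSON.c hunks above only rename the compiled module (initjson/PyInit_json become initlibjson/PyInit_libjson) and re-point its NaT lookup at pandas._libs.tslib; the public JSON entry points are untouched. A quick sanity check through that public API (illustrative data):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
    s = df.to_json(orient='records')                    # serialised by the renamed C extension
    round_tripped = pd.read_json(s, orient='records')   # back to an equivalent frame
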
a/pandas/tslib.pxd b/pandas/_libs/tslib.pxd similarity index 100% rename from pandas/tslib.pxd rename to pandas/_libs/tslib.pxd diff --git a/pandas/tslib.pyx b/pandas/_libs/tslib.pyx similarity index 100% rename from pandas/tslib.pyx rename to pandas/_libs/tslib.pyx diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 25a170c3eb121..279a82fea1cc2 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -62,7 +62,13 @@ def load_reduce(self): # 10890 ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), - ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries') + ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries'), + + # 12588, extensions moving + ('pandas._sparse', 'BlockIndex'): ('pandas.sparse.libsparse', 'BlockIndex'), + ('pandas.tslib', 'Timestamp'): ('pandas._libs.tslib', 'Timestamp'), + ('pandas.tslib', '__nat_unpickle'): ('pandas._libs.tslib', '__nat_unpickle'), + ('pandas._period', 'Period'): ('pandas._libs.period', 'Period') } diff --git a/pandas/computation/scope.py b/pandas/computation/scope.py index 875aaa959b264..9ade755e0ff12 100644 --- a/pandas/computation/scope.py +++ b/pandas/computation/scope.py @@ -1,4 +1,5 @@ -"""Module for scope operations +""" +Module for scope operations """ import sys @@ -10,7 +11,8 @@ import numpy as np -import pandas as pd +import pandas +import pandas as pd # noqa from pandas.compat import DeepChainMap, map, StringIO from pandas.core.base import StringMixin import pandas.computation as compu @@ -46,7 +48,7 @@ def _raw_hex_id(obj): _DEFAULT_GLOBALS = { - 'Timestamp': pd.lib.Timestamp, + 'Timestamp': pandas._libs.lib.Timestamp, 'datetime': datetime.datetime, 'True': True, 'False': False, diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index d37c98c9b9b90..6937675603c10 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,7 @@ from warnings import warn import numpy as np -from pandas import compat, lib, tslib, _np_version_under1p8 +from pandas import compat, _np_version_under1p8 from pandas.types.cast import _maybe_promote from pandas.types.generic import ABCSeries, ABCIndex from pandas.types.common import (is_unsigned_integer_dtype, @@ -34,10 +34,9 @@ from pandas.types.missing import isnull import pandas.core.common as com -import pandas.algos as algos -import pandas.hashtable as htable from pandas.compat import string_types -from pandas.tslib import iNaT +from pandas._libs import algos, lib, hashtable as htable +from pandas._libs.tslib import iNaT # --------------- # @@ -1412,7 +1411,7 @@ def diff(arr, n, axis=0): if needs_i8_conversion(arr): dtype = np.float64 arr = arr.view('i8') - na = tslib.iNaT + na = iNaT is_timedelta = True elif issubclass(dtype.type, np.integer): dtype = np.float64 diff --git a/pandas/core/base.py b/pandas/core/base.py index 55149198b0dbf..d7c9e35ab6a51 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -12,7 +12,7 @@ from pandas.core import common as com import pandas.core.nanops as nanops -import pandas.lib as lib +import pandas._libs.lib as lib from pandas.compat.numpy import function as nv from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d5dce250275d9..47db86ce1e73e 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -4,9 +4,9 @@ from warnings import warn import types -from pandas 
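
The pickle_compat.py additions above map the pre-0.20 extension locations (pandas.tslib, pandas._period, pandas._sparse) to their new homes so that old pickles keep loading. The same idea expressed with the standard-library hook, as a sketch rather than pandas' actual load_reduce machinery (the mapping below is a small illustrative subset):

    import importlib
    import pickle

    _MOVED = {
        ('pandas.tslib', 'Timestamp'): ('pandas._libs.tslib', 'Timestamp'),
        ('pandas._period', 'Period'): ('pandas._libs.period', 'Period'),
    }

    class _CompatUnpickler(pickle.Unpickler):
        def find_class(self, module, name):
            # redirect lookups of relocated classes to their new module paths
            module, name = _MOVED.get((module, name), (module, name))
            return getattr(importlib.import_module(module), name)
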
import compat, lib +from pandas import compat from pandas.compat import u, lzip -import pandas.algos as _algos +from pandas._libs import lib, algos as libalgos from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex from pandas.types.missing import isnull, notnull @@ -1817,8 +1817,8 @@ def _reverse_indexer(self): """ categories = self.categories - r, counts = _algos.groupsort_indexer(self.codes.astype('int64'), - categories.size) + r, counts = libalgos.groupsort_indexer(self.codes.astype('int64'), + categories.size) counts = counts.cumsum() result = [r[counts[indexer]:counts[indexer + 1]] for indexer in range(len(counts) - 1)] @@ -1897,7 +1897,7 @@ def mode(self): modes : `Categorical` (sorted) """ - import pandas.hashtable as htable + import pandas._libs.hashtable as htable good = self._codes != -1 values = sorted(htable.mode_int64(_ensure_int64(self._codes[good]))) result = self._constructor(values=values, categories=self.categories, diff --git a/pandas/core/common.py b/pandas/core/common.py index fddac1f29d454..93e24dce8b623 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -8,8 +8,8 @@ from functools import partial import numpy as np -import pandas.lib as lib -import pandas.tslib as tslib +from pandas._libs import lib, tslib + from pandas import compat from pandas.compat import long, zip, iteritems from pandas.core.config import get_option @@ -476,7 +476,6 @@ def _where_compat(mask, arr1, arr2): new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8')) return new_vals.view(_NS_DTYPE) - import pandas.tslib as tslib if arr1.dtype == _NS_DTYPE: arr1 = tslib.ints_to_pydatetime(arr1.view('i8')) if arr2.dtype == _NS_DTYPE: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 15179ac321076..4e7a5ebdf6f67 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -71,7 +71,7 @@ from pandas.core.series import Series from pandas.core.categorical import Categorical import pandas.computation.expressions as expressions -import pandas.core.algorithms as algos +import pandas.core.algorithms as algorithms from pandas.computation.eval import eval as _eval from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) @@ -93,8 +93,7 @@ from pandas.formats.printing import pprint_thing import pandas.tools.plotting as gfx -import pandas.lib as lib -import pandas.algos as _algos +from pandas._libs import lib, algos as libalgos from pandas.core.config import get_option @@ -2794,8 +2793,8 @@ def _reindex_multi(self, axes, copy, fill_value): if row_indexer is not None and col_indexer is not None: indexer = row_indexer, col_indexer - new_values = algos.take_2d_multi(self.values, indexer, - fill_value=fill_value) + new_values = algorithms.take_2d_multi(self.values, indexer, + fill_value=fill_value) return self._constructor(new_values, index=new_index, columns=new_columns) else: @@ -3180,12 +3179,11 @@ def duplicated(self, subset=None, keep='first'): duplicated : Series """ from pandas.core.sorting import get_group_index - from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT + from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT def f(vals): - labels, shape = algos.factorize(vals, - size_hint=min(len(self), - _SIZE_HINT_LIMIT)) + labels, shape = algorithms.factorize( + vals, size_hint=min(len(self), _SIZE_HINT_LIMIT)) return labels.astype('i8', copy=False), len(shape) if subset is None: @@ -3437,7 +3435,7 @@ def nlargest(self, n, columns, keep='first'): 1 10 b 2 2 8 d NaN """ - 
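
The DataFrame.duplicated hunk above only renames the imports (algorithms.factorize plus pandas._libs.hashtable.duplicated_int64); the user-facing behaviour it implements is unchanged. For context (illustrative frame):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 1, 2], 'b': ['x', 'x', 'y']})
    df.duplicated()              # [False, True, False]: the second row repeats the first
    df.duplicated(keep='last')   # [True, False, False]: keep the last occurrence instead
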
return algos.select_n_frame(self, columns, n, 'nlargest', keep) + return algorithms.select_n_frame(self, columns, n, 'nlargest', keep) def nsmallest(self, n, columns, keep='first'): """Get the rows of a DataFrame sorted by the `n` smallest @@ -3471,7 +3469,7 @@ def nsmallest(self, n, columns, keep='first'): 0 1 a 1 2 8 d NaN """ - return algos.select_n_frame(self, columns, n, 'nsmallest', keep) + return algorithms.select_n_frame(self, columns, n, 'nsmallest', keep) def swaplevel(self, i=-2, j=-1, axis=0): """ @@ -4739,10 +4737,10 @@ def corr(self, method='pearson', min_periods=1): mat = numeric_df.values if method == 'pearson': - correl = _algos.nancorr(_ensure_float64(mat), minp=min_periods) + correl = libalgos.nancorr(_ensure_float64(mat), minp=min_periods) elif method == 'spearman': - correl = _algos.nancorr_spearman(_ensure_float64(mat), - minp=min_periods) + correl = libalgos.nancorr_spearman(_ensure_float64(mat), + minp=min_periods) else: if min_periods is None: min_periods = 1 @@ -4802,8 +4800,8 @@ def cov(self, min_periods=None): baseCov = np.cov(mat.T) baseCov = baseCov.reshape((len(cols), len(cols))) else: - baseCov = _algos.nancorr(_ensure_float64(mat), cov=True, - minp=min_periods) + baseCov = libalgos.nancorr(_ensure_float64(mat), cov=True, + minp=min_periods) return self._constructor(baseCov, index=idx, columns=cols) @@ -5669,7 +5667,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): indexer = indexer_cache[id(index)] = index.get_indexer(columns) values = _values_from_object(s) - aligned_values.append(algos.take_1d(values, indexer)) + aligned_values.append(algorithms.take_1d(values, indexer)) values = np.vstack(aligned_values) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 298fa75779420..ff58a2aa77447 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7,11 +7,9 @@ import json import numpy as np -import pandas.lib as lib - import pandas as pd - +from pandas._libs import tslib, lib from pandas.types.common import (_coerce_to_dtype, _ensure_int64, needs_i8_conversion, @@ -6115,7 +6113,7 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): issubclass(y.dtype.type, (np.datetime64, np.timedelta64))): result = accum_func(y, axis) mask = isnull(self) - np.putmask(result, mask, pd.tslib.iNaT) + np.putmask(result, mask, tslib.iNaT) elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)): mask = isnull(self) np.putmask(y, mask, mask_a) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 43c57a88b4d19..a10be078a8f96 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -55,13 +55,12 @@ from pandas.formats.printing import pprint_thing from pandas.util.validators import validate_kwargs -import pandas.core.algorithms as algos +import pandas.core.algorithms as algorithms import pandas.core.common as com from pandas.core.config import option_context -import pandas.lib as lib -from pandas.lib import Timestamp -import pandas.tslib as tslib -import pandas.algos as _algos + +from pandas._libs import lib, algos as libalgos, Timestamp, NaT, iNaT +from pandas._libs.lib import count_level_2d _doc_template = """ @@ -1474,11 +1473,11 @@ def shift(self, periods=1, freq=None, axis=0): # filled in by Cython indexer = np.zeros_like(labels) - _algos.group_shift_indexer(indexer, labels, ngroups, periods) + libalgos.group_shift_indexer(indexer, labels, ngroups, periods) output = {} for name, obj in self._iterate_slices(): - output[name] = algos.take_nd(obj.values, indexer) + output[name] 
= algorithms.take_nd(obj.values, indexer) return self._wrap_transformed_output(output) @@ -1815,13 +1814,13 @@ def _get_cython_function(self, kind, how, values, is_numeric): def get_func(fname): # see if there is a fused-type version of function # only valid for numeric - f = getattr(_algos, fname, None) + f = getattr(libalgos, fname, None) if f is not None and is_numeric: return f # otherwise find dtype-specific version, falling back to object for dt in [dtype_str, 'object']: - f = getattr(_algos, "%s_%s" % (fname, dtype_str), None) + f = getattr(libalgos, "%s_%s" % (fname, dtype_str), None) if f is not None: return f @@ -1901,7 +1900,7 @@ def _cython_operation(self, kind, values, how, axis): elif is_integer_dtype(values): # we use iNaT for the missing value on ints # so pre-convert to guard this condition - if (values == tslib.iNaT).any(): + if (values == iNaT).any(): values = _ensure_float64(values) else: values = values.astype('int64', copy=False) @@ -1943,7 +1942,7 @@ def _cython_operation(self, kind, values, how, axis): result, values, labels, func, is_numeric, is_datetimelike) if is_integer_dtype(result): - mask = result == tslib.iNaT + mask = result == iNaT if mask.any(): result = result.astype('float64') result[mask] = np.nan @@ -2034,7 +2033,8 @@ def _aggregate_series_fast(self, obj, func): dummy = obj._get_values(slice(None, 0)).to_dense() indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer, convert=False) - group_index = algos.take_nd(group_index, indexer, allow_fill=False) + group_index = algorithms.take_nd( + group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, dummy) result, counts = grouper.get_result() @@ -2132,7 +2132,7 @@ def groups(self): # GH 3881 result = {} for key, value in zip(self.binlabels, self.bins): - if key is not tslib.NaT: + if key is not NaT: result[key] = value return result @@ -2159,7 +2159,7 @@ def get_iterator(self, data, axis=0): start = 0 for edge, label in zip(self.bins, self.binlabels): - if label is not tslib.NaT: + if label is not NaT: yield label, slicer(start, edge) start = edge @@ -2173,7 +2173,7 @@ def indices(self): i = 0 for label, bin in zip(self.binlabels, self.bins): if i < bin: - if label is not tslib.NaT: + if label is not NaT: indices[label] = list(range(i, bin)) i = bin return indices @@ -2383,7 +2383,8 @@ def group_index(self): def _make_labels(self): if self._labels is None or self._group_index is None: - labels, uniques = algos.factorize(self.grouper, sort=self.sort) + labels, uniques = algorithms.factorize( + self.grouper, sort=self.sort) uniques = Index(uniques, name=self.name) self._labels = labels self._group_index = uniques @@ -2928,7 +2929,7 @@ def _transform_fast(self, func): ids, _, ngroup = self.grouper.group_info cast = (self.size().fillna(0) > 0).any() - out = algos.take_1d(func().values, ids) + out = algorithms.take_1d(func().values, ids) if cast: out = self._try_cast(out, self.obj) return Series(out, index=self.obj.index, name=self.obj.name) @@ -2985,7 +2986,7 @@ def nunique(self, dropna=True): except TypeError: # catches object dtypes assert val.dtype == object, \ 'val.dtype must be object, got %s' % val.dtype - val, _ = algos.factorize(val, sort=False) + val, _ = algorithms.factorize(val, sort=False) sorter = np.lexsort((val, ids)) _isnull = lambda a: a == -1 else: @@ -3069,7 +3070,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, ids, val = ids[mask], val[mask] if bins is None: - lab, lev = algos.factorize(val, 
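
The _cython_operation hunk above keeps using iNaT as the missing-value sentinel for integer-backed (datetime-like) data, pre-converting to float64 whenever the sentinel could collide with real values and masking it back to NaN in the result. For reference, iNaT is simply the minimum int64 (a small illustration, not part of the patch):

    import numpy as np
    from pandas._libs import iNaT   # re-exported by the new pandas/_libs/__init__.py

    iNaT == np.iinfo(np.int64).min                    # True
    np.array([iNaT], dtype='i8').view('M8[ns]')[0]    # numpy.datetime64('NaT')
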
sort=True) + lab, lev = algorithms.factorize(val, sort=True) else: cat, bins = cut(val, bins, retbins=True) # bins[:-1] for backward compat; @@ -3108,7 +3109,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, if dropna: m = ids[lab == -1] if _np_version_under1p8: - mi, ml = algos.factorize(m) + mi, ml = algorithms.factorize(m) d[ml] = d[ml] - np.bincount(mi) else: np.add.at(d, m, -1) @@ -3130,7 +3131,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, out = _ensure_int64(out) return Series(out, index=mi, name=self.name) - # for compat. with algos.value_counts need to ensure every + # for compat. with libalgos.value_counts need to ensure every # bin is present at every index level, null filled with zeros diff = np.zeros(len(out), dtype='bool') for lab in labels[:-1]: @@ -3701,7 +3702,7 @@ def _transform_fast(self, result, obj): ids, _, ngroup = self.grouper.group_info output = [] for i, _ in enumerate(result.columns): - res = algos.take_1d(result.iloc[:, i].values, ids) + res = algorithms.take_1d(result.iloc[:, i].values, ids) if cast: res = self._try_cast(res, obj.iloc[:, i]) output.append(res) @@ -3995,7 +3996,6 @@ def _apply_to_column_groupbys(self, func): def count(self): """ Compute count of group, excluding missing values """ from functools import partial - from pandas.lib import count_level_2d from pandas.types.missing import _isnull_ndarraylike as isnull data, _ = self._get_data_to_aggregate() @@ -4190,7 +4190,7 @@ def __init__(self, data, labels, ngroups, axis=0): @cache_readonly def slabels(self): # Sorted labels - return algos.take_nd(self.labels, self.sort_idx, allow_fill=False) + return algorithms.take_nd(self.labels, self.sort_idx, allow_fill=False) @cache_readonly def sort_idx(self): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6cd5eceed5f2a..4b43574f49820 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -53,18 +53,17 @@ import pandas.core.missing as missing from pandas.sparse.array import _maybe_to_sparse, SparseArray -import pandas.lib as lib -import pandas.tslib as tslib +from pandas._libs import lib, tslib +from pandas._libs.tslib import Timedelta +from pandas._libs.lib import BlockPlacement + import pandas.computation.expressions as expressions from pandas.util.decorators import cache_readonly from pandas.util.validators import validate_bool_kwarg -from pandas.tslib import Timedelta from pandas import compat, _np_version_under1p9 from pandas.compat import range, map, zip, u -from pandas.lib import BlockPlacement - class Block(PandasObject): """ diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ffd0423572f5e..3b9bfe1de48e7 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,8 +5,8 @@ import numpy as np from distutils.version import LooseVersion -import pandas.algos as algos -import pandas.lib as lib +from pandas._libs import algos, lib + from pandas.compat import range, string_types from pandas.types.common import (is_numeric_v_string_like, is_float_dtype, is_datetime64_dtype, diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0cc3a2d039b5e..bb6c9b4546d0f 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -9,7 +9,8 @@ except ImportError: # pragma: no cover _USE_BOTTLENECK = False -from pandas import compat, lib, algos, tslib +from pandas import compat +from pandas._libs import tslib, algos, lib from pandas.types.common import (_get_dtype, is_float, is_scalar, is_integer, is_complex, is_float_dtype, diff --git 
a/pandas/core/ops.py b/pandas/core/ops.py index 6cc43cd9228f6..fe83f8a352851 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -10,15 +10,17 @@ import numpy as np import pandas as pd import datetime -from pandas import compat, lib, tslib -import pandas.index as _index + +from pandas._libs import (lib, index as libindex, + tslib as libts, algos as libalgos, iNaT) + +from pandas import compat from pandas.util.decorators import Appender import pandas.computation.expressions as expressions -from pandas.lib import isscalar -from pandas.tslib import iNaT + from pandas.compat import bind_method import pandas.core.missing as missing -import pandas.algos as _algos + from pandas.core.common import (_values_from_object, _maybe_match_name, PerformanceWarning) from pandas.types.missing import notnull, isnull @@ -29,6 +31,7 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_bool_dtype, is_datetimetz, is_list_like, + is_scalar, _ensure_object) from pandas.types.cast import _maybe_upcast_putmask, _find_common_type from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex @@ -476,7 +479,7 @@ def _convert_to_array(self, values, name=None, other=None): values = values._values elif not (isinstance(values, (np.ndarray, ABCSeries)) and is_datetime64_dtype(values)): - values = tslib.array_to_datetime(values) + values = libts.array_to_datetime(values) elif inferred_type in ('timedelta', 'timedelta64'): # have a timedelta, convert to to ns here values = to_timedelta(values, errors='coerce', box=False) @@ -680,12 +683,12 @@ def safe_na_op(lvalues, rvalues): if isinstance(rvalues, ABCSeries): if is_object_dtype(rvalues): # if dtype is object, try elementwise op - return _algos.arrmap_object(rvalues, - lambda x: op(lvalues, x)) + return libalgos.arrmap_object(rvalues, + lambda x: op(lvalues, x)) else: if is_object_dtype(lvalues): - return _algos.arrmap_object(lvalues, - lambda x: op(x, rvalues)) + return libalgos.arrmap_object(lvalues, + lambda x: op(x, rvalues)) raise def wrapper(left, right, name=name, na_op=na_op): @@ -754,7 +757,7 @@ def na_op(x, y): # in either operand if is_categorical_dtype(x): return op(x, y) - elif is_categorical_dtype(y) and not isscalar(y): + elif is_categorical_dtype(y) and not is_scalar(y): return op(y, x) if is_object_dtype(x.dtype): @@ -770,7 +773,7 @@ def na_op(x, y): raise TypeError("invalid type comparison") # numpy does not like comparisons vs None - if isscalar(y) and isnull(y): + if is_scalar(y) and isnull(y): if name == '__ne__': return np.ones(len(x), dtype=bool) else: @@ -779,11 +782,11 @@ def na_op(x, y): # we have a datetime/timedelta and may need to convert mask = None if (needs_i8_conversion(x) or - (not isscalar(y) and needs_i8_conversion(y))): + (not is_scalar(y) and needs_i8_conversion(y))): - if isscalar(y): + if is_scalar(y): mask = isnull(x) - y = _index.convert_scalar(x, _values_from_object(y)) + y = libindex.convert_scalar(x, _values_from_object(y)) else: mask = isnull(x) | isnull(y) y = y.view('i8') @@ -819,7 +822,7 @@ def wrapper(self, other, axis=None): elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array # as it will broadcast - if (not lib.isscalar(lib.item_from_zerodim(other)) and + if (not is_scalar(lib.item_from_zerodim(other)) and len(self) != len(other)): raise ValueError('Lengths must match to compare') @@ -855,7 +858,7 @@ def wrapper(self, other, axis=None): with np.errstate(all='ignore'): res = na_op(values, other) - if isscalar(res): + if is_scalar(res): raise TypeError('Could not compare %s type 
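
The na_op comparison path above (now guarded by is_scalar instead of lib.isscalar) short-circuits comparisons against a scalar null: every element compares unequal, so __ne__ returns all True and the other operators all False. A small illustration of that user-visible behaviour:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan, 3.0])
    s == None    # noqa  -> all False: scalar nulls never compare equal
    s != None    # noqa  -> all True: the __ne__ special case above
    s == np.nan  # all False as well; NaN is not equal to anything
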
with Series' % type(other)) @@ -1333,7 +1336,7 @@ def na_op(x, y): # work only for scalars def f(self, other): - if not isscalar(other): + if not is_scalar(other): raise ValueError('Simple arithmetic with %s can only be ' 'done with scalar values' % self._constructor.__name__) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 7bcd9f2d30b79..3279a8f2be39d 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -19,15 +19,14 @@ from pandas.core.sparse import SparseDataFrame, SparseSeries from pandas.sparse.array import SparseArray -from pandas._sparse import IntIndex +from pandas.sparse.libsparse import IntIndex from pandas.core.categorical import Categorical, _factorize_from_iterable from pandas.core.sorting import (get_group_index, compress_group_index, decons_obs_group_ids) import pandas.core.algorithms as algos -import pandas.algos as _algos -import pandas._reshape as _reshape +from pandas._libs import algos as _algos, reshape as _reshape from pandas.core.index import MultiIndex, _get_na_value diff --git a/pandas/core/series.py b/pandas/core/series.py index 626a4a81193cc..83036ffef0bed 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -60,7 +60,7 @@ from pandas.compat.numpy import function as nv import pandas.core.ops as ops -import pandas.core.algorithms as algos +import pandas.core.algorithms as algorithms import pandas.core.common as com import pandas.core.nanops as nanops @@ -68,10 +68,7 @@ from pandas.util.decorators import Appender, deprecate_kwarg, Substitution from pandas.util.validators import validate_bool_kwarg -import pandas.lib as lib -import pandas.tslib as tslib -import pandas.index as _index - +from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option __all__ = ['Series'] @@ -294,7 +291,7 @@ def _set_axis(self, axis, labels, fastpath=False): # need to set here becuase we changed the index if fastpath: self._data.set_axis(axis, labels) - except (tslib.OutOfBoundsDatetime, ValueError): + except (libts.OutOfBoundsDatetime, ValueError): # labels may exceeds datetime bounds, # or not be a DatetimeIndex pass @@ -568,7 +565,7 @@ def _ixs(self, i, axis=0): # dispatch to the values if we need values = self._values if isinstance(values, np.ndarray): - return _index.get_value_at(values, i) + return libindex.get_value_at(values, i) else: return values[i] except IndexError: @@ -582,7 +579,7 @@ def _ixs(self, i, axis=0): if isinstance(label, Index): return self.take(i, axis=axis, convert=True) else: - return _index.get_value_at(self, i) + return libindex.get_value_at(self, i) @property def _is_mixed_type(self): @@ -733,7 +730,7 @@ def setitem(key, value): elif is_timedelta64_dtype(self.dtype): # reassign a null value to iNaT if isnull(value): - value = tslib.iNaT + value = iNaT try: self.index._engine.set_value(self._values, key, @@ -1202,7 +1199,7 @@ def mode(self): modes : Series (sorted) """ # TODO: Add option for bins like value_counts() - return algos.mode(self) + return algorithms.mode(self) @Appender(base._shared_docs['unique'] % _shared_doc_kwargs) def unique(self): @@ -1424,7 +1421,7 @@ def diff(self, periods=1): ------- diffed : Series """ - result = algos.diff(_values_from_object(self), periods) + result = algorithms.diff(_values_from_object(self), periods) return self._constructor(result, index=self.index).__finalize__(self) def autocorr(self, lag=1): @@ -1915,7 +1912,8 @@ def nlargest(self, n=5, keep='first'): >>> s = pd.Series(np.random.randn(1e6)) >>> s.nlargest(10) # only 
sorts up to the N requested """ - return algos.select_n_series(self, n=n, keep=keep, method='nlargest') + return algorithms.select_n_series(self, n=n, keep=keep, + method='nlargest') @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @@ -1953,7 +1951,8 @@ def nsmallest(self, n=5, keep='first'): >>> s = pd.Series(np.random.randn(1e6)) >>> s.nsmallest(10) # only sorts up to the N requested """ - return algos.select_n_series(self, n=n, keep=keep, method='nsmallest') + return algorithms.select_n_series(self, n=n, keep=keep, + method='nsmallest') def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ @@ -2166,7 +2165,7 @@ def map_f(values, f): arg = self._constructor(arg, index=arg.keys()) indexer = arg.index.get_indexer(values) - new_values = algos.take_1d(arg._values, indexer) + new_values = algorithms.take_1d(arg._values, indexer) else: new_values = map_f(values, arg) @@ -2324,7 +2323,7 @@ def _reindex_indexer(self, new_index, indexer, copy): return self # be subclass-friendly - new_values = algos.take_1d(self.get_values(), indexer) + new_values = algorithms.take_1d(self.get_values(), indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): @@ -2484,7 +2483,7 @@ def isin(self, values): dtype: bool """ - result = algos.isin(_values_from_object(self), values) + result = algorithms.isin(_values_from_object(self), values) return self._constructor(result, index=self.index).__finalize__(self) def between(self, left, right, inclusive=True): diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 71314da7745c0..205d0d94d2ec3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -7,10 +7,9 @@ _ensure_int64, is_categorical_dtype) from pandas.types.missing import isnull -import pandas.core.algorithms as algos -import pandas.algos as _algos -import pandas.hashtable as _hash -from pandas import lib +import pandas.core.algorithms as algorithms +from pandas._libs import lib, algos, hashtable +from pandas._libs.hashtable import unique_label_indices _INT64_MAX = np.iinfo(np.int64).max @@ -131,7 +130,6 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull): xnull: boolean, if nulls are excluded; i.e. 
-1 labels are passed through """ - from pandas.hashtable import unique_label_indices if not xnull: lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8') @@ -250,7 +248,8 @@ def __init__(self, comp_ids, ngroups, levels, labels): self.comp_ids = comp_ids.astype(np.int64) self.k = len(labels) - self.tables = [_hash.Int64HashTable(ngroups) for _ in range(self.k)] + self.tables = [hashtable.Int64HashTable(ngroups) + for _ in range(self.k)] self._populate_tables() @@ -291,7 +290,7 @@ def get_indexer_dict(label_list, keys): def get_group_index_sorter(group_index, ngroups): """ - _algos.groupsort_indexer implements `counting sort` and it is at least + algos.groupsort_indexer implements `counting sort` and it is at least O(ngroups), where ngroups = prod(shape) shape = map(len, keys) @@ -309,8 +308,8 @@ def get_group_index_sorter(group_index, ngroups): do_groupsort = (count > 0 and ((alpha + beta * ngroups) < (count * np.log(count)))) if do_groupsort: - sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index), - ngroups) + sorter, _ = algos.groupsort_indexer(_ensure_int64(group_index), + ngroups) return _ensure_platform_int(sorter) else: return group_index.argsort(kind='mergesort') @@ -323,8 +322,8 @@ def compress_group_index(group_index, sort=True): (comp_ids) into the list of unique labels (obs_group_ids). """ - size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT) - table = _hash.Int64HashTable(size_hint) + size_hint = min(len(group_index), hashtable._SIZE_HINT_LIMIT) + table = hashtable.Int64HashTable(size_hint) group_index = _ensure_int64(group_index) @@ -348,10 +347,10 @@ def _reorder_by_uniques(uniques, labels): mask = labels < 0 # move labels to right locations (ie, unsort ascending labels) - labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) + labels = algorithms.take_nd(reverse_indexer, labels, allow_fill=False) np.putmask(labels, mask, -1) # sort observed ids - uniques = algos.take_nd(uniques, sorter, allow_fill=False) + uniques = algorithms.take_nd(uniques, sorter, allow_fill=False) return uniques, labels diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 46ba48b4cd846..b5b5d58235eaa 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -18,7 +18,7 @@ from pandas.core.base import AccessorProperty, NoNewAttributesMixin from pandas.util.decorators import Appender import re -import pandas.lib as lib +import pandas._libs.lib as lib import warnings import textwrap import codecs diff --git a/pandas/core/window.py b/pandas/core/window.py index 3f9aa2b0ff392..6fda60c449f42 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -24,13 +24,14 @@ needs_i8_conversion, is_timedelta64_dtype, is_list_like, - _ensure_float64) + _ensure_float64, + is_scalar) import pandas as pd -from pandas.lib import isscalar + from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) import pandas.core.common as com -import pandas._window as _window +import pandas.core.libwindow as _window from pandas.tseries.offsets import DateOffset from pandas import compat from pandas.compat.numpy import function as nv @@ -154,7 +155,7 @@ def _gotitem(self, key, ndim, subset=None): self = self._shallow_copy(subset) self._reset_cache() if subset.ndim == 2: - if isscalar(key) and key in subset or is_list_like(key): + if is_scalar(key) and key in subset or is_list_like(key): self._selection = key return self diff --git a/pandas/window.pyx b/pandas/core/window.pyx similarity index 99% rename from pandas/window.pyx rename to 
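
The get_group_index_sorter hunk above keeps the counting-sort-versus-mergesort heuristic intact and only renames the groupsort_indexer import. A rough sketch of the decision, assuming alpha and beta mirror the module-level constants defined just above this function (they are outside the visible context here):

    import numpy as np

    def use_counting_sort(count, ngroups, alpha=0.0, beta=1.0):
        # counting sort is O(count + ngroups); argsort(kind='mergesort') is O(count * log(count))
        return count > 0 and (alpha + beta * ngroups) < (count * np.log(count))

    use_counting_sort(count=1000000, ngroups=100)     # True: few groups, counting sort wins
    use_counting_sort(count=1000, ngroups=50000000)   # False: fall back to the mergesort argsort
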
pandas/core/window.pyx index 005d42c9f68be..a06e616002ee2 100644 --- a/pandas/window.pyx +++ b/pandas/core/window.pyx @@ -58,7 +58,7 @@ from util cimport numeric from skiplist cimport * -cdef extern from "src/headers/math.h": +cdef extern from "../src/headers/math.h": double sqrt(double x) nogil int signbit(double) nogil diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 622c4cd3bbcc7..d354911a825bc 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -33,8 +33,9 @@ from pandas.io.common import _get_handle, UnicodeWriter, _expand_user from pandas.formats.printing import adjoin, justify, pprint_thing import pandas.core.common as com -import pandas.lib as lib -from pandas.tslib import iNaT, Timestamp, Timedelta, format_array_from_datetime +import pandas._libs.lib as lib +from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, + format_array_from_datetime) from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex import pandas as pd diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index 64992e46613e5..a38453e0d2ccc 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -8,7 +8,7 @@ from pandas.indexes.range import RangeIndex # noqa import pandas.core.common as com -import pandas.lib as lib +import pandas._libs.lib as lib # TODO: there are many places that rely on these private methods existing in # pandas.core.index diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index e441d9a88690d..607a463083fdd 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -3,12 +3,10 @@ import operator import numpy as np -import pandas.tslib as tslib -import pandas.lib as lib -import pandas._join as _join -import pandas.algos as _algos -import pandas.index as _index -from pandas.lib import Timestamp, Timedelta, is_datetime_array +from pandas._libs import (lib, index as libindex, tslib as libts, + algos as libalgos, join as libjoin, + Timestamp, Timedelta, ) +from pandas._libs.lib import is_datetime_array from pandas.compat import range, u from pandas.compat.numpy import function as nv @@ -120,11 +118,11 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _join_precedence = 1 # Cython methods - _arrmap = _algos.arrmap_object - _left_indexer_unique = _join.left_join_indexer_unique_object - _left_indexer = _join.left_join_indexer_object - _inner_indexer = _join.inner_join_indexer_object - _outer_indexer = _join.outer_join_indexer_object + _arrmap = libalgos.arrmap_object + _left_indexer_unique = libjoin.left_join_indexer_unique_object + _left_indexer = libjoin.left_join_indexer_object + _inner_indexer = libjoin.inner_join_indexer_object + _outer_indexer = libjoin.outer_join_indexer_object _box_scalars = False _typ = 'index' @@ -144,7 +142,7 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): # used to infer integers as datetime-likes _infer_as_myclass = False - _engine_type = _index.ObjectEngine + _engine_type = libindex.ObjectEngine def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): @@ -285,7 +283,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, try: return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) - except tslib.OutOfBoundsDatetime: + except libts.OutOfBoundsDatetime: pass elif inferred.startswith('timedelta'): @@ -2314,7 +2312,7 @@ def get_value(self, series, key): raise try: - return tslib.get_value_box(s, key) + return libts.get_value_box(s, key) except IndexError: raise 
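
The Index.__new__ hunk above only re-points the OutOfBoundsDatetime import at pandas._libs; the fallback it guards is unchanged: datetime-like input that cannot be represented with nanosecond Timestamps stays an object-dtype Index instead of raising (illustrative):

    import datetime
    import pandas as pd

    pd.Index([datetime.datetime(2017, 1, 1)])   # inferred as a DatetimeIndex
    pd.Index([datetime.datetime(3000, 1, 1)])   # out of ns bounds, stays object dtype
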
except TypeError: @@ -2972,7 +2970,6 @@ def _join_level(self, other, level, how='left', return_indexers=False, order of the data indexed by the MultiIndex will not be changed; otherwise, it will tie out with `other`. """ - from pandas.algos import groupsort_indexer from .multi import MultiIndex def _get_leaf_sorter(labels): @@ -2985,7 +2982,7 @@ def _get_leaf_sorter(labels): if len(labels) == 1: lab = _ensure_int64(labels[0]) - sorter, _ = groupsort_indexer(lab, 1 + lab.max()) + sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max()) return sorter # find indexers of begining of each set of @@ -3051,8 +3048,9 @@ def _get_leaf_sorter(labels): else: # tie out the order with other if level == 0: # outer most level, take the fast route ngroups = 1 + new_lev_labels.max() - left_indexer, counts = groupsort_indexer(new_lev_labels, - ngroups) + left_indexer, counts = libalgos.groupsort_indexer( + new_lev_labels, ngroups) + # missing values are placed first; drop them! left_indexer = left_indexer[counts[0]:] new_labels = [lab[left_indexer] for lab in new_labels] @@ -3846,8 +3844,8 @@ def _ensure_index(index_like, copy=False): def _get_na_value(dtype): - return {np.datetime64: tslib.NaT, - np.timedelta64: tslib.NaT}.get(dtype, np.nan) + return {np.datetime64: libts.NaT, + np.timedelta64: libts.NaT}.get(dtype, np.nan) def _ensure_has_len(seq): diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 5299a094156cd..3d8f76fc56b01 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -1,5 +1,5 @@ import numpy as np -import pandas.index as _index +from pandas._libs import index as libindex from pandas import compat from pandas.compat.numpy import function as nv @@ -45,7 +45,7 @@ class CategoricalIndex(Index, base.PandasDelegate): """ _typ = 'categoricalindex' - _engine_type = _index.Int64Engine + _engine_type = libindex.Int64Engine _attributes = ['name'] def __new__(cls, data=None, categories=None, ordered=None, dtype=None, @@ -303,7 +303,7 @@ def unique(self): False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): - from pandas.hashtable import duplicated_int64 + from pandas._libs.hashtable import duplicated_int64 codes = self.codes.astype('i8') return duplicated_int64(codes, keep) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 23a42265a149b..bca1db83b6645 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -6,9 +6,7 @@ from sys import getsizeof import numpy as np -import pandas.lib as lib -import pandas.index as _index -from pandas.lib import Timestamp +from pandas._libs import index as libindex, lib, Timestamp from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv @@ -76,7 +74,7 @@ class MultiIndex(Index): _levels = FrozenList() _labels = FrozenList() _comparables = ['names'] - _engine_type = _index.MultiIndexEngine + _engine_type = libindex.MultiIndexEngine rename = Index.set_names def __new__(cls, levels=None, labels=None, sortorder=None, names=None, @@ -762,7 +760,7 @@ def f(k, stringify): @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index - from pandas.hashtable import duplicated_int64 + from pandas._libs.hashtable import duplicated_int64 shape = map(len, self.levels) ids = get_group_index(self.labels, shape, sort=False, xnull=False) @@ -813,7 +811,7 @@ def _try_mi(k): pass try: - return _index.get_value_at(s, k) + 
return libindex.get_value_at(s, k) except IndexError: raise except TypeError: diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 00ddf5b0c918d..9bb70feb2501f 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -1,9 +1,6 @@ import numpy as np -import pandas.lib as lib -import pandas._join as _join -import pandas.algos as _algos -import pandas.index as _index - +from pandas._libs import (lib, index as libindex, + algos as libalgos, join as libjoin) from pandas.types.common import (is_dtype_equal, pandas_dtype, is_float_dtype, is_object_dtype, is_integer_dtype, is_scalar) @@ -114,16 +111,13 @@ class Int64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _int64_descr_args _typ = 'int64index' - _arrmap = _algos.arrmap_int64 - _left_indexer_unique = _join.left_join_indexer_unique_int64 - _left_indexer = _join.left_join_indexer_int64 - _inner_indexer = _join.inner_join_indexer_int64 - _outer_indexer = _join.outer_join_indexer_int64 - + _arrmap = libalgos.arrmap_int64 + _left_indexer_unique = libjoin.left_join_indexer_unique_int64 + _left_indexer = libjoin.left_join_indexer_int64 + _inner_indexer = libjoin.inner_join_indexer_int64 + _outer_indexer = libjoin.outer_join_indexer_int64 _can_hold_na = False - - _engine_type = _index.Int64Engine - + _engine_type = libindex.Int64Engine _default_dtype = np.int64 @property @@ -175,17 +169,14 @@ class UInt64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _uint64_descr_args _typ = 'uint64index' - _arrmap = _algos.arrmap_uint64 - _left_indexer_unique = _join.left_join_indexer_unique_uint64 - _left_indexer = _join.left_join_indexer_uint64 - _inner_indexer = _join.inner_join_indexer_uint64 - _outer_indexer = _join.outer_join_indexer_uint64 - + _arrmap = libalgos.arrmap_uint64 + _left_indexer_unique = libjoin.left_join_indexer_unique_uint64 + _left_indexer = libjoin.left_join_indexer_uint64 + _inner_indexer = libjoin.inner_join_indexer_uint64 + _outer_indexer = libjoin.outer_join_indexer_uint64 _can_hold_na = False _na_value = 0 - - _engine_type = _index.UInt64Engine - + _engine_type = libindex.UInt64Engine _default_dtype = np.uint64 @property @@ -255,12 +246,12 @@ class Float64Index(NumericIndex): __doc__ = _num_index_shared_docs['class_descr'] % _float64_descr_args _typ = 'float64index' - _engine_type = _index.Float64Engine - _arrmap = _algos.arrmap_float64 - _left_indexer_unique = _join.left_join_indexer_unique_float64 - _left_indexer = _join.left_join_indexer_float64 - _inner_indexer = _join.inner_join_indexer_float64 - _outer_indexer = _join.outer_join_indexer_float64 + _engine_type = libindex.Float64Engine + _arrmap = libalgos.arrmap_float64 + _left_indexer_unique = libjoin.left_join_indexer_unique_float64 + _left_indexer = libjoin.left_join_indexer_float64 + _inner_indexer = libjoin.inner_join_indexer_float64 + _outer_indexer = libjoin.outer_join_indexer_float64 _default_dtype = np.float64 diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index cc78361f843bf..103a3ac2fd5f4 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -2,7 +2,7 @@ import operator import numpy as np -import pandas.index as _index +from pandas._libs import index as libindex from pandas.types.common import (is_integer, is_scalar, @@ -39,7 +39,7 @@ class RangeIndex(Int64Index): """ _typ = 'rangeindex' - _engine_type = _index.Int64Engine + _engine_type = libindex.Int64Engine def __new__(cls, start=None, stop=None, step=None, name=None, dtype=None, fastpath=False, 
copy=False, **kwargs): diff --git a/pandas/io/api.py b/pandas/io/api.py index 1284b3cb222d6..e312e7bc2f300 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -11,7 +11,7 @@ from pandas.io.json import read_json from pandas.io.html import read_html from pandas.io.sql import read_sql, read_sql_table, read_sql_query -from pandas.io.sas.sasreader import read_sas +from pandas.io.sas import read_sas from pandas.io.feather_format import read_feather from pandas.io.stata import read_stata from pandas.io.pickle import read_pickle, to_pickle diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index 3ffcef4b21552..080d6c3e273a3 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,7 +1,7 @@ """This module is designed for community supported date conversion functions""" from pandas.compat import range, map import numpy as np -import pandas.lib as lib +import pandas._libs.lib as lib def parse_date_time(date_col, time_col): diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 00ec8bcf060ef..82ea2e8a46592 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -19,7 +19,7 @@ EmptyDataError, get_filepath_or_buffer, _NA_VALUES) from pandas.tseries.period import Period -from pandas import json +from pandas.io.json import libjson from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types, OrderedDict) from pandas.core import config @@ -1450,7 +1450,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, elif isinstance(cell.val, date): num_format_str = self.date_format - stylekey = json.dumps(cell.style) + stylekey = libjson.dumps(cell.style) if num_format_str: stylekey += num_format_str @@ -1578,7 +1578,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, elif isinstance(cell.val, date): num_format_str = self.date_format - stylekey = json.dumps(cell.style) + stylekey = libjson.dumps(cell.style) if num_format_str: stylekey += num_format_str diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index a00d3492e8a37..114ec4bb2723e 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -2,8 +2,8 @@ import os import numpy as np -import pandas.json as _json -from pandas.tslib import iNaT +from pandas.io.json import libjson +from pandas._libs.tslib import iNaT from pandas.compat import StringIO, long, u from pandas import compat, isnull from pandas import Series, DataFrame, to_datetime @@ -14,8 +14,8 @@ from .table_schema import build_table_schema from pandas.types.common import is_period_dtype -loads = _json.loads -dumps = _json.dumps +loads = libjson.loads +dumps = libjson.dumps TABLE_SCHEMA_VERSION = '0.20.0' diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 0e7d025e81851..4da4a6ad57850 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -5,7 +5,7 @@ from collections import defaultdict import numpy as np -from pandas.lib import convert_json_to_lines +from pandas._libs.lib import convert_json_to_lines from pandas import compat, DataFrame diff --git a/pandas/msgpack/__init__.py b/pandas/io/msgpack/__init__.py similarity index 81% rename from pandas/msgpack/__init__.py rename to pandas/io/msgpack/__init__.py index 4d6e241171281..984e90ee03e69 100644 --- a/pandas/msgpack/__init__.py +++ b/pandas/io/msgpack/__init__.py @@ -2,8 +2,8 @@ from collections import namedtuple -from pandas.msgpack.exceptions import * # noqa -from pandas.msgpack._version import version # noqa +from pandas.io.msgpack.exceptions import * 
# noqa +from pandas.io.msgpack._version import version # noqa class ExtType(namedtuple('ExtType', 'code data')): @@ -19,8 +19,8 @@ def __new__(cls, code, data): import os # noqa -from pandas.msgpack._packer import Packer # noqa -from pandas.msgpack._unpacker import unpack, unpackb, Unpacker # noqa +from pandas.io.msgpack._packer import Packer # noqa +from pandas.io.msgpack._unpacker import unpack, unpackb, Unpacker # noqa def pack(o, stream, **kwargs): diff --git a/pandas/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx similarity index 98% rename from pandas/msgpack/_packer.pyx rename to pandas/io/msgpack/_packer.pyx index 008dbe5541d50..ad7ce1fb2531a 100644 --- a/pandas/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -6,11 +6,11 @@ from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from pandas.msgpack.exceptions import PackValueError -from pandas.msgpack import ExtType +from pandas.io.msgpack.exceptions import PackValueError +from pandas.io.msgpack import ExtType -cdef extern from "../src/msgpack/pack.h": +cdef extern from "../../src/msgpack/pack.h": struct msgpack_packer: char* buf size_t length diff --git a/pandas/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx similarity index 98% rename from pandas/msgpack/_unpacker.pyx rename to pandas/io/msgpack/_unpacker.pyx index 6f23a24adde6c..504bfed48df3c 100644 --- a/pandas/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -11,12 +11,12 @@ from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from pandas.msgpack.exceptions import (BufferFull, OutOfData, - UnpackValueError, ExtraData) -from pandas.msgpack import ExtType +from pandas.io.msgpack.exceptions import (BufferFull, OutOfData, + UnpackValueError, ExtraData) +from pandas.io.msgpack import ExtType -cdef extern from "../src/msgpack/unpack.h": +cdef extern from "../../src/msgpack/unpack.h": ctypedef struct msgpack_user: bint use_list PyObject* object_hook diff --git a/pandas/msgpack/_version.py b/pandas/io/msgpack/_version.py similarity index 100% rename from pandas/msgpack/_version.py rename to pandas/io/msgpack/_version.py diff --git a/pandas/msgpack/exceptions.py b/pandas/io/msgpack/exceptions.py similarity index 100% rename from pandas/msgpack/exceptions.py rename to pandas/io/msgpack/exceptions.py diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 39bc1a4ecf225..404be758a7fbe 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -55,7 +55,7 @@ Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, Categorical, CategoricalIndex) -from pandas.tslib import NaTType +from pandas._libs.tslib import NaTType from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame @@ -64,7 +64,7 @@ from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals -from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType +from pandas.io.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType from pandas.util._move import ( BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer, diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 811844ec35deb..9aedddc811830 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -36,8 +36,8 @@ from pandas.util.decorators import Appender -import pandas.lib as lib -import pandas.parser as _parser +import 
pandas._libs.lib as lib +import pandas.io.libparsers as libparsers # BOM character (byte order mark) @@ -1415,7 +1415,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, if issubclass(cvals.dtype.type, np.integer) and self.compact_ints: cvals = lib.downcast_int64( - cvals, _parser.na_values, + cvals, libparsers.na_values, self.use_unsigned) result[c] = cvals @@ -1533,7 +1533,7 @@ def __init__(self, src, **kwds): # #2442 kwds['allow_leading_cols'] = self.index_col is not False - self._reader = _parser.TextReader(src, **kwds) + self._reader = libparsers.TextReader(src, **kwds) # XXX self.usecols, self.usecols_dtype = _validate_usecols_arg( diff --git a/pandas/parser.pyx b/pandas/io/parsers.pyx similarity index 99% rename from pandas/parser.pyx rename to pandas/io/parsers.pyx index 23aee860b3108..a5858accbb6f5 100644 --- a/pandas/parser.pyx +++ b/pandas/io/parsers.pyx @@ -13,11 +13,12 @@ from cpython cimport (PyObject, PyBytes_FromString, PyUnicode_Check, PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport PyObject, Py_XDECREF -from io.common import ParserError, DtypeWarning, EmptyDataError, ParserWarning +from pandas.io.common import (ParserError, DtypeWarning, + EmptyDataError, ParserWarning) # Import CParserError as alias of ParserError for backwards compatibility. # Ultimately, we want to remove this import. See gh-12665 and gh-14479. -from io.common import CParserError +from pandas.io.common import CParserError cdef extern from "Python.h": object PyUnicode_FromString(char *v) @@ -36,7 +37,7 @@ from numpy cimport ndarray, uint8_t, uint64_t import numpy as np cimport util -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.compat as compat from pandas.types.common import (is_categorical_dtype, CategoricalDtype, is_integer_dtype, is_float_dtype, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9ad53db305b59..72efc47a3c744 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -44,9 +44,7 @@ from pandas.core.config import get_option from pandas.computation.pytables import Expr, maybe_expression -import pandas.lib as lib -import pandas.algos as algos -import pandas.tslib as tslib +from pandas._libs import tslib, algos, lib from distutils.version import LooseVersion diff --git a/pandas/io/sas/__init__.py b/pandas/io/sas/__init__.py index e69de29bb2d1d..fa6b29a1a3fcc 100644 --- a/pandas/io/sas/__init__.py +++ b/pandas/io/sas/__init__.py @@ -0,0 +1 @@ +from .sasreader import read_sas # noqa diff --git a/pandas/io/sas/saslib.pyx b/pandas/io/sas/sas.pyx similarity index 100% rename from pandas/io/sas/saslib.pyx rename to pandas/io/sas/sas.pyx diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 91f417abc0502..d33cee2c5a1bc 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -20,7 +20,7 @@ import numpy as np import struct import pandas.io.sas.sas_constants as const -from pandas.io.sas.saslib import Parser +from pandas.io.sas.libsas import Parser class _subheader_pointer(object): diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2ab642b3af0c7..b210baedaaf6d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -11,7 +11,7 @@ import re import numpy as np -import pandas.lib as lib +import pandas._libs.lib as lib from pandas.types.missing import isnull from pandas.types.dtypes import DatetimeTZDtype from pandas.types.common import (is_list_like, is_dict_like, diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1698ade4c0102..af4bc6a6b7ddb 100644 --- 
a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -30,8 +30,8 @@ import pandas as pd from pandas.io.common import get_filepath_or_buffer, BaseIterator -from pandas.lib import max_len_string_array, infer_dtype -from pandas.tslib import NaT, Timestamp +from pandas._libs.lib import max_len_string_array, infer_dtype +from pandas._libs.tslib import NaT, Timestamp _version_error = ("Version of given Stata file is not 104, 105, 108, " "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), " diff --git a/pandas/json.py b/pandas/json.py new file mode 100644 index 0000000000000..5b1e395fa4b74 --- /dev/null +++ b/pandas/json.py @@ -0,0 +1,7 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.json module is deprecated and will be " + "removed in a future version. Please import from " + "the pandas.io.json instead", FutureWarning, stacklevel=2) +from pandas.io.json.libjson import dumps, loads diff --git a/pandas/lib.py b/pandas/lib.py new file mode 100644 index 0000000000000..6c26627a97de3 --- /dev/null +++ b/pandas/lib.py @@ -0,0 +1,7 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.lib module is deprecated and will be " + "removed in a future version. Please import from " + "the pandas._libs.lib instead", FutureWarning, stacklevel=2) +from pandas._libs.lib import * diff --git a/pandas/parser.py b/pandas/parser.py new file mode 100644 index 0000000000000..af203c3df8cc9 --- /dev/null +++ b/pandas/parser.py @@ -0,0 +1,8 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.parser module is deprecated and will be " + "removed in a future version. Please import from " + "the pandas.io.parser instead", FutureWarning, stacklevel=2) +from pandas.io.libparsers import na_values +from pandas.io.common import CParserError diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index c65e0dd5c9f7b..762b6d869eae0 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -25,9 +25,9 @@ _astype_nansafe, _find_common_type) from pandas.types.missing import isnull, notnull, na_value_for_dtype -from pandas._sparse import SparseIndex, BlockIndex, IntIndex -import pandas._sparse as splib -import pandas.index as _index +from pandas.sparse import libsparse as splib +from pandas.sparse.libsparse import SparseIndex, BlockIndex, IntIndex +from pandas._libs import index as libindex import pandas.core.algorithms as algos import pandas.core.ops as ops import pandas.formats.printing as printing @@ -447,7 +447,7 @@ def _get_val_at(self, loc): if sp_loc == -1: return self.fill_value else: - return _index.get_value_at(self, sp_loc) + return libindex.get_value_at(self, sp_loc) @Appender(_index_shared_docs['take'] % _sparray_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py index d294e65bbf10c..54ebf5e51045d 100644 --- a/pandas/sparse/list.py +++ b/pandas/sparse/list.py @@ -6,7 +6,7 @@ from pandas.types.common import is_scalar from pandas.sparse.array import SparseArray from pandas.util.validators import validate_bool_kwarg -import pandas._sparse as splib +import pandas.sparse.libsparse as splib class SparseList(PandasObject): diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index c3dd089e8409a..7ec42f02c3998 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -20,13 +20,13 @@ from pandas.core import generic import pandas.core.common as com import pandas.core.ops as ops -import pandas.index as _index +import pandas._libs.index as _index from pandas.util.decorators import Appender 
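# A minimal sketch of how the compatibility shims added above (pandas/json.py,
# pandas/lib.py, pandas/parser.py) behave in a fresh interpreter; the warning
# fires once, at first import of the shim module:

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    import pandas.lib  # noqa  (shim; re-exports pandas._libs.lib)
assert any(issubclass(w.category, FutureWarning) for w in caught)

# The new canonical locations import without any warning:
from pandas._libs.lib import infer_dtype  # noqa
from pandas._libs import tslib, algos  # noqa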
from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray, _make_index) -from pandas._sparse import BlockIndex, IntIndex -import pandas._sparse as splib +from pandas.sparse.libsparse import BlockIndex, IntIndex +import pandas.sparse.libsparse as splib from pandas.sparse.scipy_sparse import (_sparse_series_to_coo, _coo_to_sparse_series) diff --git a/pandas/src/sparse.pyx b/pandas/sparse/sparse.pyx similarity index 100% rename from pandas/src/sparse.pyx rename to pandas/sparse/sparse.pyx diff --git a/pandas/src/sparse_op_helper.pxi.in b/pandas/sparse/sparse_op_helper.pxi.in similarity index 100% rename from pandas/src/sparse_op_helper.pxi.in rename to pandas/sparse/sparse_op_helper.pxi.in diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2f8ebc4cc1df4..db92210478182 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from warnings import catch_warnings import numpy as np import pandas as pd @@ -33,16 +34,12 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', 'indexes', 'formats', 'pandas', - 'test', 'tools', 'tseries', + 'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 'io'] - # top-level packages that are c-imports, should rename to _* - # to avoid naming conflicts - lib_to_rename = ['algos', 'hashtable', 'tslib', 'msgpack', 'sparse', - 'json', 'lib', 'index', 'parser'] - # these are already deprecated; awaiting removal - deprecated_modules = ['stats', 'datetools'] + deprecated_modules = ['stats', 'datetools', 'parser', + 'json', 'lib', 'tslib'] # misc misc = ['IndexSlice', 'NaT'] @@ -113,7 +110,7 @@ class TestPDApi(Base, tm.TestCase): def test_api(self): self.check(pd, - self.lib + self.lib_to_rename + self.misc + + self.lib + self.misc + self.modules + self.deprecated_modules + self.classes + self.deprecated_classes + self.deprecated_classes_in_future + @@ -206,7 +203,7 @@ def test_removed_from_core_common(self): self.assertRaises(AttributeError, lambda: getattr(com, t)) -class TestDatetools(tm.TestCase): +class TestDatetoolsDeprecation(tm.TestCase): def test_deprecation_access_func(self): with tm.assert_produces_warning(FutureWarning, @@ -247,3 +244,36 @@ def test_groupby(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1]) + + +class TestJson(tm.TestCase): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.json.dumps([]) + + +class TestParser(tm.TestCase): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.parser.na_values + + +class TestLib(tm.TestCase): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.lib.infer_dtype + + +class TestTSLib(tm.TestCase): + + def test_deprecation_access_func(self): + # some libraries may be imported before we + # test and could show the warning + with catch_warnings(record=True): + pd.tslib.Timestamp diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index b42f79fe5009b..ed6006440441e 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -28,7 +28,7 @@ import pandas.computation.expr as expr import pandas.util.testing as tm -import pandas.lib as lib +import pandas._libs.lib as lib from 
pandas.util.testing import (assert_frame_equal, randbool, assertRaisesRegexp, assert_numpy_array_equal, assert_produces_warning, assert_series_equal, diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 76eb61bd81110..ba7e45d7e66fb 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -23,7 +23,7 @@ from pandas.core.common import PandasError import pandas as pd import pandas.core.common as com -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.util.testing as tm from pandas.tests.frame.common import TestData diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 36c39ffba70b3..f0dfc4553886b 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -18,6 +18,7 @@ date_range) import pandas as pd +from pandas._libs.tslib import iNaT from pandas.tseries.offsets import BDay from pandas.types.common import (is_float_dtype, is_integer, @@ -1491,8 +1492,7 @@ def test_setitem_single_column_mixed_datetime(self): assert_series_equal(result, expected) # set an allowable datetime64 type - from pandas import tslib - df.loc['b', 'timestamp'] = tslib.iNaT + df.loc['b', 'timestamp'] = iNaT self.assertTrue(isnull(df.loc['b', 'timestamp'])) # allow this syntax diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 471fc536a90f6..e49dfffc48803 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -8,7 +8,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) -from pandas.parser import ParserError +from pandas.io.common import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) import pandas as pd diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 51a10f4141ab5..77c5bde332cff 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -7,8 +7,8 @@ from pandas import Index, isnull from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -import pandas.lib as lib -import pandas.algos as algos +import pandas._libs.lib as lib +import pandas._libs.algos as algos def test_series_grouper(): diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 2d21eab5822fe..4acf9dd4755f4 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -6,6 +6,7 @@ from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range from pandas.types.common import _ensure_platform_int, is_timedelta64_dtype from pandas.compat import StringIO +from pandas._libs import algos from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -417,8 +418,8 @@ def test_cython_group_transform_algos(self): dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, np.uint64, np.float32, np.float64] - ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), - (pd.algos.group_cumsum, np.cumsum, dtypes)] + ops = [(algos.group_cumprod_float64, np.cumproduct, [np.float64]), + (algos.group_cumsum, np.cumsum, dtypes)] is_datetimelike = False for pd_op, np_op, dtypes in ops: @@ -436,13 +437,13 @@ def test_cython_group_transform_algos(self): data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') 
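# (Note on the expected values checked below: plain np.cumprod would give
#  [1, 2, 6, nan, nan] for this data, but group_cumprod_float64 and
#  group_cumsum skip NaN entries -- the NaN position stays NaN while the
#  running product/sum continues, hence [1, 2, 6, nan, 24] and
#  [1, 3, 6, nan, 10].)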
actual = np.zeros_like(data) actual.fill(np.nan) - pd.algos.group_cumprod_float64(actual, data, labels, is_datetimelike) + algos.group_cumprod_float64(actual, data, labels, is_datetimelike) expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) actual = np.zeros_like(data) actual.fill(np.nan) - pd.algos.group_cumsum(actual, data, labels, is_datetimelike) + algos.group_cumsum(actual, data, labels, is_datetimelike) expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) @@ -450,8 +451,8 @@ def test_cython_group_transform_algos(self): is_datetimelike = True data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] actual = np.zeros_like(data, dtype='int64') - pd.algos.group_cumsum(actual, data.view('int64'), labels, - is_datetimelike) + algos.group_cumsum(actual, data.view('int64'), labels, + is_datetimelike) expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), np.timedelta64(5, 'ns')]) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 7b39a33266ffa..3581f894e53a3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -10,6 +10,7 @@ TimedeltaIndex, PeriodIndex, notnull, isnull) from pandas.types.common import needs_i8_conversion from pandas.util.testing import assertRaisesRegexp +from pandas._libs.tslib import iNaT import pandas.util.testing as tm @@ -322,7 +323,7 @@ def test_get_unique_index(self): if needs_i8_conversion(ind): vals = ind.asi8[[0] * 5] - vals[0] = pd.tslib.iNaT + vals[0] = iNaT else: vals = ind.values[[0] * 5] vals[0] = np.nan @@ -407,7 +408,7 @@ def test_numpy_argsort(self): # pandas compatibility input validation - the # rest already perform separate (or no) such # validation via their 'values' attribute as - # defined in pandas/indexes/base.py - they + # defined in pandas.indexes/base.py - they # cannot be changed at the moment due to # backwards compatibility concerns if isinstance(type(ind), (CategoricalIndex, RangeIndex)): @@ -836,7 +837,7 @@ def test_hasnans_isnans(self): if len(index) == 0: continue elif isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT + values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue else: @@ -876,7 +877,7 @@ def test_fillna(self): values = idx.values if isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT + values[1] = iNaT elif isinstance(index, (Int64Index, UInt64Index)): continue else: diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 772d76305cff2..16881de6e8c39 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -2,9 +2,10 @@ from datetime import timedelta import pandas as pd -from pandas import tslib, offsets, lib +from pandas import offsets import pandas.util.testing as tm -from pandas.tslib import OutOfBoundsDatetime +from pandas._libs import tslib, lib +from pandas._libs.tslib import OutOfBoundsDatetime from pandas import (DatetimeIndex, Index, Timestamp, datetime, date_range, to_datetime) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 80664ce246bf8..67e82e5c71d75 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ 
b/pandas/tests/indexes/datetimes/test_date_range.py @@ -350,7 +350,7 @@ def test_range_tz_dateutil(self): # GH 2906 tm._skip_if_no_dateutil() # Use maybe_get_tz to fix filename in tz under dateutil. - from pandas.tslib import maybe_get_tz + from pandas._libs.tslib import maybe_get_tz tz = lambda x: maybe_get_tz('dateutil/' + x) start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 2c87c48bcda11..78c37f773547a 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -117,7 +117,7 @@ def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): def test_time_loc(self): # GH8667 from datetime import time - from pandas.index import _SIZE_CUTOFF + from pandas._libs.index import _SIZE_CUTOFF ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) key = time(15, 11, 30) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 312017eef3446..4abc282252559 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -5,7 +5,7 @@ from itertools import product import pandas as pd -import pandas.tslib as tslib +import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.core.common import PerformanceWarning from pandas.tseries.index import cdate_range diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 8d05a4016ba45..a1ad147f84aff 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -326,7 +326,7 @@ def test_month_range_union_tz_pytz(self): def test_month_range_union_tz_dateutil(self): tm._skip_if_windows_python_3() tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone + from pandas._libs.tslib import _dateutil_gettz as timezone tz = timezone('US/Eastern') early_start = datetime(2011, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1b67ffce63b10..512a3e1c38629 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -9,7 +9,7 @@ from distutils.version import LooseVersion import pandas as pd -from pandas import tslib +from pandas._libs import tslib, lib from pandas.tseries import tools from pandas.tseries.tools import normalize_date from pandas.compat import lmap @@ -19,7 +19,7 @@ from pandas.util.testing import assert_series_equal, _skip_if_has_locale from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, Index, DatetimeIndex, NaT, date_range, bdate_range, - compat, lib) + compat) class TimeConversionFormats(tm.TestCase): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 8d9e26406defc..ff83b50a2a7b2 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -4,8 +4,9 @@ import pandas as pd from pandas.util import testing as tm from pandas.compat import lrange +from pandas._libs import tslib from pandas import (PeriodIndex, Series, DatetimeIndex, - period_range, Period, tslib, _np_version_under1p9) + period_range, Period, _np_version_under1p9) class TestGetItem(tm.TestCase): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 82a881d7c65bc..4533428cf1514 
100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -2,7 +2,7 @@ from datetime import timedelta import pandas as pd -import pandas.tslib as tslib +import pandas._libs.tslib as tslib import pandas.util.testing as tm import pandas.tseries.period as period from pandas import (DatetimeIndex, PeriodIndex, period_range, Series, Period, diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index e09d405afd375..f9a1df3d824f1 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -6,7 +6,7 @@ import pandas.tseries.period as period from pandas.compat import lrange from pandas.tseries.frequencies import get_freq, MONTHS -from pandas._period import period_ordinal, period_asfreq +from pandas._libs.period import period_ordinal, period_asfreq from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, Series, date_range, to_datetime, period_range) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 79d10cbda565e..8c0a399cb58b3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -24,7 +24,7 @@ from pandas.tseries.index import _to_m8 import pandas as pd -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp class TestIndex(Base, tm.TestCase): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 80ff67ab3d043..f67231e78983c 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -17,7 +17,8 @@ from pandas.compat import PY3, long, lrange, lzip, range, u from pandas.core.common import PerformanceWarning, UnsortedIndexError from pandas.indexes.base import InvalidIndexError -from pandas.lib import Timestamp +from pandas._libs import lib +from pandas._libs.lib import Timestamp import pandas.util.testing as tm @@ -851,7 +852,7 @@ def test_from_product_invalid_input(self): def test_from_product_datetimeindex(self): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = pd.lib.list_to_object_array([(1, pd.Timestamp( + etalon = lib.list_to_object_array([(1, pd.Timestamp( '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon) @@ -878,7 +879,7 @@ def test_values_boxed(self): (3, pd.Timestamp('2000-01-03'))] mi = pd.MultiIndex.from_tuples(tuples) tm.assert_numpy_array_equal(mi.values, - pd.lib.list_to_object_array(tuples)) + lib.list_to_object_array(tuples)) # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) @@ -2181,7 +2182,7 @@ def check(nlevels, with_nulls): for keep in ['first', 'last', False]: left = mi.duplicated(keep=keep) - right = pd.hashtable.duplicated_object(mi.values, keep=keep) + right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) tm.assert_numpy_array_equal(left, right) # GH5873 diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1bf9a10628542..e23e7c19ed799 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -11,7 +11,7 @@ import pandas.util.testing as tm import pandas as pd -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp from pandas.tests.indexes.common import Base diff --git 
a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 0810b13eb0f53..9a3dd1c6bca71 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -3,9 +3,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas import TimedeltaIndex, timedelta_range, tslib, to_timedelta - -iNaT = tslib.iNaT +from pandas import TimedeltaIndex, timedelta_range, to_timedelta class TestTimedeltaIndex(tm.TestCase): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 406a5bdbf3bcd..8c7b88a9cf2ca 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -8,8 +8,8 @@ from pandas.util.testing import assert_series_equal, assert_frame_equal from pandas import (Series, Timedelta, DataFrame, Timestamp, TimedeltaIndex, timedelta_range, date_range, DatetimeIndex, Int64Index, - _np_version_under1p10, Float64Index, Index, tslib) - + _np_version_under1p10, Float64Index, Index) +from pandas._libs.tslib import iNaT from pandas.tests.test_base import Ops @@ -772,7 +772,7 @@ def test_nat_new(self): tm.assert_index_equal(result, exp) result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + exp = np.array([iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) def test_shift(self): diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 2442051547312..ade9366c7e994 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -4,8 +4,9 @@ import pandas as pd import pandas.util.testing as tm from pandas.util.testing import assert_series_equal -from pandas import (Series, Timedelta, to_timedelta, tslib, isnull, +from pandas import (Series, Timedelta, to_timedelta, isnull, TimedeltaIndex) +from pandas._libs.tslib import iNaT class TestTimedeltas(tm.TestCase): @@ -26,7 +27,7 @@ def conv(v): # empty string result = to_timedelta('', box=False) - self.assertEqual(result.astype('int64'), tslib.iNaT) + self.assertEqual(result.astype('int64'), iNaT) result = to_timedelta(['', '']) self.assertTrue(isnull(result).all()) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f7a4af711bbb8..4502e0171dfbe 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -9,7 +9,7 @@ is_float_dtype, is_scalar) from pandas.compat import range, lrange, lzip, StringIO, lmap -from pandas.tslib import NaT +from pandas._libs.tslib import NaT from numpy import nan from numpy.random import randn import numpy as np diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c298b3841096c..7dbcf25c60b45 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -637,13 +637,14 @@ def test_convert_dates(self): def test_convert_dates_infer(self): # GH10747 + from pandas.io.json import dumps infer_words = ['trade_time', 'date', 'datetime', 'sold_at', 'modified', 'timestamp', 'timestamps'] for infer_word in infer_words: data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}] expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]], columns=['id', infer_word]) - result = read_json(pd.json.dumps(data))[['id', infer_word]] + result = read_json(dumps(data))[['id', infer_word]] assert_frame_equal(result, expected) def 
test_date_format_frame(self): @@ -910,50 +911,53 @@ def test_sparse(self): self.assertEqual(expected, ss.to_json()) def test_tz_is_utc(self): + from pandas.io.json import dumps exp = '"2013-01-10T05:00:00.000Z"' ts = Timestamp('2013-01-10 05:00:00Z') - self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True)) + self.assertEqual(exp, dumps(ts, iso_dates=True)) dt = ts.to_pydatetime() - self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True)) + self.assertEqual(exp, dumps(dt, iso_dates=True)) ts = Timestamp('2013-01-10 00:00:00', tz='US/Eastern') - self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True)) + self.assertEqual(exp, dumps(ts, iso_dates=True)) dt = ts.to_pydatetime() - self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True)) + self.assertEqual(exp, dumps(dt, iso_dates=True)) ts = Timestamp('2013-01-10 00:00:00-0500') - self.assertEqual(exp, pd.json.dumps(ts, iso_dates=True)) + self.assertEqual(exp, dumps(ts, iso_dates=True)) dt = ts.to_pydatetime() - self.assertEqual(exp, pd.json.dumps(dt, iso_dates=True)) + self.assertEqual(exp, dumps(dt, iso_dates=True)) def test_tz_range_is_utc(self): + from pandas.io.json import dumps + exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]' dfexp = ('{"DT":{' '"0":"2013-01-01T05:00:00.000Z",' '"1":"2013-01-02T05:00:00.000Z"}}') tz_range = pd.date_range('2013-01-01 05:00:00Z', periods=2) - self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True)) + self.assertEqual(exp, dumps(tz_range, iso_dates=True)) dti = pd.DatetimeIndex(tz_range) - self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True)) + self.assertEqual(exp, dumps(dti, iso_dates=True)) df = DataFrame({'DT': dti}) - self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True)) + self.assertEqual(dfexp, dumps(df, iso_dates=True)) tz_range = pd.date_range('2013-01-01 00:00:00', periods=2, tz='US/Eastern') - self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True)) + self.assertEqual(exp, dumps(tz_range, iso_dates=True)) dti = pd.DatetimeIndex(tz_range) - self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True)) + self.assertEqual(exp, dumps(dti, iso_dates=True)) df = DataFrame({'DT': dti}) - self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True)) + self.assertEqual(dfexp, dumps(df, iso_dates=True)) tz_range = pd.date_range('2013-01-01 00:00:00-0500', periods=2) - self.assertEqual(exp, pd.json.dumps(tz_range, iso_dates=True)) + self.assertEqual(exp, dumps(tz_range, iso_dates=True)) dti = pd.DatetimeIndex(tz_range) - self.assertEqual(exp, pd.json.dumps(dti, iso_dates=True)) + self.assertEqual(exp, dumps(dti, iso_dates=True)) df = DataFrame({'DT': dti}) - self.assertEqual(dfexp, pd.json.dumps(df, iso_dates=True)) + self.assertEqual(dfexp, dumps(df, iso_dates=True)) def test_read_jsonl(self): # GH9180 diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 6a986710ae444..e66721beed288 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -17,7 +17,7 @@ import decimal from functools import partial from pandas.compat import range, zip, StringIO, u -import pandas.json as ujson +import pandas.io.json.libjson as ujson import pandas.compat as compat import numpy as np @@ -400,7 +400,7 @@ def test_npy_nat(self): assert ujson.encode(input) == 'null', "Expected null" def test_datetime_units(self): - from pandas.lib import Timestamp + from pandas._libs.lib import Timestamp val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) stamp = Timestamp(val) diff --git a/pandas/tests/msgpack/__init__.py 
b/pandas/tests/io/msgpack/__init__.py similarity index 100% rename from pandas/tests/msgpack/__init__.py rename to pandas/tests/io/msgpack/__init__.py diff --git a/pandas/tests/msgpack/test_buffer.py b/pandas/tests/io/msgpack/test_buffer.py similarity index 90% rename from pandas/tests/msgpack/test_buffer.py rename to pandas/tests/io/msgpack/test_buffer.py index caaa22bfd08fc..5a2dc3dba5dfa 100644 --- a/pandas/tests/msgpack/test_buffer.py +++ b/pandas/tests/io/msgpack/test_buffer.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb def test_unpack_buffer(): diff --git a/pandas/tests/msgpack/test_case.py b/pandas/tests/io/msgpack/test_case.py similarity index 98% rename from pandas/tests/msgpack/test_case.py rename to pandas/tests/io/msgpack/test_case.py index a8a45b5b37eb0..3927693a94dd8 100644 --- a/pandas/tests/msgpack/test_case.py +++ b/pandas/tests/io/msgpack/test_case.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb def check(length, obj): diff --git a/pandas/tests/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py similarity index 96% rename from pandas/tests/msgpack/test_except.py rename to pandas/tests/io/msgpack/test_except.py index 76b91bb375bbc..4bcef3607bfa4 100644 --- a/pandas/tests/msgpack/test_except.py +++ b/pandas/tests/io/msgpack/test_except.py @@ -1,7 +1,7 @@ # coding: utf-8 import unittest -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb class DummyException(Exception): diff --git a/pandas/tests/msgpack/test_extension.py b/pandas/tests/io/msgpack/test_extension.py similarity index 96% rename from pandas/tests/msgpack/test_extension.py rename to pandas/tests/io/msgpack/test_extension.py index 97f0962a753d9..a5a111efbb835 100644 --- a/pandas/tests/msgpack/test_extension.py +++ b/pandas/tests/io/msgpack/test_extension.py @@ -1,7 +1,7 @@ from __future__ import print_function import array -import pandas.msgpack as msgpack -from pandas.msgpack import ExtType +import pandas.io.msgpack as msgpack +from pandas.io.msgpack import ExtType def test_pack_ext_type(): diff --git a/pandas/tests/msgpack/test_format.py b/pandas/tests/io/msgpack/test_format.py similarity index 98% rename from pandas/tests/msgpack/test_format.py rename to pandas/tests/io/msgpack/test_format.py index a4b309ebb657d..3659602e1381f 100644 --- a/pandas/tests/msgpack/test_format.py +++ b/pandas/tests/io/msgpack/test_format.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import unpackb +from pandas.io.msgpack import unpackb def check(src, should, use_list=0): diff --git a/pandas/tests/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py similarity index 97% rename from pandas/tests/msgpack/test_limits.py rename to pandas/tests/io/msgpack/test_limits.py index 9c08f328b90dd..a908ee3547634 100644 --- a/pandas/tests/msgpack/test_limits.py +++ b/pandas/tests/io/msgpack/test_limits.py @@ -3,7 +3,7 @@ unicode_literals) import pandas.util.testing as tm -from pandas.msgpack import packb, unpackb, Packer, Unpacker, ExtType +from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType class TestLimits(tm.TestCase): diff --git a/pandas/tests/msgpack/test_newspec.py b/pandas/tests/io/msgpack/test_newspec.py similarity index 97% rename from pandas/tests/msgpack/test_newspec.py rename to pandas/tests/io/msgpack/test_newspec.py index 4eb9a0425c57b..783bfc1b364f8 100644 --- 
a/pandas/tests/msgpack/test_newspec.py +++ b/pandas/tests/io/msgpack/test_newspec.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb, unpackb, ExtType +from pandas.io.msgpack import packb, unpackb, ExtType def test_str8(): diff --git a/pandas/tests/msgpack/test_obj.py b/pandas/tests/io/msgpack/test_obj.py similarity index 98% rename from pandas/tests/msgpack/test_obj.py rename to pandas/tests/io/msgpack/test_obj.py index bcc76929fe8f8..b067dacb84494 100644 --- a/pandas/tests/msgpack/test_obj.py +++ b/pandas/tests/io/msgpack/test_obj.py @@ -1,7 +1,7 @@ # coding: utf-8 import unittest -from pandas.msgpack import packb, unpackb +from pandas.io.msgpack import packb, unpackb class DecodeError(Exception): diff --git a/pandas/tests/msgpack/test_pack.py b/pandas/tests/io/msgpack/test_pack.py similarity index 98% rename from pandas/tests/msgpack/test_pack.py rename to pandas/tests/io/msgpack/test_pack.py index 005352691d908..6f9a271cbd326 100644 --- a/pandas/tests/msgpack/test_pack.py +++ b/pandas/tests/io/msgpack/test_pack.py @@ -5,7 +5,7 @@ import struct from pandas import compat from pandas.compat import u, OrderedDict -from pandas.msgpack import packb, unpackb, Unpacker, Packer +from pandas.io.msgpack import packb, unpackb, Unpacker, Packer class TestPack(unittest.TestCase): diff --git a/pandas/tests/msgpack/test_read_size.py b/pandas/tests/io/msgpack/test_read_size.py similarity index 96% rename from pandas/tests/msgpack/test_read_size.py rename to pandas/tests/io/msgpack/test_read_size.py index 965e97a7007de..ef521fa345637 100644 --- a/pandas/tests/msgpack/test_read_size.py +++ b/pandas/tests/io/msgpack/test_read_size.py @@ -1,5 +1,5 @@ """Test Unpacker's read_array_header and read_map_header methods""" -from pandas.msgpack import packb, Unpacker, OutOfData +from pandas.io.msgpack import packb, Unpacker, OutOfData UnexpectedTypeException = ValueError diff --git a/pandas/tests/msgpack/test_seq.py b/pandas/tests/io/msgpack/test_seq.py similarity index 96% rename from pandas/tests/msgpack/test_seq.py rename to pandas/tests/io/msgpack/test_seq.py index 927c2622419a6..5f203e8997ccb 100644 --- a/pandas/tests/msgpack/test_seq.py +++ b/pandas/tests/io/msgpack/test_seq.py @@ -1,7 +1,7 @@ # coding: utf-8 import io -import pandas.msgpack as msgpack +import pandas.io.msgpack as msgpack binarydata = bytes(bytearray(range(256))) diff --git a/pandas/tests/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py similarity index 97% rename from pandas/tests/msgpack/test_sequnpack.py rename to pandas/tests/io/msgpack/test_sequnpack.py index fe089ccda1c7f..c9c979c4e0e44 100644 --- a/pandas/tests/msgpack/test_sequnpack.py +++ b/pandas/tests/io/msgpack/test_sequnpack.py @@ -3,8 +3,8 @@ import unittest from pandas import compat -from pandas.msgpack import Unpacker, BufferFull -from pandas.msgpack import OutOfData +from pandas.io.msgpack import Unpacker, BufferFull +from pandas.io.msgpack import OutOfData class TestPack(unittest.TestCase): diff --git a/pandas/tests/msgpack/test_subtype.py b/pandas/tests/io/msgpack/test_subtype.py similarity index 90% rename from pandas/tests/msgpack/test_subtype.py rename to pandas/tests/io/msgpack/test_subtype.py index d6dd72c4d9850..e27ec66c63e1f 100644 --- a/pandas/tests/msgpack/test_subtype.py +++ b/pandas/tests/io/msgpack/test_subtype.py @@ -1,6 +1,6 @@ # coding: utf-8 -from pandas.msgpack import packb +from pandas.io.msgpack import packb from collections import namedtuple diff --git a/pandas/tests/msgpack/test_unpack.py 
b/pandas/tests/io/msgpack/test_unpack.py similarity index 96% rename from pandas/tests/msgpack/test_unpack.py rename to pandas/tests/io/msgpack/test_unpack.py index ae8227ab276fb..24a8e885d19d6 100644 --- a/pandas/tests/msgpack/test_unpack.py +++ b/pandas/tests/io/msgpack/test_unpack.py @@ -1,6 +1,6 @@ from io import BytesIO import sys -from pandas.msgpack import Unpacker, packb, OutOfData, ExtType +from pandas.io.msgpack import Unpacker, packb, OutOfData, ExtType import pandas.util.testing as tm import pytest diff --git a/pandas/tests/msgpack/test_unpack_raw.py b/pandas/tests/io/msgpack/test_unpack_raw.py similarity index 94% rename from pandas/tests/msgpack/test_unpack_raw.py rename to pandas/tests/io/msgpack/test_unpack_raw.py index c6bf747c8d992..a261bf4cbbcd7 100644 --- a/pandas/tests/msgpack/test_unpack_raw.py +++ b/pandas/tests/io/msgpack/test_unpack_raw.py @@ -1,7 +1,7 @@ """Tests for cases where the user seeks to obtain packed msgpack objects""" import io -from pandas.msgpack import Unpacker, packb +from pandas.io.msgpack import Unpacker, packb def test_write_bytes(): diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index b667eed346355..df75d14e9702d 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -11,7 +11,7 @@ import pytest import numpy as np -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp import pandas as pd import pandas.util.testing as tm diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index 859d2e19bd56a..2659d977ea747 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -13,7 +13,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp from pandas import DataFrame, Index from pandas.compat import parse_date, StringIO, lmap diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index b1960159bb41d..4cba9276a9d1e 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -10,8 +10,8 @@ import pytest import numpy as np -import pandas.lib as lib -from pandas.lib import Timestamp +import pandas._libs.lib as lib +from pandas._libs.lib import Timestamp import pandas as pd import pandas.io.parsers as parsers diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 0e91ca806e8fe..b6a9900b0b087 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -20,8 +20,8 @@ import pandas.util.testing as tm -from pandas.parser import TextReader -import pandas.parser as parser +from pandas.io.libparsers import TextReader +import pandas.io.libparsers as parser class TestTextReader(tm.TestCase): diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py index 95df077dae997..0cf642983e8d3 100644 --- a/pandas/tests/io/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -11,7 +11,7 @@ import pandas.util.testing as tm from pandas import DataFrame, Index -from pandas.lib import Timestamp +from pandas._libs.lib import Timestamp from pandas.compat import StringIO diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 232e68a87f16e..c1a2a4545a6f9 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -23,7 +23,7 @@ is_platform_windows) from pandas.io.common import URLError, urlopen, file_path_to_url 
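# A quick round-trip sketch against the relocated msgpack package
# (pandas.io.msgpack after this patch); packb/unpackb are the same
# functions the renamed tests above exercise:

from pandas.io.msgpack import packb, unpackb

packed = packb([1, 2, 3])
assert isinstance(packed, bytes)
assert unpackb(packed, use_list=True) == [1, 2, 3]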
from pandas.io.html import read_html -from pandas.parser import ParserError +from pandas.io.libparsers import ParserError import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 251c6ae8b4dec..efa8587d64657 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -22,7 +22,8 @@ from pandas.tests.test_panel import assert_panel_equal import pandas -from pandas import Timestamp, NaT, tslib +from pandas import Timestamp, NaT +from pandas._libs.tslib import iNaT nan = np.nan @@ -373,7 +374,7 @@ def setUp(self): s.name = 'object' self.d['object'] = s - s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + s = Series(iNaT, dtype='M8[ns]', index=range(5)) self.d['date'] = s data = { diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 9f1dea2094bc6..5592c564e51df 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5282,7 +5282,7 @@ def test_append_with_timezones_dateutil(self): # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows # filename issues. - from pandas.tslib import maybe_get_tz + from pandas._libs.tslib import maybe_get_tz gettz = lambda x: maybe_get_tz('dateutil/' + x) # as columns diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index ae09e671dbca3..5188adf54b887 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -19,7 +19,7 @@ from pandas.io.parsers import read_csv from pandas.io.stata import (read_stata, StataReader, InvalidColumnName, PossiblePrecisionLoss, StataMissingValue) -from pandas.tslib import NaT +from pandas._libs.tslib import NaT from pandas.types.common import is_categorical_dtype diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 49aa44492fe81..3128e90695324 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -6,7 +6,9 @@ import pandas.tseries.period as period from pandas.compat import text_type, iteritems from pandas.compat.numpy import np_datetime64_compat -from pandas import Period, Timestamp, tslib, offsets, _period + +from pandas._libs import tslib, period as libperiod +from pandas import Period, Timestamp, offsets from pandas.tseries.frequencies import DAYS, MONTHS @@ -256,8 +258,8 @@ def test_timestamp_tz_arg(self): self.assertEqual(p.tz, exp.tz) def test_timestamp_tz_arg_dateutil(self): - from pandas.tslib import _dateutil_gettz as gettz - from pandas.tslib import maybe_get_tz + from pandas._libs.tslib import _dateutil_gettz as gettz + from pandas._libs.tslib import maybe_get_tz for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']: p = Period('1/1/2005', freq='M').to_timestamp( @@ -275,7 +277,7 @@ def test_timestamp_tz_arg_dateutil(self): self.assertEqual(p.tz, exp.tz) def test_timestamp_tz_arg_dateutil_from_string(self): - from pandas.tslib import _dateutil_gettz as gettz + from pandas._libs.tslib import _dateutil_gettz as gettz p = Period('1/1/2005', freq='M').to_timestamp(tz='dateutil/Europe/Brussels') self.assertEqual(p.tz, gettz('Europe/Brussels')) @@ -939,10 +941,10 @@ def test_round_trip(self): class TestPeriodField(tm.TestCase): def test_get_period_field_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) + self.assertRaises(ValueError, libperiod.get_period_field, -1, 0, 0) def 
test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, _period.get_period_field_arr, -1, + self.assertRaises(ValueError, libperiod.get_period_field_arr, -1, np.empty(1), 0) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index c5a828bf2e912..7c5caa9506ca2 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -6,9 +6,8 @@ import pandas.util.testing as tm from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, - to_timedelta, tslib, compat, isnull) - -iNaT = tslib.iNaT + to_timedelta, compat, isnull) +from pandas._libs.tslib import iNaT, NaTType class TestTimedeltas(tm.TestCase): @@ -301,9 +300,9 @@ def check(value): def test_nat_converters(self): self.assertEqual(to_timedelta( - 'nat', box=False).astype('int64'), tslib.iNaT) + 'nat', box=False).astype('int64'), iNaT) self.assertEqual(to_timedelta( - 'nan', box=False).astype('int64'), tslib.iNaT) + 'nan', box=False).astype('int64'), iNaT) def testit(unit, transform): @@ -589,7 +588,7 @@ def test_implementation_limits(self): # Beyond lower limit, a NAT before the Overflow self.assertIsInstance(min_td - Timedelta(1, 'ns'), - pd.tslib.NaTType) + NaTType) with tm.assertRaises(OverflowError): min_td - Timedelta(2, 'ns') @@ -599,7 +598,7 @@ def test_implementation_limits(self): # Same tests using the internal nanosecond values td = Timedelta(min_td.value - 1, 'ns') - self.assertIsInstance(td, pd.tslib.NaTType) + self.assertIsInstance(td, NaTType) with tm.assertRaises(OverflowError): Timedelta(min_td.value - 2, 'ns') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index bbcdce922f58a..d5d92dcf96eab 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -9,13 +9,15 @@ import pandas as pd import pandas.util.testing as tm -import pandas._period as period + from pandas.tseries import offsets, frequencies -from pandas.tslib import get_timezone, iNaT +from pandas._libs import tslib, period +from pandas._libs.tslib import get_timezone, iNaT + from pandas.compat import lrange, long from pandas.util.testing import assert_series_equal from pandas.compat.numpy import np_datetime64_compat -from pandas import (Timestamp, date_range, Period, Timedelta, tslib, compat, +from pandas import (Timestamp, date_range, Period, Timedelta, compat, Series, NaT, isnull, DataFrame, DatetimeIndex) from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, RESO_MS, RESO_SEC) @@ -1482,7 +1484,7 @@ def test_timestamp_to_datetime_explicit_pytz(self): def test_timestamp_to_datetime_explicit_dateutil(self): tm._skip_if_windows_python_3() tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as gettz + from pandas._libs.tslib import _dateutil_gettz as gettz rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) stamp = rng[0] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c15171f331df3..24e4355fa9f9a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -14,7 +14,8 @@ from pandas.core.index import MultiIndex from pandas.tseries.index import Timestamp, DatetimeIndex -from pandas import lib, tslib +from pandas._libs import lib +from pandas._libs.tslib import iNaT from pandas.compat import lrange, range, zip, OrderedDict, long from pandas 
import compat @@ -200,14 +201,14 @@ def test_constructor_maskedarray(self): data = ma.masked_all((3, ), dtype='M8[ns]') result = Series(data) - expected = Series([tslib.iNaT, tslib.iNaT, tslib.iNaT], dtype='M8[ns]') + expected = Series([iNaT, iNaT, iNaT], dtype='M8[ns]') assert_series_equal(result, expected) data[0] = datetime(2001, 1, 1) data[2] = datetime(2001, 1, 3) index = ['a', 'b', 'c'] result = Series(data, index=index) - expected = Series([datetime(2001, 1, 1), tslib.iNaT, + expected = Series([datetime(2001, 1, 1), iNaT, datetime(2001, 1, 3)], index=index, dtype='M8[ns]') assert_series_equal(result, expected) @@ -327,20 +328,19 @@ def test_constructor_datelike_coercion(self): self.assertTrue(result.dtype == object) def test_constructor_dtype_datetime64(self): - import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) + s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) self.assertTrue(isnull(s).all()) # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous - s = Series(tslib.iNaT, index=lrange(5)) + s = Series(iNaT, index=lrange(5)) self.assertFalse(isnull(s).all()) s = Series(nan, dtype='M8[ns]', index=lrange(5)) self.assertTrue(isnull(s).all()) - s = Series([datetime(2001, 1, 2, 0, 0), tslib.iNaT], dtype='M8[ns]') + s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') self.assertTrue(isnull(s[1])) self.assertEqual(s.dtype, 'M8[ns]') @@ -732,8 +732,7 @@ def test_constructor_dtype_timedelta64(self): self.assertEqual(td.dtype, 'timedelta64[ns]') # mixed with NaT - from pandas import tslib - td = Series([timedelta(days=1), tslib.NaT], dtype='m8[ns]') + td = Series([timedelta(days=1), NaT], dtype='m8[ns]') self.assertEqual(td.dtype, 'timedelta64[ns]') td = Series([timedelta(days=1), np.nan], dtype='m8[ns]') @@ -744,11 +743,11 @@ def test_constructor_dtype_timedelta64(self): # improved inference # GH5689 - td = Series([np.timedelta64(300000000), pd.NaT]) + td = Series([np.timedelta64(300000000), NaT]) self.assertEqual(td.dtype, 'timedelta64[ns]') # because iNaT is int, not coerced to timedelta - td = Series([np.timedelta64(300000000), tslib.iNaT]) + td = Series([np.timedelta64(300000000), iNaT]) self.assertEqual(td.dtype, 'object') td = Series([np.timedelta64(300000000), np.nan]) @@ -791,7 +790,7 @@ def f(): self.assertEqual(s.dtype, 'timedelta64[ns]') def test_NaT_scalar(self): - series = Series([0, 1000, 2000, tslib.iNaT], dtype='M8[ns]') + series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') val = series[3] self.assertTrue(isnull(val)) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 13375ab886d8d..a2aaff25516ae 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -62,7 +62,7 @@ def test_astype_cast_object_int(self): self.assert_series_equal(result, Series(np.arange(1, 5))) def test_astype_datetimes(self): - import pandas.tslib as tslib + import pandas._libs.tslib as tslib s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) s = s.astype('O') diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index bb77550e01f11..9d93d9f01b161 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -7,14 +7,14 @@ import numpy as np import pandas as pd -import pandas.index as _index +import pandas._libs.index as _index from pandas.types.common import is_integer, is_scalar from pandas import (Index, Series, DataFrame, isnull, date_range, NaT, MultiIndex, Timestamp, 
DatetimeIndex, Timedelta) from pandas.core.indexing import IndexingError from pandas.tseries.offsets import BDay -from pandas import lib, tslib +from pandas._libs import tslib, lib from pandas.compat import lrange, range from pandas import compat @@ -375,7 +375,7 @@ def test_getitem_setitem_datetime_tz_pytz(self): def test_getitem_setitem_datetime_tz_dateutil(self): tm._skip_if_no_dateutil() from dateutil.tz import tzutc - from pandas.tslib import _dateutil_gettz as gettz + from pandas._libs.tslib import _dateutil_gettz as gettz tz = lambda x: tzutc() if x == 'UTC' else gettz( x) # handle special case for utc in dateutil diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index a3b13ba9b993a..4b1c303200739 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -8,7 +8,7 @@ from pandas import Series from pandas.tseries.index import Timestamp -import pandas.lib as lib +import pandas._libs.lib as lib from pandas.util.testing import assert_series_equal import pandas.util.testing as tm diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 23eb6a40f5f1d..87cfcf32229b4 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -9,9 +9,9 @@ import pandas as pd from pandas import (Series, DataFrame, isnull, date_range, - MultiIndex, Index, Timestamp) + MultiIndex, Index, Timestamp, NaT) from pandas.compat import range -from pandas import tslib +from pandas._libs.tslib import iNaT from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm @@ -69,9 +69,8 @@ def test_timedelta_fillna(self): timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) - from pandas import tslib - result = td.fillna(tslib.NaT) - expected = Series([tslib.NaT, timedelta(0), timedelta(1), + result = td.fillna(NaT) + expected = Series([NaT, timedelta(0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)], dtype='m8[ns]') assert_series_equal(result, expected) @@ -102,8 +101,7 @@ def test_datetime64_fillna(self): '20130101'), Timestamp('20130104'), Timestamp('20130103 9:01:01')]) assert_series_equal(result, expected) - from pandas import tslib - result = s.fillna(tslib.NaT) + result = s.fillna(NaT) expected = s assert_series_equal(result, expected) @@ -303,7 +301,7 @@ def test_fillna_raise(self): s.fillna(1, limit=limit, method=method) def test_fillna_nat(self): - series = Series([0, 1, 2, tslib.iNaT], dtype='M8[ns]') + series = Series([0, 1, 2, iNaT], dtype='M8[ns]') filled = series.fillna(method='pad') filled2 = series.fillna(value=series.values[2]) @@ -321,7 +319,7 @@ def test_fillna_nat(self): assert_frame_equal(filled, expected) assert_frame_equal(filled2, expected) - series = Series([tslib.iNaT, 0, 1, 2], dtype='M8[ns]') + series = Series([iNaT, 0, 1, 2], dtype='M8[ns]') filled = series.fillna(method='bfill') filled2 = series.fillna(value=series[1]) @@ -460,26 +458,25 @@ def test_bfill(self): def test_timedelta64_nan(self): - from pandas import tslib td = Series([timedelta(days=i) for i in range(10)]) # nan ops on timedeltas td1 = td.copy() td1[0] = np.nan self.assertTrue(isnull(td1[0])) - self.assertEqual(td1[0].value, tslib.iNaT) + self.assertEqual(td1[0].value, iNaT) td1[0] = td[0] self.assertFalse(isnull(td1[0])) - td1[1] = tslib.iNaT + td1[1] = iNaT self.assertTrue(isnull(td1[1])) - self.assertEqual(td1[1].value, tslib.iNaT) + self.assertEqual(td1[1].value, iNaT) td1[1] = td[1] 
self.assertFalse(isnull(td1[1])) - td1[2] = tslib.NaT + td1[2] = NaT self.assertTrue(isnull(td1[2])) - self.assertEqual(td1[2].value, tslib.iNaT) + self.assertEqual(td1[2].value, iNaT) td1[2] = td[2] self.assertFalse(isnull(td1[2])) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 7fe31bab87537..0acd03316339e 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.util.testing as tm from .common import TestData diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index d384460c3d030..ce7d5a573bfab 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -6,7 +6,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.tslib import iNaT +from pandas._libs.tslib import iNaT from pandas.compat import lrange, StringIO, product from pandas.tseries.tdi import TimedeltaIndex from pandas.tseries.index import DatetimeIndex diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 70aaea5b5b1f0..15531cecfe79b 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -8,7 +8,7 @@ from pandas import _np_version_under1p8 from pandas.sparse.api import SparseArray, SparseSeries -from pandas._sparse import IntIndex +from pandas.sparse.libsparse import IntIndex from pandas.util.testing import assert_almost_equal, assertRaisesRegexp import pandas.util.testing as tm diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index b2283364a1631..a7dd7f2e81033 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -14,7 +14,7 @@ from pandas import compat import pandas.sparse.frame as spf -from pandas._sparse import BlockIndex, IntIndex +from pandas.sparse.libsparse import BlockIndex, IntIndex from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray from pandas.tests.frame.test_misc_api import SharedWithSparse diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index 0435b732911da..b6ab99dc66cda 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -8,7 +8,7 @@ from pandas import compat from pandas.sparse.array import IntIndex, BlockIndex, _make_index -import pandas._sparse as splib +import pandas.sparse.libsparse as splib TEST_LENGTH = 20 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index de6636162ff05..8aa85a5b7f396 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -16,7 +16,7 @@ import pandas.sparse.frame as spf -from pandas._sparse import BlockIndex, IntIndex +from pandas.sparse.libsparse import BlockIndex, IntIndex from pandas.sparse.api import SparseSeries from pandas.tests.series.test_misc_api import SharedWithSparse diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index fab04f7fa4bf2..7a3cc3e2c3cd7 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -10,11 +10,11 @@ import pandas as pd from pandas import compat -import pandas.algos as _algos +from pandas._libs import algos as libalgos, hashtable +from pandas._libs.hashtable import unique_label_indices from pandas.compat import lrange import pandas.core.algorithms as algos import pandas.util.testing as tm -import 
pandas.hashtable as hashtable from pandas.compat.numpy import np_array_datetime64_compat from pandas.util.testing import assert_almost_equal @@ -972,7 +972,6 @@ def test_quantile(): def test_unique_label_indices(): - from pandas.hashtable import unique_label_indices a = np.random.randint(1, 1 << 10, 1 << 15).astype('i8') @@ -998,7 +997,7 @@ def test_scipy_compat(self): def _check(arr): mask = ~np.isfinite(arr) arr = arr.copy() - result = _algos.rank_1d_float64(arr) + result = libalgos.rank_1d_float64(arr) arr[mask] = np.inf exp = rankdata(arr) exp[mask] = nan @@ -1034,26 +1033,26 @@ def test_pad_backfill_object_segfault(): old = np.array([], dtype='O') new = np.array([datetime(2010, 12, 31)], dtype='O') - result = _algos.pad_object(old, new) + result = libalgos.pad_object(old, new) expected = np.array([-1], dtype=np.int64) assert (np.array_equal(result, expected)) - result = _algos.pad_object(new, old) + result = libalgos.pad_object(new, old) expected = np.array([], dtype=np.int64) assert (np.array_equal(result, expected)) - result = _algos.backfill_object(old, new) + result = libalgos.backfill_object(old, new) expected = np.array([-1], dtype=np.int64) assert (np.array_equal(result, expected)) - result = _algos.backfill_object(new, old) + result = libalgos.backfill_object(new, old) expected = np.array([], dtype=np.int64) assert (np.array_equal(result, expected)) def test_arrmap(): values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O') - result = _algos.arrmap_object(values, lambda x: x in ['foo', 'bar']) + result = libalgos.arrmap_object(values, lambda x: x in ['foo', 'bar']) assert (result.dtype == np.bool_) @@ -1078,7 +1077,7 @@ def test_backfill(self): old = Index([1, 5, 10]) new = Index(lrange(12)) - filler = _algos.backfill_int64(old.values, new.values) + filler = libalgos.backfill_int64(old.values, new.values) expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64) @@ -1087,7 +1086,7 @@ def test_backfill(self): # corner case old = Index([1, 4]) new = Index(lrange(5, 10)) - filler = _algos.backfill_int64(old.values, new.values) + filler = libalgos.backfill_int64(old.values, new.values) expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) self.assert_numpy_array_equal(filler, expect_filler) @@ -1096,7 +1095,7 @@ def test_pad(self): old = Index([1, 5, 10]) new = Index(lrange(12)) - filler = _algos.pad_int64(old.values, new.values) + filler = libalgos.pad_int64(old.values, new.values) expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64) @@ -1105,7 +1104,7 @@ def test_pad(self): # corner case old = Index([5, 10]) new = Index(lrange(5)) - filler = _algos.pad_int64(old.values, new.values) + filler = libalgos.pad_int64(old.values, new.values) expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) self.assert_numpy_array_equal(filler, expect_filler) @@ -1137,7 +1136,7 @@ def test_is_lexsorted(): 6, 5, 4, 3, 2, 1, 0])] - assert (not _algos.is_lexsorted(failure)) + assert (not libalgos.is_lexsorted(failure)) # def test_get_group_index(): # a = np.array([0, 1, 2, 0, 2, 1, 0, 0], dtype=np.int64) @@ -1153,7 +1152,7 @@ def test_groupsort_indexer(): a = np.random.randint(0, 1000, 100).astype(np.int64) b = np.random.randint(0, 1000, 100).astype(np.int64) - result = _algos.groupsort_indexer(a, 1000)[0] + result = libalgos.groupsort_indexer(a, 1000)[0] # need to use a stable sort expected = np.argsort(a, kind='mergesort') @@ -1161,7 +1160,7 @@ def test_groupsort_indexer(): # compare with lexsort key = a * 1000 + b - 
result = _algos.groupsort_indexer(key, 1000000)[0] + result = libalgos.groupsort_indexer(key, 1000000)[0] expected = np.lexsort((b, a)) assert (np.array_equal(result, expected)) @@ -1172,8 +1171,8 @@ def test_infinity_sort(): # itself. Instead, let's give our infinities a self-consistent # ordering, but outside the float extended real line. - Inf = _algos.Infinity() - NegInf = _algos.NegInfinity() + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf] @@ -1191,14 +1190,14 @@ def test_infinity_sort(): assert sorted(perm) == ref_nums # smoke tests - np.array([_algos.Infinity()] * 32).argsort() - np.array([_algos.NegInfinity()] * 32).argsort() + np.array([libalgos.Infinity()] * 32).argsort() + np.array([libalgos.NegInfinity()] * 32).argsort() def test_ensure_platform_int(): arr = np.arange(100, dtype=np.intp) - result = _algos.ensure_platform_int(arr) + result = libalgos.ensure_platform_int(arr) assert (result is arr) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 8264ad33950f9..1d4dddf6477df 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -18,6 +18,7 @@ from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.base import DatetimeIndexOpsMixin +from pandas._libs.tslib import iNaT class CheckStringMixin(object): @@ -451,15 +452,15 @@ def test_value_counts_unique_nunique_null(self): if is_datetimetz(o): if isinstance(o, DatetimeIndex): v = o.asi8 - v[0:2] = pd.tslib.iNaT + v[0:2] = iNaT values = o._shallow_copy(v) else: o = o.copy() - o[0:2] = pd.tslib.iNaT + o[0:2] = iNaT values = o._values elif needs_i8_conversion(o): - values[0:2] = pd.tslib.iNaT + values[0:2] = iNaT values = o._shallow_copy(values) else: values[0:2] = null_obj diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index f086935df6dc8..5ab2bbc4ac6ba 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -17,7 +17,7 @@ import pandas.core.algorithms as algos import pandas.util.testing as tm import pandas as pd -from pandas import lib +from pandas._libs import lib from pandas.util.testing import (assert_almost_equal, assert_frame_equal, randn, assert_series_equal) from pandas.compat import zip, u diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 2a16d7663b0cf..6723494d1529b 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -3,7 +3,7 @@ import numpy as np from pandas import Index -import pandas._join as _join +from pandas._libs import join as _join import pandas.util.testing as tm from pandas.util.testing import assert_almost_equal diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 2381c52ef14b6..a925cf13900e9 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -2,7 +2,7 @@ import numpy as np import pandas as pd -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.util.testing as tm diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index c809b39bb566e..d1b7fdadce6ae 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -20,7 +20,7 @@ cart_product, zip) import pandas as pd -import pandas.index as _index +import pandas._libs.index as _index class TestMultiLevel(tm.TestCase): diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 3aed22c140ffe..0bc1d0dcd0532 100644 --- 
a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -6,7 +6,7 @@ from pandas.compat import long import pandas.core.algorithms as algos import pandas.util.testing as tm -from pandas.tslib import iNaT +from pandas._libs.tslib import iNaT class TestTake(tm.TestCase): diff --git a/pandas/tests/tools/test_join.py b/pandas/tests/tools/test_join.py index ee6b3d57b852d..b65f800802bca 100644 --- a/pandas/tests/tools/test_join.py +++ b/pandas/tests/tools/test_join.py @@ -9,7 +9,7 @@ from pandas.util.testing import assert_frame_equal from pandas import DataFrame, MultiIndex, Series, Index, merge, concat -import pandas._join as _join +from pandas._libs import join as libjoin import pandas.util.testing as tm from pandas.tests.tools.test_merge import get_test_data, N, NGROUPS @@ -46,7 +46,7 @@ def test_cython_left_outer_join(self): right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 - ls, rs = _join.left_outer_join(left, right, max_group) + ls, rs = libjoin.left_outer_join(left, right, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') @@ -70,7 +70,7 @@ def test_cython_right_outer_join(self): right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 - rs, ls = _join.left_outer_join(right, left, max_group) + rs, ls = libjoin.left_outer_join(right, left, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') @@ -96,7 +96,7 @@ def test_cython_inner_join(self): right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64) max_group = 5 - ls, rs = _join.inner_join(left, right, max_group) + ls, rs = libjoin.inner_join(left, right, max_group) exp_ls = left.argsort(kind='mergesort') exp_rs = right.argsort(kind='mergesort') diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index dfa1e94e4dc11..f644c353982f6 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -31,8 +31,8 @@ to_datetime, DateParseError) import pandas.tseries.offsets as offsets from pandas.io.pickle import read_pickle -from pandas.tslib import normalize_date, NaT, Timestamp, Timedelta -import pandas.tslib as tslib +from pandas._libs.tslib import normalize_date, NaT, Timestamp, Timedelta +import pandas._libs.tslib as tslib from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm from pandas.tseries.holiday import USFederalHolidayCalendar diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 1535bd665fe8b..57a655b0b7610 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -26,7 +26,7 @@ from pandas.tseries.tdi import timedelta_range, TimedeltaIndex from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, assert_index_equal) -from pandas._period import IncompatibleFrequency +from pandas._libs.period import IncompatibleFrequency bday = BDay() diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 771fb2f50c410..1ccc1652d2719 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -11,7 +11,8 @@ from pandas.compat import lrange, zip from pandas.tseries.index import bdate_range, date_range from pandas.types.dtypes import DatetimeTZDtype -from pandas import (Index, Series, DataFrame, isnull, Timestamp, tslib, NaT, +from pandas._libs import tslib +from pandas import (Index, Series, DataFrame, isnull, Timestamp, NaT, DatetimeIndex, 
to_datetime) from pandas.util.testing import (assert_frame_equal, assert_series_equal, set_timezone) @@ -924,7 +925,7 @@ def test_utc_with_system_utc(self): # Skipped on win32 due to dateutil bug tm._skip_if_windows() - from pandas.tslib import maybe_get_tz + from pandas._libs.tslib import maybe_get_tz # from system utc to real utc ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index 629aa63f4a0ae..a36a77a70f9ad 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -13,7 +13,7 @@ import pytz import pandas as pd -from pandas import lib, tslib +from pandas._libs import tslib, lib from pandas import (Series, Index, DataFrame, Timedelta, DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical) @@ -517,28 +517,28 @@ def test_infer_dtype_period(self): # GH 13664 arr = np.array([pd.Period('2011-01', freq='D'), pd.Period('2011-02', freq='D')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') + self.assertEqual(lib.infer_dtype(arr), 'period') arr = np.array([pd.Period('2011-01', freq='D'), pd.Period('2011-02', freq='M')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') + self.assertEqual(lib.infer_dtype(arr), 'period') # starts with nan for n in [pd.NaT, np.nan]: arr = np.array([n, pd.Period('2011-01', freq='D')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') + self.assertEqual(lib.infer_dtype(arr), 'period') arr = np.array([n, pd.Period('2011-01', freq='D'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'period') + self.assertEqual(lib.infer_dtype(arr), 'period') # different type of nat arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + self.assertEqual(lib.infer_dtype(arr), 'mixed') arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + self.assertEqual(lib.infer_dtype(arr), 'mixed') def test_infer_dtype_all_nan_nat_like(self): arr = np.array([np.nan, np.nan]) diff --git a/pandas/tests/types/test_io.py b/pandas/tests/types/test_io.py index ce8e23342bf5a..b6c10394dd232 100644 --- a/pandas/tests/types/test_io.py +++ b/pandas/tests/types/test_io.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.util.testing as tm from pandas.compat import long, u @@ -73,7 +73,7 @@ def test_convert_sql_column_decimals(self): self.assert_numpy_array_equal(result, expected) def test_convert_downcast_int64(self): - from pandas.parser import na_values + from pandas.io.libparsers import na_values arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py index cab44f1122ae1..2e35f5c1badbb 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/types/test_missing.py @@ -7,7 +7,7 @@ import pandas as pd from pandas.core import config as cf from pandas.compat import u -from pandas.tslib import iNaT +from pandas._libs.tslib import iNaT from pandas import (NaT, Float64Index, Series, DatetimeIndex, TimedeltaIndex, date_range) from pandas.types.dtypes import DatetimeTZDtype diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py index ef863510cdd87..85ceb439435ee 100644 --- a/pandas/tools/hashing.py +++ b/pandas/tools/hashing.py @@ -4,8 +4,9 @@ import 
itertools import numpy as np -from pandas import _hash, Series, factorize, Categorical, Index, MultiIndex -from pandas.lib import is_bool_array +from pandas import Series, factorize, Categorical, Index, MultiIndex +from pandas.tools import libhashing as _hash +from pandas._libs.lib import is_bool_array from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame from pandas.types.common import (is_categorical_dtype, is_numeric_dtype, is_datetime64_dtype, is_timedelta64_dtype, diff --git a/pandas/src/hash.pyx b/pandas/tools/hashing.pyx similarity index 100% rename from pandas/src/hash.pyx rename to pandas/tools/hashing.pyx diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index ba53d42fccec7..3f1e7640ba538 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -37,9 +37,7 @@ from pandas.core.sorting import is_int64_overflow_possible import pandas.core.algorithms as algos import pandas.core.common as com - -import pandas._join as _join -import pandas.hashtable as _hash +from pandas._libs import hashtable as libhashtable, join as libjoin # back-compat of pseudo-public API @@ -1005,8 +1003,8 @@ def get_result(self): rdata.items, rsuf) if self.fill_method == 'ffill': - left_join_indexer = _join.ffill_indexer(left_indexer) - right_join_indexer = _join.ffill_indexer(right_indexer) + left_join_indexer = libjoin.ffill_indexer(left_indexer) + right_join_indexer = libjoin.ffill_indexer(right_indexer) else: left_join_indexer = left_indexer right_join_indexer = right_indexer @@ -1030,11 +1028,11 @@ def get_result(self): def _asof_function(direction, on_type): - return getattr(_join, 'asof_join_%s_%s' % (direction, on_type), None) + return getattr(libjoin, 'asof_join_%s_%s' % (direction, on_type), None) def _asof_by_function(direction, on_type, by_type): - return getattr(_join, 'asof_join_%s_%s_by_%s' % + return getattr(libjoin, 'asof_join_%s_%s_by_%s' % (direction, on_type, by_type), None) @@ -1294,13 +1292,13 @@ def _get_multiindex_indexer(join_keys, index, sort): # factorize keys to a dense i8 space lkey, rkey, count = fkeys(lkey, rkey) - return _join.left_outer_join(lkey, rkey, count, sort=sort) + return libjoin.left_outer_join(lkey, rkey, count, sort=sort) def _get_single_indexer(join_key, index, sort=False): left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) - left_indexer, right_indexer = _join.left_outer_join( + left_indexer, right_indexer = libjoin.left_outer_join( _ensure_int64(left_key), _ensure_int64(right_key), count, sort=sort) @@ -1335,15 +1333,15 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False): def _right_outer_join(x, y, max_groups): - right_indexer, left_indexer = _join.left_outer_join(y, x, max_groups) + right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups) return left_indexer, right_indexer _join_functions = { - 'inner': _join.inner_join, - 'left': _join.left_outer_join, + 'inner': libjoin.inner_join, + 'left': libjoin.left_outer_join, 'right': _right_outer_join, - 'outer': _join.full_outer_join, + 'outer': libjoin.full_outer_join, } @@ -1352,11 +1350,11 @@ def _factorize_keys(lk, rk, sort=True): lk = lk.values rk = rk.values if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): - klass = _hash.Int64Factorizer + klass = libhashtable.Int64Factorizer lk = _ensure_int64(com._values_from_object(lk)) rk = _ensure_int64(com._values_from_object(rk)) else: - klass = _hash.Factorizer + klass = libhashtable.Factorizer lk = _ensure_object(lk) rk = _ensure_object(rk) diff --git 
a/pandas/tools/tile.py b/pandas/tools/tile.py index feb4d4bfd5044..9b21e542f153c 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -13,7 +13,7 @@ from pandas.compat import zip from pandas import to_timedelta, to_datetime from pandas.types.common import is_datetime64_dtype, is_timedelta64_dtype -from pandas.lib import infer_dtype +from pandas._libs.lib import infer_dtype import numpy as np diff --git a/pandas/tools/util.py b/pandas/tools/util.py index 8ec074fbf5950..bf78a9dfb65cc 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,5 +1,5 @@ import numpy as np -import pandas.lib as lib +import pandas._libs.lib as lib from pandas.types.common import (is_number, is_numeric_dtype, diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index 9a07983b4d951..a00ccf99e1b96 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -10,5 +10,5 @@ from pandas.tseries.period import Period, PeriodIndex, period_range, pnow from pandas.tseries.resample import TimeGrouper from pandas.tseries.timedeltas import to_timedelta -from pandas.lib import NaT +from pandas._libs.lib import NaT import pandas.tseries.offsets as offsets diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 2e22c35868cb3..ae40c2f66a590 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -21,9 +21,10 @@ from pandas.core.common import AbstractMethodError import pandas.formats.printing as printing -import pandas.tslib as tslib -import pandas._period as prlib -import pandas.lib as lib +from pandas._libs import (tslib as libts, lib, + Timedelta, Timestamp, iNaT, NaT) +from pandas._libs.period import Period + from pandas.core.index import Index from pandas.indexes.base import _index_shared_docs from pandas.util.decorators import Appender, cache_readonly @@ -94,7 +95,8 @@ def _round(self, freq, rounder): result = (unit * rounder(values / float(unit)).astype('i8')) else: result = (unit * rounder(values / float(unit)).astype('i8')) - result = self._maybe_mask_results(result, fill_value=tslib.NaT) + result = self._maybe_mask_results(result, fill_value=NaT) + attribs = self._get_attributes_dict() if 'freq' in attribs: attribs['freq'] = None @@ -196,7 +198,7 @@ def _evaluate_compare(self, other, op): result[mask] = False return result try: - result[mask] = tslib.iNaT + result[mask] = iNaT return Index(result) except TypeError: return result @@ -327,7 +329,7 @@ def _nat_new(self, box=True): - If False returns ndarray of np.int64. 
""" result = np.zeros(len(self), dtype=np.int64) - result.fill(tslib.iNaT) + result.fill(iNaT) if not box: return result @@ -392,7 +394,7 @@ def take(self, indices, axis=0, allow_fill=True, taken = self._assert_take_fillable(self.asi8, indices, allow_fill=allow_fill, fill_value=fill_value, - na_value=tslib.iNaT) + na_value=iNaT) # keep freq in PeriodIndex, reset otherwise freq = self.freq if isinstance(self, ABCPeriodIndex) else None @@ -404,13 +406,13 @@ def get_duplicates(self): _can_hold_na = True - _na_value = tslib.NaT + _na_value = NaT """The expected NA value to use with this index.""" @cache_readonly def _isnan(self): """ return if each value is nan""" - return (self.asi8 == tslib.iNaT) + return (self.asi8 == iNaT) @property def asobject(self): @@ -424,7 +426,7 @@ def asobject(self): def _convert_tolerance(self, tolerance): try: - return tslib.Timedelta(tolerance).to_timedelta64() + return Timedelta(tolerance).to_timedelta64() except ValueError: raise ValueError('tolerance argument for %s must be convertible ' 'to Timedelta: %r' @@ -477,7 +479,7 @@ def min(self, axis=None, *args, **kwargs): # quick check if len(i8) and self.is_monotonic: - if i8[0] != tslib.iNaT: + if i8[0] != iNaT: return self._box_func(i8[0]) if self.hasnans: @@ -525,7 +527,7 @@ def max(self, axis=None, *args, **kwargs): # quick check if len(i8) and self.is_monotonic: - if i8[-1] != tslib.iNaT: + if i8[-1] != iNaT: return self._box_func(i8[-1]) if self.hasnans: @@ -643,11 +645,11 @@ def __add__(self, other): .format(typ1=type(self).__name__, typ2=type(other).__name__)) elif isinstance(other, (DateOffset, timedelta, np.timedelta64, - tslib.Timedelta)): + Timedelta)): return self._add_delta(other) elif is_integer(other): return self.shift(other) - elif isinstance(other, (tslib.Timestamp, datetime)): + elif isinstance(other, (Timestamp, datetime)): return self._add_datelike(other) else: # pragma: no cover return NotImplemented @@ -673,13 +675,13 @@ def __sub__(self, other): .format(typ1=type(self).__name__, typ2=type(other).__name__)) elif isinstance(other, (DateOffset, timedelta, np.timedelta64, - tslib.Timedelta)): + Timedelta)): return self._add_delta(-other) elif is_integer(other): return self.shift(-other) - elif isinstance(other, (tslib.Timestamp, datetime)): + elif isinstance(other, (Timestamp, datetime)): return self._sub_datelike(other) - elif isinstance(other, prlib.Period): + elif isinstance(other, Period): return self._sub_period(other) else: # pragma: no cover return NotImplemented @@ -699,11 +701,11 @@ def _add_delta_td(self, other): # add a delta of a timedeltalike # return the i8 result view - inc = tslib._delta_to_nanoseconds(other) + inc = libts._delta_to_nanoseconds(other) new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view('i8') if self.hasnans: - new_values[self._isnan] = tslib.iNaT + new_values[self._isnan] = iNaT return new_values.view('i8') def _add_delta_tdi(self, other): @@ -721,7 +723,7 @@ def _add_delta_tdi(self, other): b_mask=other._isnan) if self.hasnans or other.hasnans: mask = (self._isnan) | (other._isnan) - new_values[mask] = tslib.iNaT + new_values[mask] = iNaT return new_values.view(self.dtype) def isin(self, values): @@ -849,7 +851,7 @@ def _append_same_dtype(self, to_concat, name): def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ if lib.isscalar(other) and isnull(other): - other = tslib.iNaT + other = iNaT elif isinstance(other, ABCIndexClass): # convert tz if needed if getattr(other, 'tz', None) is not 
None: diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 46e8bd43e8ff8..82fcdbcd0d367 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -13,10 +13,9 @@ from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.index import DatetimeIndex -from pandas._period import IncompatibleFrequency # flake8: noqa +from pandas._libs.period import IncompatibleFrequency # flake8: noqa from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex -from pandas import tslib from pandas.core.algorithms import take_1d diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index db7049ebc89b3..1f99e88ce86d6 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -20,7 +20,7 @@ from pandas.compat import lrange import pandas.compat as compat -import pandas.lib as lib +import pandas._libs.lib as lib import pandas.core.common as com from pandas.core.index import Index diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 957a934d13f09..8013947babc5a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -17,9 +17,9 @@ from pandas.tseries.offsets import DateOffset from pandas.util.decorators import cache_readonly, deprecate_kwarg import pandas.tseries.offsets as offsets -import pandas.lib as lib -import pandas.tslib as tslib -from pandas.tslib import Timedelta + +from pandas._libs import lib, tslib +from pandas._libs.tslib import Timedelta from pytz import AmbiguousTimeError diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 5f00e8b648689..f80618ef34373 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -44,13 +44,9 @@ import pandas.tseries.offsets as offsets import pandas.tseries.tools as tools -from pandas.lib import Timestamp -import pandas.lib as lib -import pandas.tslib as tslib -import pandas._period as period -import pandas._join as _join -import pandas.algos as _algos -import pandas.index as _index +from pandas._libs import (lib, index as libindex, tslib as libts, + algos as libalgos, join as libjoin, + Timestamp, period as libperiod) def _utc(): @@ -75,16 +71,16 @@ def f(self): self.freq.kwds.get('month', 12)) if self.freq else 12) - result = tslib.get_start_end_field(values, field, self.freqstr, + result = libts.get_start_end_field(values, field, self.freqstr, month_kw) elif field in ['weekday_name']: - result = tslib.get_date_name_field(values, field) + result = libts.get_date_name_field(values, field) return self._maybe_mask_results(result) elif field in ['is_leap_year']: # no need to mask NaT - return tslib.get_date_field(values, field) + return libts.get_date_field(values, field) else: - result = tslib.get_date_field(values, field) + result = libts.get_date_field(values, field) return self._maybe_mask_results(result, convert='float64') @@ -115,9 +111,9 @@ def wrapper(self, other): result = _values_from_object(result) if isinstance(other, Index): - o_mask = other.values.view('i8') == tslib.iNaT + o_mask = other.values.view('i8') == libts.iNaT else: - o_mask = other.view('i8') == tslib.iNaT + o_mask = other.view('i8') == libts.iNaT if o_mask.any(): result[o_mask] = nat_result @@ -211,11 +207,11 @@ def _join_i8_wrapper(joinf, **kwargs): return DatetimeIndexOpsMixin._join_i8_wrapper(joinf, dtype='M8[ns]', **kwargs) - _inner_indexer = _join_i8_wrapper(_join.inner_join_indexer_int64) - _outer_indexer = _join_i8_wrapper(_join.outer_join_indexer_int64) - _left_indexer = 
_join_i8_wrapper(_join.left_join_indexer_int64) + _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer_int64) + _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer_int64) + _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64) _left_indexer_unique = _join_i8_wrapper( - _join.left_join_indexer_unique_int64, with_indexers=False) + libjoin.left_join_indexer_unique_int64, with_indexers=False) _arrmap = None __eq__ = _dt_index_cmp('__eq__') @@ -225,7 +221,7 @@ def _join_i8_wrapper(joinf, **kwargs): __le__ = _dt_index_cmp('__le__') __ge__ = _dt_index_cmp('__ge__') - _engine_type = _index.DatetimeEngine + _engine_type = libindex.DatetimeEngine tz = None offset = None @@ -340,7 +336,7 @@ def __new__(cls, data=None, verify_integrity = False else: if data.dtype != _NS_DTYPE: - subarr = tslib.cast_to_nanoseconds(data) + subarr = libts.cast_to_nanoseconds(data) else: subarr = data else: @@ -356,13 +352,13 @@ def __new__(cls, data=None, tz = subarr.tz else: if tz is not None: - tz = tslib.maybe_get_tz(tz) + tz = libts.maybe_get_tz(tz) if (not isinstance(data, DatetimeIndex) or getattr(data, 'tz', None) is None): # Convert tz-naive to UTC ints = subarr.view('i8') - subarr = tslib.tz_localize_to_utc(ints, tz, + subarr = libts.tz_localize_to_utc(ints, tz, ambiguous=ambiguous) subarr = subarr.view(_NS_DTYPE) @@ -430,17 +426,17 @@ def _generate(cls, start, end, periods, name, offset, raise TypeError('Start and end cannot both be tz-aware with ' 'different timezones') - inferred_tz = tslib.maybe_get_tz(inferred_tz) + inferred_tz = libts.maybe_get_tz(inferred_tz) # these may need to be localized - tz = tslib.maybe_get_tz(tz) + tz = libts.maybe_get_tz(tz) if tz is not None: date = start or end if date.tzinfo is not None and hasattr(tz, 'localize'): tz = tz.localize(date.replace(tzinfo=None)).tzinfo if tz is not None and inferred_tz is not None: - if not tslib.get_timezone(inferred_tz) == tslib.get_timezone(tz): + if not libts.get_timezone(inferred_tz) == libts.get_timezone(tz): raise AssertionError("Inferred time zone not equal to passed " "time zone") @@ -507,7 +503,7 @@ def _generate(cls, start, end, periods, name, offset, index = _generate_regular_range(start, end, periods, offset) if tz is not None and getattr(index, 'tz', None) is None: - index = tslib.tz_localize_to_utc(_ensure_int64(index), tz, + index = libts.tz_localize_to_utc(_ensure_int64(index), tz, ambiguous=ambiguous) index = index.view(_NS_DTYPE) @@ -539,11 +535,11 @@ def _local_timestamps(self): utc = _utc() if self.is_monotonic: - return tslib.tz_convert(self.asi8, utc, self.tz) + return libts.tz_convert(self.asi8, utc, self.tz) else: values = self.asi8 indexer = values.argsort() - result = tslib.tz_convert(values.take(indexer), utc, self.tz) + result = libts.tz_convert(values.take(indexer), utc, self.tz) n = len(indexer) reverse = np.empty(n, dtype=np.int_) @@ -576,7 +572,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, result._data = values result.name = name result.offset = freq - result.tz = tslib.maybe_get_tz(tz) + result.tz = libts.maybe_get_tz(tz) result._reset_identity() return result @@ -590,7 +586,7 @@ def tzinfo(self): @cache_readonly def _timezone(self): """ Comparable timezone both for pytz / dateutil""" - return tslib.get_timezone(self.tzinfo) + return libts.get_timezone(self.tzinfo) def _has_same_tz(self, other): zzone = self._timezone @@ -599,7 +595,7 @@ def _has_same_tz(self, other): if isinstance(other, np.datetime64): # convert to Timestamp as np.datetime64 doesn't have tz attr 
other = Timestamp(other) - vzone = tslib.get_timezone(getattr(other, 'tzinfo', '__no_tz__')) + vzone = libts.get_timezone(getattr(other, 'tzinfo', '__no_tz__')) return zzone == vzone @classmethod @@ -671,7 +667,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, def _mpl_repr(self): # how to represent ourselves to matplotlib - return tslib.ints_to_pydatetime(self.asi8, self.tz) + return libts.ints_to_pydatetime(self.asi8, self.tz) @cache_readonly def _is_dates_only(self): @@ -728,7 +724,7 @@ def __setstate__(self, state): def _add_datelike(self, other): # adding a timedeltaindex to a datetimelike - if other is tslib.NaT: + if other is libts.NaT: return self._nat_new(box=True) raise TypeError("cannot add a datelike to a DatetimeIndex") @@ -741,9 +737,9 @@ def _sub_datelike(self, other): raise TypeError("DatetimeIndex subtraction must have the same " "timezones or no timezones") result = self._sub_datelike_dti(other) - elif isinstance(other, (tslib.Timestamp, datetime)): + elif isinstance(other, (libts.Timestamp, datetime)): other = Timestamp(other) - if other is tslib.NaT: + if other is libts.NaT: result = self._nat_new(box=False) # require tz compat elif not self._has_same_tz(other): @@ -753,7 +749,7 @@ def _sub_datelike(self, other): i8 = self.asi8 result = i8 - other.value result = self._maybe_mask_results(result, - fill_value=tslib.iNaT) + fill_value=libts.iNaT) else: raise TypeError("cannot subtract DatetimeIndex and {typ}" .format(typ=type(other).__name__)) @@ -769,7 +765,7 @@ def _sub_datelike_dti(self, other): new_values = self_i8 - other_i8 if self.hasnans or other.hasnans: mask = (self._isnan) | (other._isnan) - new_values[mask] = tslib.iNaT + new_values[mask] = libts.iNaT return new_values.view('i8') def _maybe_update_attributes(self, attrs): @@ -822,7 +818,7 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(self, date_format) - return tslib.format_array_from_datetime(self.asi8, + return libts.format_array_from_datetime(self.asi8, tz=self.tz, format=format, na_rep=na_rep) @@ -855,7 +851,7 @@ def _get_time_micros(self): values = self.asi8 if self.tz is not None and self.tz is not utc: values = self._local_timestamps() - return tslib.get_time_micros(values) + return libts.get_time_micros(values) def to_series(self, keep_tz=False): """ @@ -908,7 +904,7 @@ def to_pydatetime(self): ------- datetimes : ndarray """ - return tslib.ints_to_pydatetime(self.asi8, tz=self.tz) + return libts.ints_to_pydatetime(self.asi8, tz=self.tz) def to_period(self, freq=None): """ @@ -1160,7 +1156,7 @@ def __iter__(self): for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, l) - converted = tslib.ints_to_pydatetime(data[start_i:end_i], + converted = libts.ints_to_pydatetime(data[start_i:end_i], tz=self.tz, freq=self.freq, box=True) for v in converted: @@ -1248,14 +1244,14 @@ def _parsed_string_to_bounds(self, reso, parsed): Timestamp(datetime(parsed.year, 12, 31, 23, 59, 59, 999999), tz=self.tz)) elif reso == 'month': - d = tslib.monthrange(parsed.year, parsed.month)[1] + d = libts.monthrange(parsed.year, parsed.month)[1] return (Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz), Timestamp(datetime(parsed.year, parsed.month, d, 23, 59, 59, 999999), tz=self.tz)) elif reso == 'quarter': qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead - d = tslib.monthrange(parsed.year, qe)[1] # at end of month 
+ d = libts.monthrange(parsed.year, qe)[1] # at end of month return (Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz), Timestamp(datetime(parsed.year, qe, d, 23, 59, @@ -1594,9 +1590,9 @@ def time(self): """ Returns numpy array of datetime.time. The time part of the Timestamps. """ - return self._maybe_mask_results(_algos.arrmap_object( + return self._maybe_mask_results(libalgos.arrmap_object( self.asobject.values, - lambda x: np.nan if x is tslib.NaT else x.time())) + lambda x: np.nan if x is libts.NaT else x.time())) @property def date(self): @@ -1604,7 +1600,7 @@ def date(self): Returns numpy array of python datetime.date objects (namely, the date part of Timestamps without timezone information). """ - return self._maybe_mask_results(_algos.arrmap_object( + return self._maybe_mask_results(libalgos.arrmap_object( self.asobject.values, lambda x: x.date())) def normalize(self): @@ -1615,7 +1611,7 @@ def normalize(self): ------- normalized : DatetimeIndex """ - new_values = tslib.date_normalize(self.asi8, self.tz) + new_values = libts.date_normalize(self.asi8, self.tz) return DatetimeIndex(new_values, freq='infer', name=self.name, tz=self.tz) @@ -1654,11 +1650,11 @@ def is_normalized(self): """ Returns True if all of the dates are at midnight ("no time") """ - return tslib.dates_normalized(self.asi8, self.tz) + return libts.dates_normalized(self.asi8, self.tz) @cache_readonly def _resolution(self): - return period.resolution(self.asi8, self.tz) + return libperiod.resolution(self.asi8, self.tz) def insert(self, loc, item): """ @@ -1695,7 +1691,7 @@ def insert(self, loc, item): new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) if self.tz is not None: - new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz) + new_dates = libts.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) @@ -1735,7 +1731,7 @@ def delete(self, loc): freq = self.freq if self.tz is not None: - new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz) + new_dates = libts.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) def tz_convert(self, tz): @@ -1759,7 +1755,7 @@ def tz_convert(self, tz): TypeError If DatetimeIndex is tz-naive. 
""" - tz = tslib.maybe_get_tz(tz) + tz = libts.maybe_get_tz(tz) if self.tz is None: # tz naive, use tz_localize @@ -1814,14 +1810,14 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): """ if self.tz is not None: if tz is None: - new_dates = tslib.tz_convert(self.asi8, 'UTC', self.tz) + new_dates = libts.tz_convert(self.asi8, 'UTC', self.tz) else: raise TypeError("Already tz-aware, use tz_convert to convert.") else: - tz = tslib.maybe_get_tz(tz) + tz = libts.maybe_get_tz(tz) # Convert to UTC - new_dates = tslib.tz_localize_to_utc(self.asi8, tz, + new_dates = libts.tz_localize_to_utc(self.asi8, tz, ambiguous=ambiguous, errors=errors) new_dates = new_dates.view(_NS_DTYPE) @@ -2134,7 +2130,7 @@ def _to_m8(key, tz=None): # this also converts strings key = Timestamp(key, tz=tz) - return np.int64(tslib.pydt_to_i8(key)).view(_NS_DTYPE) + return np.int64(libts.pydt_to_i8(key)).view(_NS_DTYPE) _CACHE_START = Timestamp(datetime(1950, 1, 1)) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 79227f6de90a5..2b6a684fc39dd 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -10,8 +10,7 @@ # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday from dateutil.easter import easter -import pandas.tslib as tslib -from pandas.tslib import Timestamp, OutOfBoundsDatetime, Timedelta +from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta import functools import operator diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index bfe7724a1cfaa..f7e9ba9eaa9b1 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -29,10 +29,11 @@ from pandas.tseries.tools import parse_time_string import pandas.tseries.offsets as offsets -import pandas._period as period -from pandas._period import (Period, IncompatibleFrequency, - get_period_field_arr, _validate_end_alias, - _quarter_to_myear) +from pandas._libs.lib import infer_dtype +from pandas._libs import tslib, period +from pandas._libs.period import (Period, IncompatibleFrequency, + get_period_field_arr, _validate_end_alias, + _quarter_to_myear) from pandas.core.base import _shared_docs from pandas.indexes.base import _index_shared_docs, _ensure_index @@ -40,9 +41,8 @@ from pandas import compat from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate_kwarg) -from pandas.lib import infer_dtype -import pandas.tslib as tslib from pandas.compat import zip, u + import pandas.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 21d7dc0c177b6..2856b54ad9a8c 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -20,10 +20,9 @@ import pandas.compat as compat from pandas.compat.numpy import function as nv -from pandas.lib import Timestamp -from pandas._period import IncompatibleFrequency -import pandas.lib as lib -import pandas.tslib as tslib +from pandas._libs import lib, tslib +from pandas._libs.lib import Timestamp +from pandas._libs.period import IncompatibleFrequency from pandas.util.decorators import Appender from pandas.core.generic import _shared_docs diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index c62e3fc40d4af..f47d80a31b174 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -30,13 +30,8 @@ from pandas.tseries.timedeltas import (to_timedelta, _coerce_scalar_to_timedelta_type) from pandas.tseries.offsets import Tick, DateOffset - 
-import pandas.lib as lib -import pandas.tslib as tslib -import pandas._join as _join -import pandas.index as _index - -Timedelta = tslib.Timedelta +from pandas._libs import (lib, index as libindex, tslib as libts, + join as libjoin, Timedelta, NaT, iNaT) def _td_index_cmp(opname, nat_result=False): @@ -47,7 +42,7 @@ def _td_index_cmp(opname, nat_result=False): def wrapper(self, other): msg = "cannot compare a TimedeltaIndex with type {0}" func = getattr(super(TimedeltaIndex, self), opname) - if _is_convertible_to_td(other) or other is tslib.NaT: + if _is_convertible_to_td(other) or other is NaT: try: other = _to_m8(other) except ValueError: @@ -65,9 +60,9 @@ def wrapper(self, other): result = _values_from_object(result) if isinstance(other, Index): - o_mask = other.values.view('i8') == tslib.iNaT + o_mask = other.values.view('i8') == iNaT else: - o_mask = other.view('i8') == tslib.iNaT + o_mask = other.view('i8') == iNaT if o_mask.any(): result[o_mask] = nat_result @@ -126,11 +121,11 @@ def _join_i8_wrapper(joinf, **kwargs): return DatetimeIndexOpsMixin._join_i8_wrapper( joinf, dtype='m8[ns]', **kwargs) - _inner_indexer = _join_i8_wrapper(_join.inner_join_indexer_int64) - _outer_indexer = _join_i8_wrapper(_join.outer_join_indexer_int64) - _left_indexer = _join_i8_wrapper(_join.left_join_indexer_int64) + _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer_int64) + _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer_int64) + _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer_int64) _left_indexer_unique = _join_i8_wrapper( - _join.left_join_indexer_unique_int64, with_indexers=False) + libjoin.left_join_indexer_unique_int64, with_indexers=False) _arrmap = None _datetimelike_ops = ['days', 'seconds', 'microseconds', 'nanoseconds', 'freq', 'components'] @@ -142,7 +137,7 @@ def _join_i8_wrapper(joinf, **kwargs): __le__ = _td_index_cmp('__le__') __ge__ = _td_index_cmp('__ge__') - _engine_type = _index.TimedeltaEngine + _engine_type = libindex.TimedeltaEngine _comparables = ['name', 'freq'] _attributes = ['name', 'freq'] @@ -274,7 +269,7 @@ def _box_func(self): def _simple_new(cls, values, name=None, freq=None, **kwargs): values = np.array(values, copy=False) if values.dtype == np.object_: - values = tslib.array_to_timedelta64(values) + values = libts.array_to_timedelta64(values) if values.dtype != _TD_DTYPE: values = _ensure_int64(values).view(_TD_DTYPE) @@ -341,18 +336,18 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): def _add_datelike(self, other): # adding a timedeltaindex to a datetimelike from pandas import Timestamp, DatetimeIndex - if other is tslib.NaT: + if other is NaT: result = self._nat_new(box=False) else: other = Timestamp(other) i8 = self.asi8 result = checked_add_with_arr(i8, other.value) - result = self._maybe_mask_results(result, fill_value=tslib.iNaT) + result = self._maybe_mask_results(result, fill_value=iNaT) return DatetimeIndex(result, name=self.name, copy=False) def _sub_datelike(self, other): from pandas import DatetimeIndex - if other is tslib.NaT: + if other is NaT: result = self._nat_new(box=False) else: raise TypeError("cannot subtract a datelike from a TimedeltaIndex") @@ -452,7 +447,7 @@ def to_pytimedelta(self): ------- datetimes : ndarray """ - return tslib.ints_to_pytimedelta(self.asi8) + return libts.ints_to_pytimedelta(self.asi8) @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): @@ -677,7 +672,7 @@ def get_loc(self, key, method=None, tolerance=None): raise TypeError if isnull(key): - key = 
tslib.NaT + key = NaT if tolerance is not None: # try converting tolerance now, so errors don't get swallowed by @@ -736,7 +731,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): def _get_string_slice(self, key, use_lhs=True, use_rhs=True): freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None)) - if is_integer(key) or is_float(key) or key is tslib.NaT: + if is_integer(key) or is_float(key) or key is NaT: self._invalid_indexer('slice', key) loc = self._partial_td_slice(key, freq, use_lhs=use_lhs, use_rhs=use_rhs) @@ -837,7 +832,7 @@ def insert(self, loc, item): pass freq = None - if isinstance(item, (Timedelta, tslib.NaTType)): + if isinstance(item, (Timedelta, libts.NaTType)): # check freq can be preserved on edge cases if self.freq is not None: diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 5a5d1533bfa91..ead602ee80e32 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -import pandas.tslib as tslib +import pandas._libs.tslib as tslib from pandas.types.common import (_ensure_object, is_integer_dtype, diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index f746409aadfc9..093331e861fa7 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -2,8 +2,7 @@ import numpy as np from collections import MutableMapping -import pandas.lib as lib -import pandas.tslib as tslib +from pandas._libs import lib, tslib from pandas.types.common import (_ensure_object, is_datetime64_ns_dtype, diff --git a/pandas/tslib.py b/pandas/tslib.py new file mode 100644 index 0000000000000..3ecbffa20700d --- /dev/null +++ b/pandas/tslib.py @@ -0,0 +1,8 @@ +# flake8: noqa + +import warnings +warnings.warn("The pandas.tslib module is deprecated and will be " + "removed in a future version. Please import from " + "the pandas._libs.tslib instead", FutureWarning, stacklevel=2) +from pandas._libs.tslib import (Timestamp, Timedelta, + NaT, OutOfBoundsDatetime) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 8cc3fe41f73c8..1cd55274b9b49 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -2,8 +2,8 @@ from datetime import datetime, timedelta import numpy as np -from pandas import lib, tslib -from pandas.tslib import iNaT +from pandas._libs import tslib, lib +from pandas._libs.tslib import iNaT from pandas.compat import string_types, text_type, PY3 from .common import (_ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, @@ -807,14 +807,14 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): "dtype [%s]" % dtype) if is_scalar(value): - if value == tslib.iNaT or isnull(value): - value = tslib.iNaT + if value == iNaT or isnull(value): + value = iNaT else: value = np.array(value, copy=False) # have a scalar array-like (e.g. 
NaT) if value.ndim == 0: - value = tslib.iNaT + value = iNaT # we have an array of datetime or timedeltas & nulls elif np.prod(value.shape) or not is_dtype_equal(value.dtype, diff --git a/pandas/types/common.py b/pandas/types/common.py index e58e0826ea49a..1be5b5f6f1368 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -3,7 +3,7 @@ import numpy as np from pandas.compat import (string_types, text_type, binary_type, PY3, PY36) -from pandas import lib, algos +from pandas._libs import algos, lib from .dtypes import (CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, diff --git a/pandas/types/concat.py b/pandas/types/concat.py index 9e47a97dd621a..b098bbb75d984 100644 --- a/pandas/types/concat.py +++ b/pandas/types/concat.py @@ -3,7 +3,7 @@ """ import numpy as np -import pandas.tslib as tslib +import pandas._libs.tslib as tslib from pandas import compat from pandas.core.algorithms import take_1d from .common import (is_categorical_dtype, diff --git a/pandas/types/inference.py b/pandas/types/inference.py index d2a2924b27659..d8e3b3ee7329b 100644 --- a/pandas/types/inference.py +++ b/pandas/types/inference.py @@ -6,7 +6,7 @@ from numbers import Number from pandas.compat import (string_types, text_type, string_and_binary_types) -from pandas import lib +from pandas._libs import lib is_bool = lib.is_bool diff --git a/pandas/types/missing.py b/pandas/types/missing.py index e6791b79bf3bd..cc8b5edc27542 100644 --- a/pandas/types/missing.py +++ b/pandas/types/missing.py @@ -2,8 +2,8 @@ missing types & inference """ import numpy as np -from pandas import lib -from pandas.tslib import NaT, iNaT +from pandas._libs import lib +from pandas._libs.tslib import NaT, iNaT from .generic import (ABCMultiIndex, ABCSeries, ABCIndexClass, ABCGeneric) from .common import (is_string_dtype, is_datetimelike, diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 62ff6ef14418a..4e1719958e8b7 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,5 +1,5 @@ from pandas.compat import StringIO, callable, signature -from pandas.lib import cache_readonly # noqa +from pandas._libs.lib import cache_readonly # noqa import types import sys import warnings diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index cf8b0f7960f17..b181c4627b1e1 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -13,12 +13,15 @@ class _DeprecatedModule(object): Parameters ---------- deprmod : name of module to be deprecated. + deprmodto : name of module as a replacement, optional + if not givent will __module__ removals : objects or methods in module that will no longer be accessible once module is removed. 
""" - def __init__(self, deprmod, removals=None): + def __init__(self, deprmod, deprmodto=None, removals=None): self.deprmod = deprmod + self.deprmodto = deprmodto self.removals = removals if self.removals is not None: self.removals = frozenset(self.removals) @@ -40,7 +43,15 @@ def __getattr__(self, name): if name in self.self_dir: return object.__getattribute__(self, name) - deprmodule = self._import_deprmod() + try: + deprmodule = self._import_deprmod(self.deprmod) + except ImportError: + if self.deprmodto is None: + raise + + # a rename + deprmodule = self._import_deprmod(self.deprmodto) + obj = getattr(deprmodule, name) if self.removals is not None and name in self.removals: @@ -49,17 +60,24 @@ def __getattr__(self, name): "a future version.".format(deprmod=self.deprmod, name=name), FutureWarning, stacklevel=2) else: + deprmodto = self.deprmodto + if deprmodto is None: + deprmodto = "{modname}.{name}".format( + modname=obj.__module__, name=name) # The object is actually located in another module. warnings.warn( "{deprmod}.{name} is deprecated. Please use " - "{modname}.{name} instead.".format( - deprmod=self.deprmod, modname=obj.__module__, name=name), + "{deprmodto}.{name} instead.".format( + deprmod=self.deprmod, name=name, deprmodto=deprmodto), FutureWarning, stacklevel=2) return obj - def _import_deprmod(self): + def _import_deprmod(self, mod=None): + if mod is None: + mod = self.deprmod + with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=FutureWarning) - deprmodule = importlib.import_module(self.deprmod) + deprmodule = importlib.import_module(mod) return deprmodule diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c5e5df9037daa..b68bf55a347b2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -47,7 +47,7 @@ TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex, Series, DataFrame, Panel, Panel4D) from pandas.util.decorators import deprecate -from pandas import _testing +from pandas.util import libtesting from pandas.io.common import urlopen slow = pytest.mark.slow @@ -173,7 +173,7 @@ def assert_almost_equal(left, right, check_exact=False, else: obj = 'Input' assert_class_equal(left, right, obj=obj) - return _testing.assert_almost_equal( + return libtesting.assert_almost_equal( left, right, check_dtype=check_dtype, check_less_precise=check_less_precise, @@ -185,7 +185,7 @@ def assert_dict_equal(left, right, compare_keys=True): assertIsInstance(left, dict, '[dict] ') assertIsInstance(right, dict, '[dict] ') - return _testing.assert_dict_equal(left, right, compare_keys=compare_keys) + return libtesting.assert_dict_equal(left, right, compare_keys=compare_keys) def randbool(size=(), p=0.5): @@ -833,10 +833,10 @@ def _get_ilevel_values(index, level): .format(obj, np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) else: - _testing.assert_almost_equal(left.values, right.values, - check_less_precise=check_less_precise, - check_dtype=exact, - obj=obj, lobj=left, robj=right) + libtesting.assert_almost_equal(left.values, right.values, + check_less_precise=check_less_precise, + check_dtype=exact, + obj=obj, lobj=left, robj=right) # metadata comparison if check_names: @@ -1213,10 +1213,10 @@ def assert_series_equal(left, right, check_dtype=True, assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype) else: - _testing.assert_almost_equal(left.get_values(), right.get_values(), - check_less_precise=check_less_precise, - check_dtype=check_dtype, - obj='{0}'.format(obj)) + 
libtesting.assert_almost_equal(left.get_values(), right.get_values(), + check_less_precise=check_less_precise, + check_dtype=check_dtype, + obj='{0}'.format(obj)) # metadata comparison if check_names: @@ -1432,8 +1432,10 @@ def assert_sp_array_equal(left, right, check_dtype=True): check_dtype=check_dtype) # SparseIndex comparison - assertIsInstance(left.sp_index, pd._sparse.SparseIndex, '[SparseIndex]') - assertIsInstance(right.sp_index, pd._sparse.SparseIndex, '[SparseIndex]') + assertIsInstance(left.sp_index, + pd.sparse.libsparse.SparseIndex, '[SparseIndex]') + assertIsInstance(right.sp_index, + pd.sparse.libsparse.SparseIndex, '[SparseIndex]') if not left.sp_index.equals(right.sp_index): raise_assert_detail('SparseArray.index', 'index are not equal', diff --git a/pandas/src/testing.pyx b/pandas/util/testing.pyx similarity index 100% rename from pandas/src/testing.pyx rename to pandas/util/testing.pyx diff --git a/scripts/bench_join.py b/scripts/bench_join.py index 1ce5c94130e85..f9d43772766d8 100644 --- a/scripts/bench_join.py +++ b/scripts/bench_join.py @@ -1,6 +1,6 @@ from pandas.compat import range, lrange import numpy as np -import pandas.lib as lib +import pandas._libs.lib as lib from pandas import * from copy import deepcopy import time diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py index 7b93112b7f869..b19da6a2c47d8 100644 --- a/scripts/bench_join_multi.py +++ b/scripts/bench_join_multi.py @@ -3,7 +3,7 @@ import numpy as np from pandas.compat import zip, range, lzip from pandas.util.testing import rands -import pandas.lib as lib +import pandas._libs.lib as lib N = 100000 diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py index 5acf7da7534a3..f640a6ed79503 100644 --- a/scripts/groupby_test.py +++ b/scripts/groupby_test.py @@ -5,7 +5,7 @@ from pandas import * -import pandas.lib as tseries +import pandas._libs.lib as tseries import pandas.core.groupby as gp import pandas.util.testing as tm from pandas.compat import range diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py index 07161cc6499bf..03f39e2b18372 100644 --- a/scripts/roll_median_leak.py +++ b/scripts/roll_median_leak.py @@ -7,7 +7,7 @@ from vbench.api import Benchmark from pandas.util.testing import rands from pandas.compat import range -import pandas.lib as lib +import pandas._libs.lib as lib import pandas._sandbox as sbx import time diff --git a/setup.py b/setup.py index 525cbdf600c78..e257b2376060b 100755 --- a/setup.py +++ b/setup.py @@ -109,21 +109,21 @@ def is_platform_mac(): from os.path import join as pjoin -_pxipath = pjoin('pandas', 'src') _pxi_dep_template = { - 'algos': ['algos_common_helper.pxi.in', 'algos_groupby_helper.pxi.in', - 'algos_take_helper.pxi.in', 'algos_rank_helper.pxi.in'], - '_reshape': ['reshape_helper.pxi.in'], - '_join': ['join_helper.pxi.in', 'joins_func_helper.pxi.in'], - 'hashtable': ['hashtable_class_helper.pxi.in', - 'hashtable_func_helper.pxi.in'], - 'index': ['index_class_helper.pxi.in'], - '_sparse': ['sparse_op_helper.pxi.in'] + 'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_groupby_helper.pxi.in', + '_libs/algos_take_helper.pxi.in', '_libs/algos_rank_helper.pxi.in'], + 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], + 'reshape': ['_libs/reshape_helper.pxi.in'], + 'hashtable': ['_libs/hashtable_class_helper.pxi.in', + '_libs/hashtable_func_helper.pxi.in'], + 'index': ['_libs/index_class_helper.pxi.in'], + 'sparse': ['sparse/sparse_op_helper.pxi.in'], } + _pxifiles = [] _pxi_dep = {} for module, 
files in _pxi_dep_template.items(): - pxi_files = [pjoin(_pxipath, x) for x in files] + pxi_files = [pjoin('pandas', x) for x in files] _pxifiles.extend(pxi_files) _pxi_dep[module] = pxi_files @@ -261,7 +261,7 @@ def initialize_options(self): self._clean_me = [] self._clean_trees = [] - base = pjoin('pandas','src') + base = pjoin('pandas','_libs', 'src') dt = pjoin(base,'datetime') src = base util = pjoin('pandas','util') @@ -327,19 +327,19 @@ def run(self): class CheckSDist(sdist_class): """Custom sdist that ensures Cython has compiled all pyx files to c.""" - _pyxfiles = ['pandas/lib.pyx', - 'pandas/hashtable.pyx', - 'pandas/tslib.pyx', - 'pandas/index.pyx', - 'pandas/algos.pyx', - 'pandas/join.pyx', - 'pandas/window.pyx', - 'pandas/parser.pyx', - 'pandas/src/period.pyx', - 'pandas/src/sparse.pyx', - 'pandas/src/testing.pyx', - 'pandas/src/hash.pyx', - 'pandas/io/sas/saslib.pyx'] + _pyxfiles = ['pandas/_libs/lib.pyx', + 'pandas/_libs/hashtable.pyx', + 'pandas/_libs/tslib.pyx', + 'pandas/_libs/period.pyx', + 'pandas/_libs/index.pyx', + 'pandas/_libs/algos.pyx', + 'pandas/_libs/join.pyx', + 'pandas/core/window.pyx', + 'pandas/sparse/sparse.pyx', + 'pandas/util/testing.pyx', + 'pandas/tools/hash.pyx', + 'pandas/io/parsers.pyx', + 'pandas/io/sas/sas.pyx'] def initialize_options(self): sdist_class.initialize_options(self) @@ -374,6 +374,7 @@ def check_cython_extensions(self, extensions): for ext in extensions: for src in ext.sources: if not os.path.exists(src): + print("{}: -> [{}]".format(ext.name, ext.sources)) raise Exception("""Cython-generated file '%s' not found. Cython is required to compile pandas from a development branch. Please install Cython or download a release package of pandas. @@ -440,12 +441,12 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): if suffix == '.pyx': lib_depends = [srcpath(f, suffix='.pyx') for f in lib_depends] - lib_depends.append('pandas/src/util.pxd') + lib_depends.append('pandas/_libs/src/util.pxd') else: lib_depends = [] plib_depends = [] -common_include = ['pandas/src/klib', 'pandas/src'] +common_include = ['pandas/_libs/src/klib', 'pandas/_libs/src'] def pxd(name): @@ -457,71 +458,70 @@ def pxd(name): else: extra_compile_args=['-Wno-unused-function'] -lib_depends = lib_depends + ['pandas/src/numpy_helper.h', - 'pandas/src/parse_helper.h'] +lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', + 'pandas/_libs/src/parse_helper.h'] -tseries_depends = ['pandas/src/datetime/np_datetime.h', - 'pandas/src/datetime/np_datetime_strings.h', - 'pandas/src/datetime_helper.h', - 'pandas/src/period_helper.h', - 'pandas/src/datetime.pxd'] +tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', + 'pandas/_libs/src/datetime/np_datetime_strings.h', + 'pandas/_libs/src/datetime_helper.h', + 'pandas/_libs/src/period_helper.h', + 'pandas/_libs/src/datetime.pxd'] # some linux distros require it libraries = ['m'] if not is_platform_windows() else [] -ext_data = dict( - lib={'pyxfile': 'lib', - 'pxdfiles': [], - 'depends': lib_depends}, - hashtable={'pyxfile': 'hashtable', - 'pxdfiles': ['hashtable'], - 'depends': (['pandas/src/klib/khash_python.h'] - + _pxi_dep['hashtable'])}, - tslib={'pyxfile': 'tslib', - 'depends': tseries_depends, - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c', - 'pandas/src/period_helper.c']}, - _period={'pyxfile': 'src/period', - 'depends': tseries_depends, - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c', - 
'pandas/src/period_helper.c']}, - index={'pyxfile': 'index', - 'sources': ['pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c'], - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['index']}, - algos={'pyxfile': 'algos', - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['algos']}, - _reshape={'pyxfile': 'src/reshape', - 'depends': _pxi_dep['_reshape']}, - _join={'pyxfile': 'src/join', - 'pxdfiles': ['src/util', 'hashtable'], - 'depends': _pxi_dep['_join']}, - _window={'pyxfile': 'window', - 'pxdfiles': ['src/skiplist', 'src/util'], - 'depends': ['pandas/src/skiplist.pyx', - 'pandas/src/skiplist.h']}, - parser={'pyxfile': 'parser', - 'depends': ['pandas/src/parser/tokenizer.h', - 'pandas/src/parser/io.h', - 'pandas/src/numpy_helper.h'], - 'sources': ['pandas/src/parser/tokenizer.c', - 'pandas/src/parser/io.c']}, - _sparse={'pyxfile': 'src/sparse', - 'depends': ([srcpath('sparse', suffix='.pyx')] + - _pxi_dep['_sparse'])}, - _testing={'pyxfile': 'src/testing', - 'depends': [srcpath('testing', suffix='.pyx')]}, - _hash={'pyxfile': 'src/hash', - 'depends': [srcpath('hash', suffix='.pyx')]}, -) - -ext_data["io.sas.saslib"] = {'pyxfile': 'io/sas/saslib'} +ext_data = { + '_libs.lib': {'pyxfile': '_libs/lib', + 'pxdfiles': [], + 'depends': lib_depends}, + '_libs.hashtable': {'pyxfile': '_libs/hashtable', + 'pxdfiles': ['_libs/hashtable'], + 'depends': (['pandas/_libs/src/klib/khash_python.h'] + + _pxi_dep['hashtable'])}, + '_libs.tslib': {'pyxfile': '_libs/tslib', + 'depends': tseries_depends, + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c', + 'pandas/_libs/src/period_helper.c']}, + '_libs.period': {'pyxfile': '_libs/period', + 'depends': tseries_depends, + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c', + 'pandas/_libs/src/period_helper.c']}, + '_libs.index': {'pyxfile': '_libs/index', + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c'], + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['index']}, + '_libs.algos': {'pyxfile': '_libs/algos', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['algos']}, + '_libs.join': {'pyxfile': '_libs/join', + 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'depends': _pxi_dep['join']}, + '_libs.reshape': {'pyxfile': '_libs/reshape', + 'depends': _pxi_dep['reshape']}, + 'core.libwindow': {'pyxfile': 'core/window', + 'pxdfiles': ['_libs/src/skiplist', '_libs/src/util'], + 'depends': ['pandas/_libs/src/skiplist.pyx', + 'pandas/_libs/src/skiplist.h']}, + 'io.libparsers': {'pyxfile': 'io/parsers', + 'depends': ['pandas/_libs/src/parser/tokenizer.h', + 'pandas/_libs/src/parser/io.h', + 'pandas/_libs/src/numpy_helper.h'], + 'sources': ['pandas/_libs/src/parser/tokenizer.c', + 'pandas/_libs/src/parser/io.c']}, + 'sparse.libsparse': {'pyxfile': 'sparse/sparse', + 'depends': (['pandas/sparse/sparse.pyx'] + + _pxi_dep['sparse'])}, + 'util.libtesting': {'pyxfile': 'util/testing', + 'depends': ['pandas/util/testing.pyx']}, + 'tools.libhashing': {'pyxfile': 'tools/hashing', + 'depends': ['pandas/tools/hashing.pyx']}, + 'io.sas.libsas': {'pyxfile': 'io/sas/sas'}, + } extensions = [] @@ -552,25 +552,25 @@ def pxd(name): else: macros = [('__LITTLE_ENDIAN__', '1')] -packer_ext = Extension('pandas.msgpack._packer', - depends=['pandas/src/msgpack/pack.h', - 'pandas/src/msgpack/pack_template.h'], 
+packer_ext = Extension('pandas.io.msgpack._packer', + depends=['pandas/_libs/src/msgpack/pack.h', + 'pandas/_libs/src/msgpack/pack_template.h'], sources = [srcpath('_packer', suffix=suffix if suffix == '.pyx' else '.cpp', - subdir='msgpack')], + subdir='io/msgpack')], language='c++', - include_dirs=['pandas/src/msgpack'] + common_include, + include_dirs=['pandas/_libs/src/msgpack'] + common_include, define_macros=macros, extra_compile_args=extra_compile_args) -unpacker_ext = Extension('pandas.msgpack._unpacker', - depends=['pandas/src/msgpack/unpack.h', - 'pandas/src/msgpack/unpack_define.h', - 'pandas/src/msgpack/unpack_template.h'], +unpacker_ext = Extension('pandas.io.msgpack._unpacker', + depends=['pandas/_libs/src/msgpack/unpack.h', + 'pandas/_libs/src/msgpack/unpack_define.h', + 'pandas/_libs/src/msgpack/unpack_template.h'], sources = [srcpath('_unpacker', suffix=suffix if suffix == '.pyx' else '.cpp', - subdir='msgpack')], + subdir='io/msgpack')], language='c++', - include_dirs=['pandas/src/msgpack'] + common_include, + include_dirs=['pandas/_libs/src/msgpack'] + common_include, define_macros=macros, extra_compile_args=extra_compile_args) extensions.append(packer_ext) @@ -586,20 +586,20 @@ def pxd(name): root, _ = os.path.splitext(ext.sources[0]) ext.sources[0] = root + suffix -ujson_ext = Extension('pandas.json', - depends=['pandas/src/ujson/lib/ultrajson.h', - 'pandas/src/datetime_helper.h', - 'pandas/src/numpy_helper.h'], - sources=['pandas/src/ujson/python/ujson.c', - 'pandas/src/ujson/python/objToJSON.c', - 'pandas/src/ujson/python/JSONtoObj.c', - 'pandas/src/ujson/lib/ultrajsonenc.c', - 'pandas/src/ujson/lib/ultrajsondec.c', - 'pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c'], - include_dirs=['pandas/src/ujson/python', - 'pandas/src/ujson/lib', - 'pandas/src/datetime'] + common_include, +ujson_ext = Extension('pandas.io.json.libjson', + depends=['pandas/_libs/src/ujson/lib/ultrajson.h', + 'pandas/_libs/src/datetime_helper.h', + 'pandas/_libs/src/numpy_helper.h'], + sources=['pandas/_libs/src/ujson/python/ujson.c', + 'pandas/_libs/src/ujson/python/objToJSON.c', + 'pandas/_libs/src/ujson/python/JSONtoObj.c', + 'pandas/_libs/src/ujson/lib/ultrajsonenc.c', + 'pandas/_libs/src/ujson/lib/ultrajsondec.c', + 'pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c'], + include_dirs=['pandas/_libs/src/ujson/python', + 'pandas/_libs/src/ujson/lib', + 'pandas/_libs/src/datetime'] + common_include, extra_compile_args=['-D_GNU_SOURCE'] + extra_compile_args) @@ -634,6 +634,8 @@ def pxd(name): 'pandas.io', 'pandas.io.json', 'pandas.io.sas', + 'pandas.io.msgpack', + 'pandas._libs', 'pandas.formats', 'pandas.sparse', 'pandas.stats', @@ -650,10 +652,10 @@ def pxd(name): 'pandas.tests.io.json', 'pandas.tests.io.parser', 'pandas.tests.io.sas', + 'pandas.tests.io.msgpack', 'pandas.tests.groupby', 'pandas.tests.series', 'pandas.tests.formats', - 'pandas.tests.msgpack', 'pandas.tests.scalar', 'pandas.tests.sparse', 'pandas.tests.tseries', @@ -663,7 +665,6 @@ def pxd(name): 'pandas.tools', 'pandas.tseries', 'pandas.types', - 'pandas.msgpack', 'pandas.util.clipboard' ], package_data={'pandas.tests': ['data/*.csv'], diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index a1326d63a112a..bd2e8a1c1d504 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -16,7 +16,7 @@ try: import pandas._tseries as lib except: - import pandas.lib as lib + import pandas._libs.lib as lib try: Panel = 
WidePanel From 9da0e0b233c79311c19dad6b151a157c4e47e109 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Mar 2017 19:31:22 -0500 Subject: [PATCH 164/353] CLN: clean up Makefile & fix lib.pyx deps --- Makefile | 3 --- setup.py | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 90dcd16d955d6..194a8861715b7 100644 --- a/Makefile +++ b/Makefile @@ -9,9 +9,6 @@ clean: clean_pyc: -find . -name '*.py[co]' -exec rm {} \; -sparse: pandas/src/sparse.pyx - python setup.py build_ext --inplace - build: clean_pyc python setup.py build_ext --inplace diff --git a/setup.py b/setup.py index e257b2376060b..3e0a6b41152dc 100755 --- a/setup.py +++ b/setup.py @@ -440,7 +440,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): return pjoin('pandas', subdir, name + suffix) if suffix == '.pyx': - lib_depends = [srcpath(f, suffix='.pyx') for f in lib_depends] + lib_depends = [srcpath(f, suffix='.pyx', subdir='_libs/src') for f in lib_depends] lib_depends.append('pandas/_libs/src/util.pxd') else: lib_depends = [] @@ -474,13 +474,13 @@ def pxd(name): ext_data = { '_libs.lib': {'pyxfile': '_libs/lib', - 'pxdfiles': [], - 'depends': lib_depends}, + 'depends': lib_depends + tseries_depends}, '_libs.hashtable': {'pyxfile': '_libs/hashtable', 'pxdfiles': ['_libs/hashtable'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.tslib': {'pyxfile': '_libs/tslib', + 'pxdfiles': ['_libs/src/util', '_libs/lib'], 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', From 8daf677b04b6797e7db894b85da7f6e5a4d356c5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 08:12:10 -0500 Subject: [PATCH 165/353] DOC: fix appeveyor badge to point to pandas-dev account --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8595043cf68c3..e05f1405419fc 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,8 @@ From c9d4e0b01f8b47e7c04fb132081dce607c05757c Mon Sep 17 00:00:00 2001 From: manu Date: Wed, 8 Mar 2017 08:24:38 -0500 Subject: [PATCH 166/353] BUG: make Series.sort_values(ascending=[False]) behave as ascending=False (#15604) closes #15604 Author: manu Closes #15607 from MLopez-Ibanez/series-ascending and squashes the following commits: 6678574 [manu] BUG: make Series.sort_values(ascending=[False]) behave as ascending=False (#15604) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/series.py | 10 ++++++++++ pandas/tests/series/test_sorting.py | 19 +++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8f2033de6c77f..a7169640759e3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -228,7 +228,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - +- ``Series.sort_values`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values`` (:issue:`15604`) .. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/series.py b/pandas/core/series.py index 83036ffef0bed..f23e90effdabf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -14,6 +14,7 @@ import numpy.ma as ma from pandas.types.common import (_coerce_to_dtype, is_categorical_dtype, + is_bool, is_integer, is_integer_dtype, is_float_dtype, is_extension_type, is_datetimetz, @@ -1719,6 +1720,15 @@ def _try_kind_sort(arr): argsorted = _try_kind_sort(arr[good]) + if is_list_like(ascending): + if len(ascending) != 1: + raise ValueError('Length of ascending (%d) must be 1 ' + 'for Series' % (len(ascending))) + ascending = ascending[0] + + if not is_bool(ascending): + raise ValueError('ascending must be boolean') + if not ascending: argsorted = argsorted[::-1] diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index db506f12a2293..590a530a847bd 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -64,6 +64,25 @@ def test_sort_values(self): ordered = ts.sort_values(ascending=False, na_position='first') assert_almost_equal(expected, ordered.valid().values) + # ascending=[False] should behave the same as ascending=False + ordered = ts.sort_values(ascending=[False]) + expected = ts.sort_values(ascending=False) + assert_series_equal(expected, ordered) + ordered = ts.sort_values(ascending=[False], na_position='first') + expected = ts.sort_values(ascending=False, na_position='first') + assert_series_equal(expected, ordered) + + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=None)) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=[])) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=[1, 2, 3])) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending=[False, False])) + self.assertRaises(ValueError, + lambda: ts.sort_values(ascending='foobar')) + # inplace=True ts = self.ts.copy() ts.sort_values(ascending=False, inplace=True) From 11c947997e0f7f91a4170ad7ddcc90124b7f5f2a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 08:30:59 -0500 Subject: [PATCH 167/353] DOC: remove gbq references / clean some whatsnew --- doc/source/whatsnew/v0.20.0.txt | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a7169640759e3..92daf29efe71f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -12,7 +12,7 @@ Highlights include: - Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - The ``.ix`` indexer has been deprecated, see :ref:`here ` - Switched the test framework to `pytest`_ (:issue:`13097`) -- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref: `here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here ` .. _pytest: http://doc.pytest.org/en/latest/ @@ -27,11 +27,6 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ -- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. -- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) -- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) - - .. 
_whatsnew_0200.enhancements.dataio_dtype: @@ -193,6 +188,11 @@ You must enable this by setting the ``display.html.table_schema`` option to True Other enhancements ^^^^^^^^^^^^^^^^^^ +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. +- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) + + - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). @@ -201,7 +201,6 @@ Other enhancements - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. '0.5min' is parsed as '30s') (:issue:`8419`) - ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) -- ``pd.read_gbq`` method now allows query configuration preferences (:issue:`14742`) - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack @@ -228,7 +227,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) -- ``Series.sort_values`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values`` (:issue:`15604`) + .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations @@ -444,7 +443,7 @@ Pandas Google BigQuery support has moved ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. -The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.2``. (:issue:`15347`) +The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.3``. (:issue:`15347`) Documentation is now hosted `here `__ .. _whatsnew_0200.api_breaking.memory_usage: @@ -611,9 +610,9 @@ Other API Changes - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) -- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision lost for integers greather than 2**53. 
Furthermore ``FLOAT`` columns with values above 10**4 are no longer casted to ``int64`` which also caused precision loss (:issue:`14064`, :issue:`14305`). - Reorganization of timeseries development tests (:issue:`14854`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) +- ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) .. _whatsnew_0200.deprecations: @@ -651,7 +650,7 @@ Removal of prior version deprecations/changes - ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) - The ``TimeSeries`` and ``SparseTimeSeries`` classes, aliases of ``Series`` and ``SparseSeries``, are removed (:issue:`10890`, :issue:`15098`). -- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:``) +- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:`15098`) - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). @@ -681,7 +680,7 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) -- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue: `14440`, :issue:`15578`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue:`14440`, :issue:`15578`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. 
(:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) From d32acaa7fbe95a96a7118a32324beea1e2e8ae32 Mon Sep 17 00:00:00 2001 From: Luca Scarabello Date: Wed, 8 Mar 2017 08:38:43 -0500 Subject: [PATCH 168/353] BUG: pd.cut with bins=1 and input all 0s The special case of running pd.cut() qith bins=1 an input containing all 0s raises a ValueError closes #15428 closes #15431 Author: Luca Scarabello Author: Luca Closes #15437 from luca-s/issue_15428 and squashes the following commits: 1248987 [Luca] rebased on master def84ba [Luca] Yet another implementation attempt 692503a [Luca Scarabello] Improved solution: using same approach as pd.cut b7d92dc [Luca] Added 'allow' duplicates option to _bins_to_cuts f56a27f [Luca Scarabello] Issue #15431 55806cf [Luca Scarabello] BUG: pd.cut with bins=1 and input all 0s --- doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/tests/tools/test_tile.py | 81 +++++++++++++++++++++++++++++++-- pandas/tools/tile.py | 6 +-- 3 files changed, 83 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 92daf29efe71f..bf778f6065010 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -698,8 +698,8 @@ Bug Fixes - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) - Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - - +- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) +- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/tools/test_tile.py index de44eadc15751..11b242bc06e15 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/tools/test_tile.py @@ -3,7 +3,7 @@ import numpy as np from pandas.compat import zip -from pandas import Series, Index +from pandas import Series, Index, Categorical import pandas.util.testing as tm from pandas.util.testing import assertRaisesRegexp import pandas.core.common as com @@ -239,7 +239,6 @@ def test_qcut_binning_issues(self): self.assertTrue(ep <= sn) def test_cut_return_categorical(self): - from pandas import Categorical s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = cut(s, 3) exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -249,7 +248,6 @@ def test_cut_return_categorical(self): tm.assert_series_equal(res, exp) def test_qcut_return_categorical(self): - from pandas import Categorical s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) res = qcut(s, [0, 0.333, 0.666, 1]) exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -285,6 +283,60 @@ def test_qcut_duplicates_bin(self): # invalid self.assertRaises(ValueError, qcut, values, 3, duplicates='foo') + def test_single_quantile(self): + # issue 15431 + expected = Series([0, 0]) + + s = Series([9., 9.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[9, 9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([-9., -9.]) + result = qcut(s, 1, labels=False) + 
tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[-9, -9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([0., 0.]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0, 0], ["[0, 0]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + expected = Series([0]) + + s = Series([9]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[9, 9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([-9]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[-9, -9]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + + s = Series([0]) + result = qcut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + result = qcut(s, 1) + exp_lab = Series(Categorical.from_codes([0], ["[0, 0]"], + ordered=True)) + tm.assert_series_equal(result, exp_lab) + def test_single_bin(self): # issue 14652 expected = Series([0, 0]) @@ -297,6 +349,29 @@ def test_single_bin(self): result = cut(s, 1, labels=False) tm.assert_series_equal(result, expected) + expected = Series([0]) + + s = Series([9]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + s = Series([-9]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + # issue 15428 + expected = Series([0, 0]) + + s = Series([0., 0.]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + + expected = Series([0]) + + s = Series([0]) + result = cut(s, 1, labels=False) + tm.assert_series_equal(result, expected) + def test_datetime_cut(self): # GH 14714 # testing for time data to be present as series diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index 9b21e542f153c..ccd8c2478e8a5 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -104,8 +104,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, mn, mx = [mi + 0.0 for mi in rng] if mn == mx: # adjust end points before binning - mn -= .001 * abs(mn) - mx += .001 * abs(mx) + mn -= .001 * abs(mn) if mn != 0 else .001 + mx += .001 * abs(mx) if mx != 0 else .001 bins = np.linspace(mn, mx, bins + 1, endpoint=True) else: # adjust end points after binning bins = np.linspace(mn, mx, bins + 1, endpoint=True) @@ -206,7 +206,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, "valid options are: raise, drop") unique_bins = algos.unique(bins) - if len(unique_bins) < len(bins): + if len(unique_bins) < len(bins) and len(bins) != 2: if duplicates == 'raise': raise ValueError("Bin edges must be unique: {}.\nYou " "can drop duplicate edges by setting " From b508a0486d0091c550964718d22b0d4292272587 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 09:52:29 -0500 Subject: [PATCH 169/353] BLD: fix linting wrt to #15537, changes in location of pandas/src (#15614) --- ci/lint.sh | 8 ++++---- pandas/_libs/src/datetime/np_datetime.h | 6 +++--- pandas/_libs/src/datetime/np_datetime_strings.h | 6 +++--- pandas/_libs/src/datetime_helper.h | 6 +++--- pandas/_libs/src/helper.h | 6 +++--- pandas/_libs/src/numpy_helper.h | 6 +++--- pandas/_libs/src/parse_helper.h | 6 +++--- pandas/_libs/src/parser/io.h | 6 +++--- pandas/_libs/src/parser/tokenizer.h | 6 +++--- 
pandas/_libs/src/period_helper.h | 6 +++--- pandas/_libs/src/skiplist.h | 6 +++--- pandas/_libs/src/ujson/lib/ultrajson.h | 6 +++--- pandas/_libs/src/ujson/python/py_defines.h | 8 ++++---- pandas/_libs/src/ujson/python/version.h | 8 ++++---- test.bat | 2 +- test_fast.sh | 2 +- 16 files changed, 47 insertions(+), 47 deletions(-) diff --git a/ci/lint.sh b/ci/lint.sh index 2ffc68e5eb139..ed3af2568811c 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -8,9 +8,9 @@ RET=0 if [ "$LINT" ]; then - # pandas/src is C code, so no need to search there. + # pandas/_libs/src is C code, so no need to search there. echo "Linting *.py" - flake8 pandas --filename=*.py --exclude pandas/src + flake8 pandas --filename=*.py --exclude pandas/_libs/src if [ $? -ne "0" ]; then RET=1 fi @@ -46,8 +46,8 @@ if [ "$LINT" ]; then echo "Linting *.c and *.h" for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson' do - echo "linting -> pandas/src/$path" - cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/$path + echo "linting -> pandas/_libs/src/$path" + cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/src/$path if [ $? -ne "0" ]; then RET=1 fi diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h index 3445fc3e48376..97ec5782b625b 100644 --- a/pandas/_libs/src/datetime/np_datetime.h +++ b/pandas/_libs/src/datetime/np_datetime.h @@ -14,8 +14,8 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt */ -#ifndef PANDAS_SRC_DATETIME_NP_DATETIME_H_ -#define PANDAS_SRC_DATETIME_NP_DATETIME_H_ +#ifndef PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_H_ +#define PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_H_ #include @@ -124,4 +124,4 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj); -#endif // PANDAS_SRC_DATETIME_NP_DATETIME_H_ +#endif // PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/_libs/src/datetime/np_datetime_strings.h b/pandas/_libs/src/datetime/np_datetime_strings.h index 1114ec5eae064..833c1869c1664 100644 --- a/pandas/_libs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/src/datetime/np_datetime_strings.h @@ -19,8 +19,8 @@ This file implements string parsing and creation for NumPy datetime. */ -#ifndef PANDAS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ -#define PANDAS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#ifndef PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#define PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ /* * Parses (almost) standard ISO 8601 date strings. The differences are: @@ -103,4 +103,4 @@ make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, int local, PANDAS_DATETIMEUNIT base, int tzoffset, NPY_CASTING casting); -#endif // PANDAS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#endif // PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ diff --git a/pandas/_libs/src/datetime_helper.h b/pandas/_libs/src/datetime_helper.h index bef4b4266c824..8023285f85b9b 100644 --- a/pandas/_libs/src/datetime_helper.h +++ b/pandas/_libs/src/datetime_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. 
*/ -#ifndef PANDAS_SRC_DATETIME_HELPER_H_ -#define PANDAS_SRC_DATETIME_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_DATETIME_HELPER_H_ +#define PANDAS__LIBS_SRC_DATETIME_HELPER_H_ #include #include "datetime.h" @@ -33,4 +33,4 @@ npy_float64 total_seconds(PyObject *td) { return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; } -#endif // PANDAS_SRC_DATETIME_HELPER_H_ +#endif // PANDAS__LIBS_SRC_DATETIME_HELPER_H_ diff --git a/pandas/_libs/src/helper.h b/pandas/_libs/src/helper.h index 39bcf27e074df..26b4d033b963b 100644 --- a/pandas/_libs/src/helper.h +++ b/pandas/_libs/src/helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS_SRC_HELPER_H_ -#define PANDAS_SRC_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_HELPER_H_ +#define PANDAS__LIBS_SRC_HELPER_H_ #ifndef PANDAS_INLINE #if defined(__GNUC__) @@ -22,4 +22,4 @@ The full license is in the LICENSE file, distributed with this software. #endif #endif -#endif // PANDAS_SRC_HELPER_H_ +#endif // PANDAS__LIBS_SRC_HELPER_H_ diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 809edb2e99fa2..5f4db5b2f55d3 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS_SRC_NUMPY_HELPER_H_ -#define PANDAS_SRC_NUMPY_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_NUMPY_HELPER_H_ +#define PANDAS__LIBS_SRC_NUMPY_HELPER_H_ #include "Python.h" #include "helper.h" @@ -159,4 +159,4 @@ PANDAS_INLINE PyObject* unbox_if_zerodim(PyObject* arr) { } } -#endif // PANDAS_SRC_NUMPY_HELPER_H_ +#endif // PANDAS__LIBS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index 5d2a0dad3da17..6dd8b66eab33d 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS_SRC_PARSE_HELPER_H_ -#define PANDAS_SRC_PARSE_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_ +#define PANDAS__LIBS_SRC_PARSE_HELPER_H_ #include #include @@ -270,4 +270,4 @@ static double xstrtod(const char *str, char **endptr, char decimal, char sci, return number; } -#endif // PANDAS_SRC_PARSE_HELPER_H_ +#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_ diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h index 5a0c2b2b5e4a4..77121e9a169c1 100644 --- a/pandas/_libs/src/parser/io.h +++ b/pandas/_libs/src/parser/io.h @@ -7,8 +7,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. 
*/ -#ifndef PANDAS_SRC_PARSER_IO_H_ -#define PANDAS_SRC_PARSER_IO_H_ +#ifndef PANDAS__LIBS_SRC_PARSER_IO_H_ +#define PANDAS__LIBS_SRC_PARSER_IO_H_ #include "Python.h" #include "tokenizer.h" @@ -83,4 +83,4 @@ void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status); -#endif // PANDAS_SRC_PARSER_IO_H_ +#endif // PANDAS__LIBS_SRC_PARSER_IO_H_ diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 6c1bc630ab547..9853b5149bee3 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -9,8 +9,8 @@ See LICENSE for the license */ -#ifndef PANDAS_SRC_PARSER_TOKENIZER_H_ -#define PANDAS_SRC_PARSER_TOKENIZER_H_ +#ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ +#define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ #include #include @@ -276,4 +276,4 @@ double round_trip(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing); int to_boolean(const char *item, uint8_t *val); -#endif // PANDAS_SRC_PARSER_TOKENIZER_H_ +#endif // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index 601717692ff6d..45afc074cab72 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -11,8 +11,8 @@ Cython to pandas. This primarily concerns interval representation and frequency conversion routines. */ -#ifndef PANDAS_SRC_PERIOD_HELPER_H_ -#define PANDAS_SRC_PERIOD_HELPER_H_ +#ifndef PANDAS__LIBS_SRC_PERIOD_HELPER_H_ +#define PANDAS__LIBS_SRC_PERIOD_HELPER_H_ #include #include "headers/stdint.h" @@ -188,4 +188,4 @@ int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); void initialize_daytime_conversion_factor_matrix(void); -#endif // PANDAS_SRC_PERIOD_HELPER_H_ +#endif // PANDAS__LIBS_SRC_PERIOD_HELPER_H_ diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h index 013516a49fa2f..f9527e72f577e 100644 --- a/pandas/_libs/src/skiplist.h +++ b/pandas/_libs/src/skiplist.h @@ -13,8 +13,8 @@ Port of Wes McKinney's Cython version of Raymond Hettinger's original pure Python recipe (http://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) */ -#ifndef PANDAS_SRC_SKIPLIST_H_ -#define PANDAS_SRC_SKIPLIST_H_ +#ifndef PANDAS__LIBS_SRC_SKIPLIST_H_ +#define PANDAS__LIBS_SRC_SKIPLIST_H_ #include #include @@ -287,4 +287,4 @@ PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) { return 1; } -#endif // PANDAS_SRC_SKIPLIST_H_ +#endif // PANDAS__LIBS_SRC_SKIPLIST_H_ diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 3bfb4b26c0095..d0588348baa44 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -49,8 +49,8 @@ tree doesn't have cyclic references. 
*/ -#ifndef PANDAS_SRC_UJSON_LIB_ULTRAJSON_H_ -#define PANDAS_SRC_UJSON_LIB_ULTRAJSON_H_ +#ifndef PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ +#define PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ #include #include @@ -307,4 +307,4 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer); EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t); -#endif // PANDAS_SRC_UJSON_LIB_ULTRAJSON_H_ +#endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ diff --git a/pandas/_libs/src/ujson/python/py_defines.h b/pandas/_libs/src/ujson/python/py_defines.h index b32285766c86a..82385fdd48a3b 100644 --- a/pandas/_libs/src/ujson/python/py_defines.h +++ b/pandas/_libs/src/ujson/python/py_defines.h @@ -16,7 +16,7 @@ modification, are permitted provided that the following conditions are met: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND @@ -35,8 +35,8 @@ Numeric decoder derived from from TCL library * Copyright (c) 1994 Sun Microsystems, Inc. */ -#ifndef PANDAS_SRC_UJSON_PYTHON_PY_DEFINES_H_ -#define PANDAS_SRC_UJSON_PYTHON_PY_DEFINES_H_ +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_PY_DEFINES_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_PY_DEFINES_H_ #include @@ -55,4 +55,4 @@ Numeric decoder derived from from TCL library #endif -#endif // PANDAS_SRC_UJSON_PYTHON_PY_DEFINES_H_ +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_PY_DEFINES_H_ diff --git a/pandas/_libs/src/ujson/python/version.h b/pandas/_libs/src/ujson/python/version.h index c074ef572101d..ef6d28bf3a1f7 100644 --- a/pandas/_libs/src/ujson/python/version.h +++ b/pandas/_libs/src/ujson/python/version.h @@ -16,7 +16,7 @@ modification, are permitted provided that the following conditions are met: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND @@ -35,9 +35,9 @@ Numeric decoder derived from from TCL library * Copyright (c) 1994 Sun Microsystems, Inc. 
*/ -#ifndef PANDAS_SRC_UJSON_PYTHON_VERSION_H_ -#define PANDAS_SRC_UJSON_PYTHON_VERSION_H_ +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ #define UJSON_VERSION "1.33" -#endif // PANDAS_SRC_UJSON_PYTHON_VERSION_H_ +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ diff --git a/test.bat b/test.bat index 2c5f25c24a637..080a1cc163a05 100644 --- a/test.bat +++ b/test.bat @@ -1,3 +1,3 @@ :: test on windows -pytest --skip-slow --skip-network pandas +pytest --skip-slow --skip-network pandas %* diff --git a/test_fast.sh b/test_fast.sh index 30ac7f84cbe8b..9b984156a796c 100755 --- a/test_fast.sh +++ b/test_fast.sh @@ -5,4 +5,4 @@ # https://github.com/pytest-dev/pytest/issues/1075 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -pytest pandas --skip-slow --skip-network -m "not single" -n 4 +pytest pandas --skip-slow --skip-network -m "not single" -n 4 "$@" From 3d699884e26120618bf0bb8869bc07f1e51a2935 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 8 Mar 2017 10:07:24 -0500 Subject: [PATCH 170/353] DOC: more whatsnew fixing --- doc/source/whatsnew/v0.20.0.txt | 84 ++++++++++++++++----------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index bf778f6065010..34358a193b360 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -289,7 +289,7 @@ Possible incompat for HDF5 formats for pandas < 0.13.0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``pd.TimeSeries`` was deprecated officially in 0.17.0, though has only been an alias since 0.13.0. It has -been dropped in favor of ``pd.Series``. (:issue:``15098). +been dropped in favor of ``pd.Series``. (:issue:`15098`). This *may* cause HDF5 files that were created in prior versions to become unreadable if ``pd.TimeSeries`` was used. This is most likely to be for pandas < 0.13.0. If you find yourself in this situation. @@ -328,68 +328,66 @@ then write them out again after applying the procedure below. Map on Index types now return other Index types ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- ``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) +``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) - .. ipython:: python - - idx = Index([1, 2]) - idx - mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) - mi - - Previous Behavior: +.. ipython:: python - .. code-block:: ipython + idx = Index([1, 2]) + idx + mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) + mi - In [5]: idx.map(lambda x: x * 2) - Out[5]: array([2, 4]) +Previous Behavior: - In [6]: idx.map(lambda x: (x, x * 2)) - Out[6]: array([(1, 2), (2, 4)], dtype=object) +.. code-block:: ipython - In [7]: mi.map(lambda x: x) - Out[7]: array([(1, 2), (2, 4)], dtype=object) + In [5]: idx.map(lambda x: x * 2) + Out[5]: array([2, 4]) - In [8]: mi.map(lambda x: x[0]) - Out[8]: array([1, 2]) + In [6]: idx.map(lambda x: (x, x * 2)) + Out[6]: array([(1, 2), (2, 4)], dtype=object) - New Behavior: + In [7]: mi.map(lambda x: x) + Out[7]: array([(1, 2), (2, 4)], dtype=object) - .. ipython:: python + In [8]: mi.map(lambda x: x[0]) + Out[8]: array([1, 2]) - idx.map(lambda x: x * 2) +New Behavior: - idx.map(lambda x: (x, x * 2)) +.. 
ipython:: python - mi.map(lambda x: x) + idx.map(lambda x: x * 2) + idx.map(lambda x: (x, x * 2)) - mi.map(lambda x: x[0]) + mi.map(lambda x: x) + mi.map(lambda x: x[0]) -- ``map`` on a ``Series`` with ``datetime64`` values may return ``int64`` dtypes rather than ``int32`` - .. ipython:: python +``map`` on a ``Series`` with ``datetime64`` values may return ``int64`` dtypes rather than ``int32`` - s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo')) - s +.. ipython:: python - Previous Behavior: + s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo')) + s - .. code-block:: ipython +Previous Behavior: - In [9]: s.map(lambda x: x.hour) - Out[9]: - 0 0 - 1 1 - 2 2 - dtype: int32 +.. code-block:: ipython + In [9]: s.map(lambda x: x.hour) + Out[9]: + 0 0 + 1 1 + 2 2 + dtype: int32 - New Behavior: +New Behavior: - .. ipython:: python +.. ipython:: python - s.map(lambda x: x.hour) + s.map(lambda x: x.hour) .. _whatsnew_0200.api_breaking.s3: @@ -443,8 +441,8 @@ Pandas Google BigQuery support has moved ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. -The functionality of ``pd.read_gbq()`` and ``.to_gbq()`` remains the same with the currently released version of ``pandas-gbq=0.1.3``. (:issue:`15347`) -Documentation is now hosted `here `__ +The functionality of :func:`read_gbq` and :meth:`DataFrame.to_gbq` remain the same with the currently released version of ``pandas-gbq=0.1.3``. +Documentation is now hosted `here `__ (:issue:`15347`) .. _whatsnew_0200.api_breaking.memory_usage: @@ -667,7 +665,7 @@ Performance Improvements - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) -- Improved performance of `rank()` for categorical data (:issue:`15498`) +- Improved performance of ``.rank()`` for categorical data (:issue:`15498`) - Improved performance when using ``.unstack()`` (:issue:`15503`) From 54e71a74c81ecefb55bb35934b75f4cd1fb3ded1 Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Wed, 8 Mar 2017 19:46:50 +0100 Subject: [PATCH 171/353] DOC: fix link to offset strings in resample method (#15619) --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ff58a2aa77447..c45cf57152599 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4360,6 +4360,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, .. versionadded:: 0.19.0 + Notes + ----- To learn more about the offset strings, please see `this link `__. 
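A minimal sketch of the offset strings that the new resample note above points to (not taken from any patch in this series; it assumes a pandas build at roughly this point in the history): an alias such as '3T' sets the target frequency, here three-minute bins aggregated with sum.

>>> import pandas as pd
>>> s = pd.Series(range(9), index=pd.date_range('1/1/2000', periods=9, freq='T'))
>>> s.resample('3T').sum()
2000-01-01 00:00:00     3
2000-01-01 00:03:00    12
2000-01-01 00:06:00    21
Freq: 3T, dtype: int64

The same alias grammar also covers the decimal multiples noted in the whatsnew entries earlier (e.g. '0.5min' is parsed as '30s').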
From 1a75f495271dd2e8ab55065ccc5594ee0469a17d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 9 Mar 2017 08:51:09 +0100 Subject: [PATCH 172/353] DOC: make it possible to run doctests (#15626) --- pandas/conftest.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 623feb99e9cdc..e0a15f740688b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,5 +1,8 @@ import pytest +import numpy +import pandas + def pytest_addoption(parser): parser.addoption("--skip-slow", action="store_true", @@ -19,3 +22,11 @@ def pytest_runtest_setup(item): if 'network' in item.keywords and item.config.getoption("--skip-network"): pytest.skip("skipping due to --skip-network") + + +# For running doctests: make np and pd names available + +@pytest.fixture(autouse=True) +def add_imports(doctest_namespace): + doctest_namespace['np'] = numpy + doctest_namespace['pd'] = pandas From 2229c26442ea28b7d69819e0b52b9bbc45afae4a Mon Sep 17 00:00:00 2001 From: DaanVanHauwermeiren Date: Thu, 9 Mar 2017 08:57:27 +0100 Subject: [PATCH 173/353] DOC: add example for DataFrame.resample: keywords on and level (#15627) --- pandas/core/generic.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c45cf57152599..84a48c9be8fd9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4462,6 +4462,30 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, 2000-01-01 00:06:00 26 Freq: 3T, dtype: int64 + For DataFrame objects, the keyword ``on`` can be used to specify the + column instead of the index for resampling. + + >>> df = pd.DataFrame(data=9*[range(4)], columns=['a', 'b', 'c', 'd']) + >>> df['time'] = pd.date_range('1/1/2000', periods=9, freq='T') + >>> df.resample('3T', on='time').sum() + a b c d + time + 2000-01-01 00:00:00 0 3 6 9 + 2000-01-01 00:03:00 0 3 6 9 + 2000-01-01 00:06:00 0 3 6 9 + + For a DataFrame with MultiIndex, the keyword ``level`` can be used to + specify on level the resampling needs to take place. + + >>> time = pd.date_range('1/1/2000', periods=5, freq='T') + >>> df2 = pd.DataFrame(data=10*[range(4)], + columns=['a', 'b', 'c', 'd'], + index=pd.MultiIndex.from_product([time, [1, 2]]) + ) + >>> df2.resample('3T', level=0).sum() + a b c d + 2000-01-01 00:00:00 0 6 12 18 + 2000-01-01 00:03:00 0 4 8 12 """ from pandas.tseries.resample import (resample, _maybe_process_deprecations) From a1d3ff3e3ec407915adb9d37107cd64a2028dd76 Mon Sep 17 00:00:00 2001 From: Michiel Stock Date: Thu, 9 Mar 2017 09:21:30 +0100 Subject: [PATCH 174/353] DOC: resolved mistakes in examples series (#15625) --- pandas/core/generic.py | 11 ++++--- pandas/core/series.py | 71 +++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 84a48c9be8fd9..606906bfcd7c4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -668,6 +668,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): dtype: int64 >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) >>> df.rename(2) + Traceback (most recent call last): ... TypeError: 'int' object is not callable >>> df.rename(index=str, columns={"A": "a", "B": "c"}) @@ -1115,7 +1116,7 @@ def __setstate__(self, state): to the existing workbook. 
This can be used to save different DataFrames to one workbook: - >>> writer = ExcelWriter('output.xlsx') + >>> writer = pd.ExcelWriter('output.xlsx') >>> df1.to_excel(writer,'Sheet1') >>> df2.to_excel(writer,'Sheet2') >>> writer.save() @@ -2260,7 +2261,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}, ... index=index) >>> df - http_status response_time + http_status response_time Firefox 200 0.04 Chrome 200 0.02 Safari 404 0.07 @@ -2275,11 +2276,11 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, ... 'Chrome'] >>> df.reindex(new_index) http_status response_time - Safari 404 0.07 + Safari 404.0 0.07 Iceweasel NaN NaN Comodo Dragon NaN NaN - IE10 404 0.08 - Chrome 200 0.02 + IE10 404.0 0.08 + Chrome 200.0 0.02 We can fill in the missing values by passing a value to the keyword ``fill_value``. Because the index is not monotonically diff --git a/pandas/core/series.py b/pandas/core/series.py index f23e90effdabf..cfa25ca1299eb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -369,10 +369,10 @@ def values(self): Timezone aware datetime data is converted to UTC: >>> pd.Series(pd.date_range('20130101', periods=3, - tz='US/Eastern')).values - array(['2013-01-01T00:00:00.000000000-0500', - '2013-01-02T00:00:00.000000000-0500', - '2013-01-03T00:00:00.000000000-0500'], dtype='datetime64[ns]') + ... tz='US/Eastern')).values + array(['2013-01-01T05:00:00.000000000', + '2013-01-02T05:00:00.000000000', + '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]') """ return self._data.external_values() @@ -1550,6 +1550,8 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): With `verify_integrity` set to True: >>> s1.append(s2, verify_integrity=True) + Traceback (most recent call last): + ... ValueError: Indexes have overlapping values: [0, 1, 2] @@ -1919,8 +1921,19 @@ def nlargest(self, n=5, keep='first'): -------- >>> import pandas as pd >>> import numpy as np - >>> s = pd.Series(np.random.randn(1e6)) + >>> s = pd.Series(np.random.randn(10**6)) >>> s.nlargest(10) # only sorts up to the N requested + 219921 4.644710 + 82124 4.608745 + 421689 4.564644 + 425277 4.447014 + 718691 4.414137 + 43154 4.403520 + 283187 4.313922 + 595519 4.273635 + 503969 4.250236 + 121637 4.240952 + dtype: float64 """ return algorithms.select_n_series(self, n=n, keep=keep, method='nlargest') @@ -1958,8 +1971,19 @@ def nsmallest(self, n=5, keep='first'): -------- >>> import pandas as pd >>> import numpy as np - >>> s = pd.Series(np.random.randn(1e6)) + >>> s = pd.Series(np.random.randn(10**6)) >>> s.nsmallest(10) # only sorts up to the N requested + 288532 -4.954580 + 732345 -4.835960 + 64803 -4.812550 + 446457 -4.609998 + 501225 -4.483945 + 669476 -4.472935 + 973615 -4.401699 + 621279 -4.355126 + 773916 -4.347355 + 359919 -4.331927 + dtype: float64 """ return algorithms.select_n_series(self, n=n, keep=keep, method='nsmallest') @@ -2052,21 +2076,24 @@ def unstack(self, level=-1, fill_value=None): Examples -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.MultiIndex.from_product([['one', 'two'], ['a', 'b']])) >>> s - one a 1. - one b 2. - two a 3. - two b 4. + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 >>> s.unstack(level=-1) - a b - one 1. 2. - two 3. 4. + a b + one 1 2 + two 3 4 >>> s.unstack(level=0) one two - a 1. 2. - b 3. 4. 
+ a 1 3 + b 2 4 Returns ------- @@ -2102,15 +2129,16 @@ def map(self, arg, na_action=None): >>> x = pd.Series([1,2,3], index=['one', 'two', 'three']) >>> x - one 1 - two 2 - three 3 + one 1 + two 2 + three 3 + dtype: int64 >>> y = pd.Series(['foo', 'bar', 'baz'], index=[1,2,3]) >>> y - 1 foo - 2 bar - 3 baz + 1 foo + 2 bar + 3 baz >>> x.map(y) one foo @@ -2215,6 +2243,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): >>> import numpy as np >>> series = pd.Series([20, 21, 12], index=['London', ... 'New York','Helsinki']) + >>> series London 20 New York 21 Helsinki 12 From ae0a92a68b985e845465a11a8fb0ec589001d6a9 Mon Sep 17 00:00:00 2001 From: mcocdawc Date: Thu, 9 Mar 2017 11:58:48 +0100 Subject: [PATCH 175/353] ENH: to_string/to_latex now accept list-like header arg for overwriting column names (#15548) closes #15536 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 5 +++ pandas/formats/format.py | 38 ++++++++++++---------- pandas/tests/formats/test_format.py | 11 +++++++ pandas/tests/formats/test_to_latex.py | 45 +++++++++++++++++++++++++++ 5 files changed, 84 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 34358a193b360..ad7571662b8f4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -227,6 +227,7 @@ Other enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4e7a5ebdf6f67..2062f301b9e0e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1516,6 +1516,8 @@ def to_feather(self, fname): from pandas.io.feather_format import to_feather to_feather(self, fname) + @Substitution(header='Write out column names. If a list of string is given, \ +it is assumed to be aliases for the column names') @Appender(fmt.docstring_to_string, indents=1) def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -1543,6 +1545,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True, result = formatter.buf.getvalue() return result + @Substitution(header='whether to print column labels, default True') @Appender(fmt.docstring_to_string, indents=1) def to_html(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -1596,6 +1599,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, if buf is None: return formatter.buf.getvalue() + @Substitution(header='Write out column names. 
If a list of string is given, \ +it is assumed to be aliases for the column names.') @Appender(fmt.common_docstring + fmt.return_docstring, indents=1) def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, diff --git a/pandas/formats/format.py b/pandas/formats/format.py index d354911a825bc..2665f5aea145d 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -20,9 +20,9 @@ is_float, is_numeric_dtype, is_datetime64_dtype, - is_timedelta64_dtype) + is_timedelta64_dtype, + is_list_like) from pandas.types.generic import ABCSparseArray - from pandas.core.base import PandasObject from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat @@ -54,7 +54,7 @@ col_space : int, optional the minimum width of each column header : bool, optional - whether to print column labels, default True + %(header)s index : bool, optional whether to print index (row) labels, default True na_rep : string, optional @@ -488,32 +488,38 @@ def _to_str_columns(self): # may include levels names also str_index = self._get_formatted_index(frame) - str_columns = self._get_formatted_column_labels(frame) - if self.header: + if not is_list_like(self.header) and not self.header: stringified = [] for i, c in enumerate(frame): - cheader = str_columns[i] - max_colwidth = max(self.col_space or 0, *(self.adj.len(x) - for x in cheader)) fmt_values = self._format_col(i) fmt_values = _make_fixed_width(fmt_values, self.justify, - minimum=max_colwidth, + minimum=(self.col_space or 0), adj=self.adj) - - max_len = max(np.max([self.adj.len(x) for x in fmt_values]), - max_colwidth) - cheader = self.adj.justify(cheader, max_len, mode=self.justify) - stringified.append(cheader + fmt_values) + stringified.append(fmt_values) else: + if is_list_like(self.header): + if len(self.header) != len(self.columns): + raise ValueError(('Writing %d cols but got %d aliases' + % (len(self.columns), len(self.header)))) + str_columns = [[label] for label in self.header] + else: + str_columns = self._get_formatted_column_labels(frame) + stringified = [] for i, c in enumerate(frame): + cheader = str_columns[i] + header_colwidth = max(self.col_space or 0, + *(self.adj.len(x) for x in cheader)) fmt_values = self._format_col(i) fmt_values = _make_fixed_width(fmt_values, self.justify, - minimum=(self.col_space or 0), + minimum=header_colwidth, adj=self.adj) - stringified.append(fmt_values) + max_len = max(np.max([self.adj.len(x) for x in fmt_values]), + header_colwidth) + cheader = self.adj.justify(cheader, max_len, mode=self.justify) + stringified.append(cheader + fmt_values) strcols = stringified if self.index: diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index ddf9d35841ce7..b1f163ccf9429 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1125,6 +1125,17 @@ def test_to_string_no_header(self): self.assertEqual(df_s, expected) + def test_to_string_specified_header(self): + df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) + + df_s = df.to_string(header=['X', 'Y']) + expected = ' X Y\n0 1 4\n1 2 5\n2 3 6' + + self.assertEqual(df_s, expected) + + with tm.assertRaises(ValueError): + df.to_string(header=['X']) + def test_to_string_no_index(self): df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) diff --git a/pandas/tests/formats/test_to_latex.py b/pandas/tests/formats/test_to_latex.py index 17e1e18f03dd6..29ead83f3bcd9 100644 --- 
a/pandas/tests/formats/test_to_latex.py +++ b/pandas/tests/formats/test_to_latex.py @@ -428,6 +428,51 @@ def test_to_latex_no_header(self): assert withoutindex_result == withoutindex_expected + def test_to_latex_specified_header(self): + # GH 7124 + df = DataFrame({'a': [1, 2], 'b': ['b1', 'b2']}) + withindex_result = df.to_latex(header=['AA', 'BB']) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & AA & BB \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule +AA & BB \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + withoutescape_result = df.to_latex(header=['$A$', '$B$'], escape=False) + withoutescape_expected = r"""\begin{tabular}{lrl} +\toprule +{} & $A$ & $B$ \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutescape_result == withoutescape_expected + + with tm.assertRaises(ValueError): + df.to_latex(header=['A']) + def test_to_latex_decimal(self, frame): # GH 12031 frame.to_latex() From 27b0ba70c7a62965af1f669f91162f01a2c7e2f5 Mon Sep 17 00:00:00 2001 From: Jim Date: Thu, 9 Mar 2017 13:05:47 +0100 Subject: [PATCH 176/353] DOC: add documentation to IndexSlice (#15623) --- doc/source/api.rst | 1 + pandas/core/indexing.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index fbce64df84859..f126e478f424d 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1405,6 +1405,7 @@ MultiIndex :toctree: generated/ MultiIndex + IndexSlice MultiIndex Components ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6f490875742ca..546cbd8337e7e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -43,6 +43,36 @@ def get_indexers_list(): # the public IndexSlicerMaker class _IndexSlice(object): + """ + Create an object to more easily perform multi-index slicing + + Examples + -------- + + >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']]) + >>> columns = ['foo', 'bar'] + >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))), + index=midx, columns=columns) + + Using the default slice command: + + >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + + Using the IndexSlice class for a more intuitive command: + + >>> idx = pd.IndexSlice + >>> dfmi.loc[idx[:, 'B0':'B1'], :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + """ def __getitem__(self, arg): return arg From df6783f68df903a58c65dc45857ef4e16440f9ee Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Mar 2017 09:08:17 -0500 Subject: [PATCH 177/353] DOC: remove to_gbq from api.rst as not directly callable (DataFrame.to_gbq) is the entry point --- doc/source/api.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index f126e478f424d..7e297a15055a0 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -118,7 +118,6 @@ Google BigQuery :toctree: generated/ read_gbq - to_gbq .. currentmodule:: pandas @@ -1237,7 +1236,7 @@ Serialization / IO / Conversion Panel.to_frame Panel.to_xarray Panel.to_clipboard - + .. 
_api.index: Index From a4bba287d40e5e362a20543a302b09ec90254c5f Mon Sep 17 00:00:00 2001 From: chaimdemulder Date: Thu, 9 Mar 2017 15:26:56 +0100 Subject: [PATCH 178/353] DOC: use mathjax on sphinx - #15469 Exponentially Weighed Windows pages now shows formulas (#15618) --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 1e82dfca87d17..6840f76866d2c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -46,7 +46,7 @@ 'ipython_sphinxext.ipython_console_highlighting', 'sphinx.ext.intersphinx', 'sphinx.ext.coverage', - 'sphinx.ext.pngmath', + 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.linkcode', ] From 0cfc95055ca78ae0ba5189dd84f9319d175586a8 Mon Sep 17 00:00:00 2001 From: goldenbull Date: Thu, 9 Mar 2017 09:43:14 -0500 Subject: [PATCH 179/353] ENH: add compression support for 'read_pickle' and 'to_pickle' closes #11666 Author: goldenbull Author: Chen Jinniu Closes #13317 from goldenbull/pickle_io_compression and squashes the following commits: e9c5fd2 [goldenbull] docs update d50e430 [goldenbull] update docs. re-write all tests to avoid round-trip read/write comparison. 86afd25 [goldenbull] change test to new pytest parameterized style 945e7bb [goldenbull] Merge remote-tracking branch 'origin/master' into pickle_io_compression ccbeaa9 [goldenbull] move pickle compression tests into a new class 9a07250 [goldenbull] Remove prepared compressed data. _get_handle will take care of compressed I/O 1cb810b [goldenbull] add zip decompression support. refactor using lambda. b8c4175 [goldenbull] add compressed pickle data file to io/tests 6df6611 [goldenbull] pickle compression code update 81d55a0 [Chen Jinniu] Merge branch 'master' into pickle_io_compression 025a0cd [goldenbull] add compression support for pickle --- doc/source/io.rst | 39 +++++++ doc/source/whatsnew/v0.20.0.txt | 34 ++++++ pandas/core/generic.py | 8 +- pandas/io/common.py | 14 ++- pandas/io/pickle.py | 52 +++++++-- pandas/tests/io/test_pickle.py | 196 +++++++++++++++++++++++++++++++- 6 files changed, 324 insertions(+), 19 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index fa57d6d692152..67491c8b30de7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3046,6 +3046,45 @@ any pickled pandas object (or any other pickled object) from file: These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated. +.. _io.pickle.compression: + +Read/Write compressed pickle files +'''''''''''''' + +.. versionadded:: 0.20.0 + +:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read +and write compressed pickle files. Compression types of ``gzip``, ``bz2``, ``xz`` supports +both read and write. ``zip`` file supports read only and must contain only one data file +to be read in. +Compression type can be an explicitely parameter or be inferred from the file extension. +If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or +``'.xz'``, respectively. + +.. 
ipython:: python + + df = pd.DataFrame({ + 'A': np.random.randn(1000), + 'B': np.random.randn(1000), + 'C': np.random.randn(1000)}) + df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type + df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension + df.to_pickle("data.pkl.gz") # default, using "infer" + df["A"].to_pickle("s1.pkl.bz2") + + df = pd.read_pickle("data.pkl.compress", compression="gzip") + df = pd.read_pickle("data.pkl.xz", compression="infer") + df = pd.read_pickle("data.pkl.gz") + s = pd.read_pickle("s1.pkl.bz2") + +.. ipython:: python + :suppress: + import os + os.remove("data.pkl.compress") + os.remove("data.pkl.xz") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + .. _io.msgpack: msgpack diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ad7571662b8f4..4b320d21fe738 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -94,6 +94,40 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). df = pd.read_table(url, compression='bz2') # explicitly specify compression df.head(2) +.. _whatsnew_0200.enhancements.pickle_compression: + +Pickle file I/O now supports compression +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` +can now read from and write to compressed pickle files. Compression methods +can be an explicit parameter or be inferred from the file extension. +See :ref:`Read/Write compressed pickle files ` + +.. ipython:: python + + df = pd.DataFrame({ + 'A': np.random.randn(1000), + 'B': np.random.randn(1000), + 'C': np.random.randn(1000)}) + df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type + df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension + df.to_pickle("data.pkl.gz") # default, using "infer" + df["A"].to_pickle("s1.pkl.bz2") + + df = pd.read_pickle("data.pkl.compress", compression="gzip") + df = pd.read_pickle("data.pkl.xz", compression="infer") + df = pd.read_pickle("data.pkl.gz") + s = pd.read_pickle("s1.pkl.bz2") + +.. ipython:: python + :suppress: + import os + os.remove("data.pkl.compress") + os.remove("data.pkl.xz") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + .. _whatsnew_0200.enhancements.uint64_support: UInt64 Support Improved diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 606906bfcd7c4..a0111cb9ef7ec 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1355,7 +1355,7 @@ def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', if_exists=if_exists, index=index, index_label=index_label, chunksize=chunksize, dtype=dtype) - def to_pickle(self, path): + def to_pickle(self, path, compression='infer'): """ Pickle (serialize) object to input file path. @@ -1363,9 +1363,13 @@ def to_pickle(self, path): ---------- path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer' + a string representing the compression to use in the output file + + .. 
versionadded:: 0.20.0 """ from pandas.io.pickle import to_pickle - return to_pickle(self, path) + return to_pickle(self, path, compression=compression) def to_clipboard(self, excel=None, sep=None, **kwargs): """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 74c51b74ca18a..e42d218d7925f 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -305,7 +305,7 @@ def _infer_compression(filepath_or_buffer, compression): def _get_handle(path_or_buf, mode, encoding=None, compression=None, - memory_map=False): + memory_map=False, is_text=True): """ Get file handle for given path/buffer and mode. @@ -320,7 +320,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, Supported compression protocols are gzip, bz2, zip, and xz memory_map : boolean, default False See parsers._parser_params for more information. - + is_text : boolean, default True + whether file/buffer is in text format (csv, json, etc.), or in binary + mode (pickle, etc.) Returns ------- f : file-like @@ -394,13 +396,17 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, elif encoding: # Python 3 and encoding f = open(path_or_buf, mode, encoding=encoding) - else: + elif is_text: # Python 3 and no explicit encoding f = open(path_or_buf, mode, errors='replace') + else: + # Python 3 and binary mode + f = open(path_or_buf, mode) handles.append(f) # in Python 3, convert BytesIO or fileobjects passed with an encoding - if compat.PY3 and (compression or isinstance(f, need_text_wrapping)): + if compat.PY3 and is_text and\ + (compression or isinstance(f, need_text_wrapping)): from io import TextIOWrapper f = TextIOWrapper(f, encoding=encoding) handles.append(f) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 2358c296f782e..969a2a51cb15d 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -4,9 +4,10 @@ from numpy.lib.format import read_array, write_array from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 from pandas.types.common import is_datetime64_dtype, _NS_DTYPE +from pandas.io.common import _get_handle, _infer_compression -def to_pickle(obj, path): +def to_pickle(obj, path, compression='infer'): """ Pickle (serialize) object to input file path @@ -15,12 +16,23 @@ def to_pickle(obj, path): obj : any object path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer' + a string representing the compression to use in the output file + + .. versionadded:: 0.20.0 """ - with open(path, 'wb') as f: + inferred_compression = _infer_compression(path, compression) + f, fh = _get_handle(path, 'wb', + compression=inferred_compression, + is_text=False) + try: pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL) + finally: + for _f in fh: + _f.close() -def read_pickle(path): +def read_pickle(path, compression='infer'): """ Load pickled pandas object (or any other pickled object) from the specified file path @@ -32,12 +44,32 @@ def read_pickle(path): ---------- path : string File path + compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, xz or zip if path is a string ending in '.gz', '.bz2', 'xz', + or 'zip' respectively, and no decompression otherwise. + Set to None for no decompression. + + .. 
versionadded:: 0.20.0 Returns ------- unpickled : type of object stored in file """ + inferred_compression = _infer_compression(path, compression) + + def read_wrapper(func): + # wrapper file handle open/close operation + f, fh = _get_handle(path, 'rb', + compression=inferred_compression, + is_text=False) + try: + return func(f) + finally: + for _f in fh: + _f.close() + def try_read(path, encoding=None): # try with cPickle # try with current pickle, if we have a Type Error then @@ -48,19 +80,16 @@ def try_read(path, encoding=None): # cpickle # GH 6899 try: - with open(path, 'rb') as fh: - return pkl.load(fh) + return read_wrapper(lambda f: pkl.load(f)) except Exception: # reg/patched pickle try: - with open(path, 'rb') as fh: - return pc.load(fh, encoding=encoding, compat=False) - + return read_wrapper( + lambda f: pc.load(f, encoding=encoding, compat=False)) # compat pickle except: - with open(path, 'rb') as fh: - return pc.load(fh, encoding=encoding, compat=True) - + return read_wrapper( + lambda f: pc.load(f, encoding=encoding, compat=True)) try: return try_read(path) except: @@ -68,6 +97,7 @@ def try_read(path, encoding=None): return try_read(path, encoding='latin1') raise + # compat with sparse pickle / unpickle diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index c736ec829808a..2fffc3c39ec26 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -15,15 +15,14 @@ import pytest import os - from distutils.version import LooseVersion - import pandas as pd from pandas import Index from pandas.compat import is_platform_little_endian import pandas import pandas.util.testing as tm from pandas.tseries.offsets import Day, MonthEnd +import shutil @pytest.fixture(scope='module') @@ -302,3 +301,196 @@ def test_pickle_v0_15_2(): # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) # tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + + +# --------------------- +# test pickle compression +# --------------------- +_compression_to_extension = { + None: ".none", + 'gzip': '.gz', + 'bz2': '.bz2', + 'zip': '.zip', + 'xz': '.xz', +} + + +def get_random_path(): + return u'__%s__.pickle' % tm.rands(10) + + +def compress_file(src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return + + if compression == 'gzip': + import gzip + f = gzip.open(dest_path, "w") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(dest_path, "w") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(dest_path, "w", + compression=zipfile.ZIP_DEFLATED) + zip_file.write(src_path, os.path.basename(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(dest_path, "w") + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + + if compression != "zip": + f.write(open(src_path, "rb").read()) + f.close() + + +def decompress_file(src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return + + if compression == 'gzip': + import gzip + f = gzip.open(src_path, "r") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(src_path, "r") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(src_path) + zip_names = zip_file.namelist() + if len(zip_names) == 1: + f = zip_file.open(zip_names.pop()) + else: + raise ValueError('ZIP file {} error. Only one file per ZIP.' 
+ .format(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(src_path, "r") + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + + open(dest_path, "wb").write(f.read()) + f.close() + + +@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz']) +def test_write_explicit(compression): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ".compressed" + path2 = base + ".raw" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file + df.to_pickle(p1, compression=compression) + + # decompress + decompress_file(p1, p2, compression=compression) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) +def test_write_explicit_bad(compression): + with tm.assertRaisesRegexp(ValueError, + "Unrecognized compression type"): + with tm.ensure_clean(get_random_path()) as path: + df = tm.makeDataFrame() + df.to_pickle(path, compression=compression) + + +@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress']) +def test_write_infer(ext): + if ext == '.xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ext + path2 = base + ".raw" + compression = None + for c in _compression_to_extension: + if _compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file by inferred compression method + df.to_pickle(p1) + + # decompress + decompress_file(p1, p2, compression=compression) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"]) +def test_read_explicit(compression): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ".raw" + path2 = base + ".compressed" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + compress_file(p1, p2, compression=compression) + + # read compressed file + df2 = pd.read_pickle(p2, compression=compression) + + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip', + '.no_compress']) +def test_read_infer(ext): + if ext == '.xz': + tm._skip_if_no_lzma() + + base = get_random_path() + path1 = base + ".raw" + path2 = base + ext + compression = None + for c in _compression_to_extension: + if _compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + compress_file(p1, p2, compression=compression) + + # read compressed file by inferred compression method + df2 = pd.read_pickle(p2) + + tm.assert_frame_equal(df, df2) From 5667a3ad0489815c1239cba785300952c9799000 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 9 Mar 2017 09:50:04 -0500 Subject: [PATCH 180/353] TST: fix up compression tests / docs --- doc/source/io.rst | 55 +++--- doc/source/whatsnew/v0.20.0.txt | 40 +++-- pandas/tests/io/test_pickle.py | 289 
++++++++++++++++---------------- 3 files changed, 208 insertions(+), 176 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 67491c8b30de7..fdd33ab4625f3 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3042,22 +3042,19 @@ any pickled pandas object (or any other pickled object) from file: See `this question `__ for a detailed explanation. -.. note:: - - These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated. - .. _io.pickle.compression: -Read/Write compressed pickle files -'''''''''''''' +Compressed pickle files +''''''''''''''''''''''' .. versionadded:: 0.20.0 :func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read -and write compressed pickle files. Compression types of ``gzip``, ``bz2``, ``xz`` supports -both read and write. ``zip`` file supports read only and must contain only one data file +and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. +`zip`` file supports read only and must contain only one data file to be read in. -Compression type can be an explicitely parameter or be inferred from the file extension. + +The compression type can be an explicit parameter or be inferred from the file extension. If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or ``'.xz'``, respectively. @@ -3065,17 +3062,37 @@ If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ` df = pd.DataFrame({ 'A': np.random.randn(1000), - 'B': np.random.randn(1000), - 'C': np.random.randn(1000)}) - df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type - df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension - df.to_pickle("data.pkl.gz") # default, using "infer" - df["A"].to_pickle("s1.pkl.bz2") + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + df + +Using an explicit compression type + +.. ipython:: python - df = pd.read_pickle("data.pkl.compress", compression="gzip") - df = pd.read_pickle("data.pkl.xz", compression="infer") - df = pd.read_pickle("data.pkl.gz") - s = pd.read_pickle("s1.pkl.bz2") + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt + +Inferring compression type from the extension + +.. ipython:: python + + df.to_pickle("data.pkl.xz", compression="infer") + rt = pd.read_pickle("data.pkl.xz", compression="infer") + rt + +The default is to 'infer + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt + + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4b320d21fe738..8f671062464f0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -102,23 +102,41 @@ Pickle file I/O now supports compression :func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can now read from and write to compressed pickle files. Compression methods can be an explicit parameter or be inferred from the file extension. -See :ref:`Read/Write compressed pickle files ` +See :ref:`the docs here ` .. 
ipython:: python df = pd.DataFrame({ 'A': np.random.randn(1000), - 'B': np.random.randn(1000), - 'C': np.random.randn(1000)}) - df.to_pickle("data.pkl.compress", compression="gzip") # explicit compression type - df.to_pickle("data.pkl.xz", compression="infer") # infer compression type from extension - df.to_pickle("data.pkl.gz") # default, using "infer" - df["A"].to_pickle("s1.pkl.bz2") + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + +Using an explicit compression type + +.. ipython:: python - df = pd.read_pickle("data.pkl.compress", compression="gzip") - df = pd.read_pickle("data.pkl.xz", compression="infer") - df = pd.read_pickle("data.pkl.gz") - s = pd.read_pickle("s1.pkl.bz2") + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt + +Inferring compression type from the extension + +.. ipython:: python + + df.to_pickle("data.pkl.xz", compression="infer") + rt = pd.read_pickle("data.pkl.xz", compression="infer") + rt + +The default is to 'infer + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt .. ipython:: python :suppress: diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 2fffc3c39ec26..91e70e942089c 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -306,191 +306,188 @@ def test_pickle_v0_15_2(): # --------------------- # test pickle compression # --------------------- -_compression_to_extension = { - None: ".none", - 'gzip': '.gz', - 'bz2': '.bz2', - 'zip': '.zip', - 'xz': '.xz', -} - +@pytest.fixture def get_random_path(): return u'__%s__.pickle' % tm.rands(10) -def compress_file(src_path, dest_path, compression): - if compression is None: - shutil.copyfile(src_path, dest_path) - return - - if compression == 'gzip': - import gzip - f = gzip.open(dest_path, "w") - elif compression == 'bz2': - import bz2 - f = bz2.BZ2File(dest_path, "w") - elif compression == 'zip': - import zipfile - zip_file = zipfile.ZipFile(dest_path, "w", - compression=zipfile.ZIP_DEFLATED) - zip_file.write(src_path, os.path.basename(src_path)) - elif compression == 'xz': - lzma = pandas.compat.import_lzma() - f = lzma.LZMAFile(dest_path, "w") - else: - msg = 'Unrecognized compression type: {}'.format(compression) - raise ValueError(msg) - - if compression != "zip": - f.write(open(src_path, "rb").read()) - f.close() +class TestCompression(object): + _compression_to_extension = { + None: ".none", + 'gzip': '.gz', + 'bz2': '.bz2', + 'zip': '.zip', + 'xz': '.xz', + } -def decompress_file(src_path, dest_path, compression): - if compression is None: - shutil.copyfile(src_path, dest_path) - return + def compress_file(self, src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return - if compression == 'gzip': - import gzip - f = gzip.open(src_path, "r") - elif compression == 'bz2': - import bz2 - f = bz2.BZ2File(src_path, "r") - elif compression == 'zip': - import zipfile - zip_file = zipfile.ZipFile(src_path) - zip_names = zip_file.namelist() - if len(zip_names) == 1: - f = zip_file.open(zip_names.pop()) + if compression == 'gzip': + import gzip + f = gzip.open(dest_path, "w") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(dest_path, "w") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(dest_path, "w", + compression=zipfile.ZIP_DEFLATED) + 
zip_file.write(src_path, os.path.basename(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(dest_path, "w") else: - raise ValueError('ZIP file {} error. Only one file per ZIP.' - .format(src_path)) - elif compression == 'xz': - lzma = pandas.compat.import_lzma() - f = lzma.LZMAFile(src_path, "r") - else: - msg = 'Unrecognized compression type: {}'.format(compression) - raise ValueError(msg) - - open(dest_path, "wb").write(f.read()) - f.close() + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + if compression != "zip": + f.write(open(src_path, "rb").read()) + f.close() -@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz']) -def test_write_explicit(compression): - # issue 11666 - if compression == 'xz': - tm._skip_if_no_lzma() + def decompress_file(self, src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return - base = get_random_path() - path1 = base + ".compressed" - path2 = base + ".raw" + if compression == 'gzip': + import gzip + f = gzip.open(src_path, "r") + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(src_path, "r") + elif compression == 'zip': + import zipfile + zip_file = zipfile.ZipFile(src_path) + zip_names = zip_file.namelist() + if len(zip_names) == 1: + f = zip_file.open(zip_names.pop()) + else: + raise ValueError('ZIP file {} error. Only one file per ZIP.' + .format(src_path)) + elif compression == 'xz': + lzma = pandas.compat.import_lzma() + f = lzma.LZMAFile(src_path, "r") + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + open(dest_path, "wb").write(f.read()) + f.close() - # write to compressed file - df.to_pickle(p1, compression=compression) + @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz']) + def test_write_explicit(self, compression, get_random_path): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() - # decompress - decompress_file(p1, p2, compression=compression) + base = get_random_path + path1 = base + ".compressed" + path2 = base + ".raw" - # read decompressed file - df2 = pd.read_pickle(p2, compression=None) + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - tm.assert_frame_equal(df, df2) + # write to compressed file + df.to_pickle(p1, compression=compression) + # decompress + self.decompress_file(p1, p2, compression=compression) -@pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) -def test_write_explicit_bad(compression): - with tm.assertRaisesRegexp(ValueError, - "Unrecognized compression type"): - with tm.ensure_clean(get_random_path()) as path: - df = tm.makeDataFrame() - df.to_pickle(path, compression=compression) + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + tm.assert_frame_equal(df, df2) -@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress']) -def test_write_infer(ext): - if ext == '.xz': - tm._skip_if_no_lzma() + @pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) + def test_write_explicit_bad(self, compression, get_random_path): + with tm.assertRaisesRegexp(ValueError, + "Unrecognized compression type"): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, compression=compression) - base = get_random_path() - path1 = base + ext - path2 = base + ".raw" - 
compression = None - for c in _compression_to_extension: - if _compression_to_extension[c] == ext: - compression = c - break + @pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.no_compress']) + def test_write_infer(self, ext, get_random_path): + if ext == '.xz': + tm._skip_if_no_lzma() - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + base = get_random_path + path1 = base + ext + path2 = base + ".raw" + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break - # write to compressed file by inferred compression method - df.to_pickle(p1) - - # decompress - decompress_file(p1, p2, compression=compression) + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - # read decompressed file - df2 = pd.read_pickle(p2, compression=None) + # write to compressed file by inferred compression method + df.to_pickle(p1) - tm.assert_frame_equal(df, df2) + # decompress + self.decompress_file(p1, p2, compression=compression) + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) -@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"]) -def test_read_explicit(compression): - # issue 11666 - if compression == 'xz': - tm._skip_if_no_lzma() + tm.assert_frame_equal(df, df2) - base = get_random_path() - path1 = base + ".raw" - path2 = base + ".compressed" + @pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"]) + def test_read_explicit(self, compression, get_random_path): + # issue 11666 + if compression == 'xz': + tm._skip_if_no_lzma() - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + base = get_random_path + path1 = base + ".raw" + path2 = base + ".compressed" - # write to uncompressed file - df.to_pickle(p1, compression=None) + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - # compress - compress_file(p1, p2, compression=compression) + # write to uncompressed file + df.to_pickle(p1, compression=None) - # read compressed file - df2 = pd.read_pickle(p2, compression=compression) + # compress + self.compress_file(p1, p2, compression=compression) - tm.assert_frame_equal(df, df2) + # read compressed file + df2 = pd.read_pickle(p2, compression=compression) + tm.assert_frame_equal(df, df2) -@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip', - '.no_compress']) -def test_read_infer(ext): - if ext == '.xz': - tm._skip_if_no_lzma() + @pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip', + '.no_compress']) + def test_read_infer(self, ext, get_random_path): + if ext == '.xz': + tm._skip_if_no_lzma() - base = get_random_path() - path1 = base + ".raw" - path2 = base + ext - compression = None - for c in _compression_to_extension: - if _compression_to_extension[c] == ext: - compression = c - break + base = get_random_path + path1 = base + ".raw" + path2 = base + ext + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break - with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: - df = tm.makeDataFrame() + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() - # write to uncompressed file - df.to_pickle(p1, compression=None) + # write to uncompressed file + df.to_pickle(p1, compression=None) - # compress - compress_file(p1, p2, compression=compression) + # 
compress + self.compress_file(p1, p2, compression=compression) - # read compressed file by inferred compression method - df2 = pd.read_pickle(p2) + # read compressed file by inferred compression method + df2 = pd.read_pickle(p2) - tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df, df2) From 470c3276479925a198f38f9c0aacd745ef3a64bd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Mar 2017 00:15:03 +0100 Subject: [PATCH 181/353] DOC: remove latex and parallel building (#15637) --- ci/build_docs.sh | 3 --- doc/make.py | 2 +- doc/source/io.rst | 5 +++-- doc/source/whatsnew/v0.20.0.txt | 1 + 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 5dc649a91c4f7..bfe7a1eed756b 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -23,9 +23,6 @@ if [ x"$DOC_BUILD" != x"" ]; then source activate pandas - # install sudo deps - time sudo apt-get $APT_ARGS install dvipng texlive-latex-base texlive-latex-extra - mv "$TRAVIS_BUILD_DIR"/doc /tmp cd /tmp/doc diff --git a/doc/make.py b/doc/make.py index a2f5be5594e44..30cd2ad8b61c9 100755 --- a/doc/make.py +++ b/doc/make.py @@ -197,7 +197,7 @@ def html(): print(e) print("Failed to convert %s" % nb) - if os.system('sphinx-build -j 2 -P -b html -d build/doctrees ' + if os.system('sphinx-build -P -b html -d build/doctrees ' 'source build/html'): raise SystemExit("Building HTML failed.") try: diff --git a/doc/source/io.rst b/doc/source/io.rst index fdd33ab4625f3..a702efdc6aaf9 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2070,9 +2070,9 @@ by the Table Schema spec. The full list of types supported are described in the Table Schema spec. This table shows the mapping from pandas types: -============== ================= +=============== ================= Pandas type Table Schema type -============== ================= +=============== ================= int64 integer float64 number bool boolean @@ -3096,6 +3096,7 @@ The default is to 'infer .. ipython:: python :suppress: + import os os.remove("data.pkl.compress") os.remove("data.pkl.xz") diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8f671062464f0..cf3dddc3a2933 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -140,6 +140,7 @@ The default is to 'infer .. ipython:: python :suppress: + import os os.remove("data.pkl.compress") os.remove("data.pkl.xz") From a703abcb6328b105bfa0b30895b8893f7f52f88f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 03:24:36 -0500 Subject: [PATCH 182/353] DOC: increase recursion limit on sphinx builds (#15641) --- doc/source/conf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/conf.py b/doc/source/conf.py index 6840f76866d2c..0b0de16411e9b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -16,6 +16,14 @@ import inspect from pandas.compat import u, PY3 +# https://github.com/sphinx-doc/sphinx/pull/2325/files +# Workaround for sphinx-build recursion limit overflow: +# pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL) +# RuntimeError: maximum recursion depth exceeded while pickling an object +# +# Python's default allowed recursion depth is 1000. +sys.setrecursionlimit(5000) + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
From 22038081aa1546a1f269e7393f0b5f0d294283c5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 06:20:01 -0500 Subject: [PATCH 183/353] DEPR: remove more .ix warnings from tests Author: Jeff Reback Closes #15638 from jreback/indexing and squashes the following commits: 8b82bd6 [Jeff Reback] CLN: split test_indexing.py 23e82eb [Jeff Reback] DEPR: remove more .ix warnings from tests --- pandas/tests/indexing/common.py | 257 ++ .../indexing/test_chaining_and_caching.py | 96 +- pandas/tests/indexing/test_iloc.py | 590 ++++ pandas/tests/indexing/test_indexing.py | 2811 +---------------- pandas/tests/indexing/test_ix.py | 333 ++ pandas/tests/indexing/test_loc.py | 630 ++++ pandas/tests/indexing/test_multiindex.py | 225 +- pandas/tests/indexing/test_panel.py | 12 +- pandas/tests/indexing/test_partial.py | 587 ++++ pandas/tests/indexing/test_scalar.py | 156 + 10 files changed, 2906 insertions(+), 2791 deletions(-) create mode 100644 pandas/tests/indexing/test_iloc.py create mode 100644 pandas/tests/indexing/test_ix.py create mode 100644 pandas/tests/indexing/test_loc.py create mode 100644 pandas/tests/indexing/test_partial.py create mode 100644 pandas/tests/indexing/test_scalar.py diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index 73167393cf35d..c7637a00910c6 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -1,5 +1,262 @@ """ common utilities """ +import itertools +from warnings import catch_warnings +import numpy as np + +from pandas.compat import lrange +from pandas.types.common import is_scalar +from pandas import Series, DataFrame, Panel, date_range, UInt64Index +from pandas.util import testing as tm +from pandas.formats.printing import pprint_thing + +_verbose = False + def _mklbl(prefix, n): return ["%s%s" % (prefix, i) for i in range(n)] + + +def _axify(obj, key, axis): + # create a tuple accessor + axes = [slice(None)] * obj.ndim + axes[axis] = key + return tuple(axes) + + +class Base(object): + """ indexing comprehensive base class """ + + _objs = set(['series', 'frame', 'panel']) + _typs = set(['ints', 'uints', 'labels', 'mixed', + 'ts', 'floats', 'empty', 'ts_rev']) + + def setUp(self): + + self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) + self.frame_ints = DataFrame(np.random.randn(4, 4), + index=lrange(0, 8, 2), + columns=lrange(0, 12, 3)) + self.panel_ints = Panel(np.random.rand(4, 4, 4), + items=lrange(0, 8, 2), + major_axis=lrange(0, 12, 3), + minor_axis=lrange(0, 16, 4)) + + self.series_uints = Series(np.random.rand(4), + index=UInt64Index(lrange(0, 8, 2))) + self.frame_uints = DataFrame(np.random.randn(4, 4), + index=UInt64Index(lrange(0, 8, 2)), + columns=UInt64Index(lrange(0, 12, 3))) + self.panel_uints = Panel(np.random.rand(4, 4, 4), + items=UInt64Index(lrange(0, 8, 2)), + major_axis=UInt64Index(lrange(0, 12, 3)), + minor_axis=UInt64Index(lrange(0, 16, 4))) + + self.series_labels = Series(np.random.randn(4), index=list('abcd')) + self.frame_labels = DataFrame(np.random.randn(4, 4), + index=list('abcd'), columns=list('ABCD')) + self.panel_labels = Panel(np.random.randn(4, 4, 4), + items=list('abcd'), + major_axis=list('ABCD'), + minor_axis=list('ZYXW')) + + self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) + self.frame_mixed = DataFrame(np.random.randn(4, 4), + index=[2, 4, 'null', 8]) + self.panel_mixed = Panel(np.random.randn(4, 4, 4), + items=[2, 4, 'null', 8]) + + self.series_ts = Series(np.random.randn(4), + index=date_range('20130101', 
periods=4)) + self.frame_ts = DataFrame(np.random.randn(4, 4), + index=date_range('20130101', periods=4)) + self.panel_ts = Panel(np.random.randn(4, 4, 4), + items=date_range('20130101', periods=4)) + + dates_rev = (date_range('20130101', periods=4) + .sort_values(ascending=False)) + self.series_ts_rev = Series(np.random.randn(4), + index=dates_rev) + self.frame_ts_rev = DataFrame(np.random.randn(4, 4), + index=dates_rev) + self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), + items=dates_rev) + + self.frame_empty = DataFrame({}) + self.series_empty = Series({}) + self.panel_empty = Panel({}) + + # form agglomerates + for o in self._objs: + + d = dict() + for t in self._typs: + d[t] = getattr(self, '%s_%s' % (o, t), None) + + setattr(self, o, d) + + def generate_indices(self, f, values=False): + """ generate the indicies + if values is True , use the axis values + is False, use the range + """ + + axes = f.axes + if values: + axes = [lrange(len(a)) for a in axes] + + return itertools.product(*axes) + + def get_result(self, obj, method, key, axis): + """ return the result for this obj with this key and this axis """ + + if isinstance(key, dict): + key = key[axis] + + # use an artifical conversion to map the key as integers to the labels + # so ix can work for comparisions + if method == 'indexer': + method = 'ix' + key = obj._get_axis(axis)[key] + + # in case we actually want 0 index slicing + try: + with catch_warnings(record=True): + xp = getattr(obj, method).__getitem__(_axify(obj, key, axis)) + except: + xp = getattr(obj, method).__getitem__(key) + + return xp + + def get_value(self, f, i, values=False): + """ return the value for the location i """ + + # check agains values + if values: + return f.values[i] + + # this is equiv of f[col][row]..... 
+ # v = f + # for a in reversed(i): + # v = v.__getitem__(a) + # return v + with catch_warnings(record=True): + return f.ix[i] + + def check_values(self, f, func, values=False): + + if f is None: + return + axes = f.axes + indicies = itertools.product(*axes) + + for i in indicies: + result = getattr(f, func)[i] + + # check agains values + if values: + expected = f.values[i] + else: + expected = f + for a in reversed(i): + expected = expected.__getitem__(a) + + tm.assert_almost_equal(result, expected) + + def check_result(self, name, method1, key1, method2, key2, typs=None, + objs=None, axes=None, fails=None): + def _eq(t, o, a, obj, k1, k2): + """ compare equal for these 2 keys """ + + if a is not None and a > obj.ndim - 1: + return + + def _print(result, error=None): + if error is not None: + error = str(error) + v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s," + "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % + (name, result, t, o, method1, method2, a, error or '')) + if _verbose: + pprint_thing(v) + + try: + rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) + + try: + xp = self.get_result(obj, method2, k2, a) + except: + result = 'no comp' + _print(result) + return + + detail = None + + try: + if is_scalar(rs) and is_scalar(xp): + self.assertEqual(rs, xp) + elif xp.ndim == 1: + tm.assert_series_equal(rs, xp) + elif xp.ndim == 2: + tm.assert_frame_equal(rs, xp) + elif xp.ndim == 3: + tm.assert_panel_equal(rs, xp) + result = 'ok' + except AssertionError as e: + detail = str(e) + result = 'fail' + + # reverse the checks + if fails is True: + if result == 'fail': + result = 'ok (fail)' + + _print(result) + if not result.startswith('ok'): + raise AssertionError(detail) + + except AssertionError: + raise + except Exception as detail: + + # if we are in fails, the ok, otherwise raise it + if fails is not None: + if isinstance(detail, fails): + result = 'ok (%s)' % type(detail).__name__ + _print(result) + return + + result = type(detail).__name__ + raise AssertionError(_print(result, error=detail)) + + if typs is None: + typs = self._typs + + if objs is None: + objs = self._objs + + if axes is not None: + if not isinstance(axes, (tuple, list)): + axes = [axes] + else: + axes = list(axes) + else: + axes = [0, 1, 2] + + # check + for o in objs: + if o not in self._objs: + continue + + d = getattr(self, o) + for a in axes: + for t in typs: + if t not in self._typs: + continue + + obj = d[t] + if obj is not None: + obj = obj.copy() + + k2 = key2 + _eq(t, o, a, obj, key1, k2) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 0e921aaf826f9..72e704537ba3f 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -1,3 +1,5 @@ +from warnings import catch_warnings + import numpy as np import pandas as pd from pandas.core import common as com @@ -41,13 +43,13 @@ def test_setitem_cache_updating(self): # ref the cache if do_ref: - df.ix[0, "c"] + df.loc[0, "c"] # set it - df.ix[7, 'c'] = 1 + df.loc[7, 'c'] = 1 - self.assertEqual(df.ix[0, 'c'], 0.0) - self.assertEqual(df.ix[7, 'c'], 1.0) + self.assertEqual(df.loc[0, 'c'], 0.0) + self.assertEqual(df.loc[7, 'c'], 1.0) # GH 7084 # not updating cache on series setting with slices @@ -226,21 +228,21 @@ def random_text(nobs=100): # explicity copy indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer].copy() + df = df.loc[indexer].copy() self.assertIsNone(df.is_copy) df['letters'] = 
df['letters'].apply(str.lower) # implicity take df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] + df = df.loc[indexer] self.assertIsNotNone(df.is_copy) df['letters'] = df['letters'].apply(str.lower) # implicity take 2 df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.ix[indexer] + df = df.loc[indexer] self.assertIsNotNone(df.is_copy) df.loc[:, 'letters'] = df['letters'].apply(str.lower) @@ -251,7 +253,8 @@ def random_text(nobs=100): df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) - df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) + df.loc[indexer, 'letters'] = ( + df.loc[indexer, 'letters'].apply(str.lower)) # an identical take, so no copy df = DataFrame({'a': [1]}).dropna() @@ -312,12 +315,12 @@ def f(): D=list('abcde'))) def f(): - df.ix[2]['D'] = 'foo' + df.loc[2]['D'] = 'foo' self.assertRaises(com.SettingWithCopyError, f) def f(): - df.ix[2]['C'] = 'foo' + df.loc[2]['C'] = 'foo' self.assertRaises(com.SettingWithCopyError, f) @@ -356,3 +359,76 @@ def test_detect_chained_assignment_warnings(self): with tm.assert_produces_warning( expected_warning=com.SettingWithCopyWarning): df.loc[0]['A'] = 111 + + def test_chained_getitem_with_lists(self): + + # GH6394 + # Regression in chained getitem indexing with embedded list-like from + # 0.12 + def check(result, expected): + tm.assert_numpy_array_equal(result, expected) + tm.assertIsInstance(result, np.ndarray) + + df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]}) + expected = df['A'].iloc[2] + result = df.loc[2, 'A'] + check(result, expected) + result2 = df.iloc[2]['A'] + check(result2, expected) + result3 = df['A'].loc[2] + check(result3, expected) + result4 = df['A'].iloc[2] + check(result4, expected) + + def test_cache_updating(self): + # GH 4939, make sure to update the cache on setitem + + df = tm.makeDataFrame() + df['A'] # cache series + with catch_warnings(record=True): + df.ix["Hello Friend"] = df.ix[0] + self.assertIn("Hello Friend", df['A'].index) + self.assertIn("Hello Friend", df['B'].index) + + with catch_warnings(record=True): + panel = tm.makePanel() + panel.ix[0] # get first item into cache + panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 + self.assertIn("A+1", panel.ix[0].columns) + self.assertIn("A+1", panel.ix[1].columns) + + # 5216 + # make sure that we don't try to set a dead cache + a = np.random.rand(10, 3) + df = DataFrame(a, columns=['x', 'y', 'z']) + tuples = [(i, j) for i in range(5) for j in range(2)] + index = MultiIndex.from_tuples(tuples) + df.index = index + + # setting via chained assignment + # but actually works, since everything is a view + df.loc[0]['z'].iloc[0] = 1. + result = df.loc[(0, 0), 'z'] + self.assertEqual(result, 1) + + # correct setting + df.loc[(0, 0), 'z'] = 2 + result = df.loc[(0, 0), 'z'] + self.assertEqual(result, 2) + + # 10264 + df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e'], index=range(5)) + df['f'] = 0 + df.f.values[3] = 1 + + # TODO(wesm): unused? 
+ # y = df.iloc[np.arange(2, len(df))] + + df.f.values[3] = 2 + expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5)) + expected.at[3, 'f'] = 2 + tm.assert_frame_equal(df, expected) + expected = Series([0, 0, 0, 2, 0], name='f') + tm.assert_series_equal(df.f, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py new file mode 100644 index 0000000000000..517194835ca73 --- /dev/null +++ b/pandas/tests/indexing/test_iloc.py @@ -0,0 +1,590 @@ +""" test positional based indexing with iloc """ + +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas.compat import lrange, lmap +from pandas import Series, DataFrame, date_range, concat, isnull +from pandas.util import testing as tm +from pandas.tests.indexing.common import Base + + +class TestiLoc(Base, tm.TestCase): + + def test_iloc_exceeds_bounds(self): + + # GH6296 + # iloc should allow indexers that exceed the bounds + df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) + expected = df + + # lists of positions should raise IndexErrror! + with tm.assertRaisesRegexp(IndexError, + 'positional indexers are out-of-bounds'): + df.iloc[:, [0, 1, 2, 3, 4, 5]] + self.assertRaises(IndexError, lambda: df.iloc[[1, 30]]) + self.assertRaises(IndexError, lambda: df.iloc[[1, -30]]) + self.assertRaises(IndexError, lambda: df.iloc[[100]]) + + s = df['A'] + self.assertRaises(IndexError, lambda: s.iloc[[100]]) + self.assertRaises(IndexError, lambda: s.iloc[[-100]]) + + # still raise on a single indexer + msg = 'single positional indexer is out-of-bounds' + with tm.assertRaisesRegexp(IndexError, msg): + df.iloc[30] + self.assertRaises(IndexError, lambda: df.iloc[-30]) + + # GH10779 + # single positive/negative indexer exceeding Series bounds should raise + # an IndexError + with tm.assertRaisesRegexp(IndexError, msg): + s.iloc[30] + self.assertRaises(IndexError, lambda: s.iloc[-30]) + + # slices are ok + result = df.iloc[:, 4:10] # 0 < start < len < stop + expected = df.iloc[:, 4:] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -4:-10] # stop < 0 < start < len + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:, :4:-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:, 4::-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:4] # start < 0 < stop < len + expected = df.iloc[:, :4] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4] # 0 < stop < len < start + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:11] # 0 < len < start < stop + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + # slice bounds exceeding is ok + result = s.iloc[18:30] + expected = s.iloc[18:] + tm.assert_series_equal(result, expected) + + result = s.iloc[30:] + expected = s.iloc[:0] + tm.assert_series_equal(result, expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + tm.assert_series_equal(result, expected) + + # doc example + def check(result, expected): + str(result) + result.dtypes + tm.assert_frame_equal(result, expected) + + dfl = DataFrame(np.random.randn(5, 2), 
columns=list('AB')) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) + check(dfl.iloc[4:6], dfl.iloc[[4]]) + + self.assertRaises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) + self.assertRaises(IndexError, lambda: dfl.iloc[:, 4]) + + def test_iloc_getitem_int(self): + + # integer + self.check_result('integer', 'iloc', 2, 'ix', + {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints']) + self.check_result('integer', 'iloc', 2, 'indexer', 2, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_neg_int(self): + + # neg integer + self.check_result('neg int', 'iloc', -1, 'ix', + {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints']) + self.check_result('neg int', 'iloc', -1, 'indexer', -1, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_list_int(self): + + # list of ints + self.check_result('list int', 'iloc', [0, 1, 2], 'ix', + {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}, + typs=['ints', 'uints']) + self.check_result('list int', 'iloc', [2], 'ix', + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) + self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + # array of ints (GH5006), make sure that a single indexer is returning + # the correct type + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', + {0: [0, 2, 4], + 1: [0, 3, 6], + 2: [0, 4, 8]}, typs=['ints', 'uints']) + self.check_result('array int', 'iloc', np.array([2]), 'ix', + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', + [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_neg_int_can_reach_first_index(self): + # GH10547 and GH10779 + # negative integers should be able to reach index 0 + df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]}) + s = df['A'] + + expected = df.iloc[0] + result = df.iloc[-3] + tm.assert_series_equal(result, expected) + + expected = df.iloc[[0]] + result = df.iloc[[-3]] + tm.assert_frame_equal(result, expected) + + expected = s.iloc[0] + result = s.iloc[-3] + self.assertEqual(result, expected) + + expected = s.iloc[[0]] + result = s.iloc[[-3]] + tm.assert_series_equal(result, expected) + + # check the length 1 Series case highlighted in GH10547 + expected = pd.Series(['a'], index=['A']) + result = expected.iloc[[-1]] + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_dups(self): + + # no dups in panel (bug?) 
+ self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', + {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}, + objs=['series', 'frame'], typs=['ints', 'uints']) + + # GH 6766 + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) + df = concat([df1, df2], axis=1) + + # cross-sectional indexing + result = df.iloc[0, 0] + self.assertTrue(isnull(result)) + + result = df.iloc[0, :] + expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], + name=0) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_array(self): + + # array like + s = Series(index=lrange(1, 4)) + self.check_result('array like', 'iloc', s.index, 'ix', + {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]}, + typs=['ints', 'uints']) + + def test_iloc_getitem_bool(self): + + # boolean indexers + b = [True, False, True, False, ] + self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints']) + self.check_result('bool', 'iloc', b, 'ix', b, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_slice(self): + + # slices + self.check_result('slice', 'iloc', slice(1, 3), 'ix', + {0: [2, 4], 1: [3, 6], 2: [4, 8]}, + typs=['ints', 'uints']) + self.check_result('slice', 'iloc', slice(1, 3), 'indexer', + slice(1, 3), + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + def test_iloc_getitem_slice_dups(self): + + df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']) + df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2), + columns=['A', 'C']) + + # axis=1 + df = concat([df1, df2], axis=1) + tm.assert_frame_equal(df.iloc[:, :4], df1) + tm.assert_frame_equal(df.iloc[:, 4:], df2) + + df = concat([df2, df1], axis=1) + tm.assert_frame_equal(df.iloc[:, :2], df2) + tm.assert_frame_equal(df.iloc[:, 2:], df1) + + exp = concat([df2, df1.iloc[:, [0]]], axis=1) + tm.assert_frame_equal(df.iloc[:, 0:3], exp) + + # axis=0 + df = concat([df, df], axis=0) + tm.assert_frame_equal(df.iloc[0:10, :2], df2) + tm.assert_frame_equal(df.iloc[0:10, 2:], df1) + tm.assert_frame_equal(df.iloc[10:, :2], df2) + tm.assert_frame_equal(df.iloc[10:, 2:], df1) + + def test_iloc_setitem(self): + df = self.frame_ints + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + self.assertEqual(result, 1) + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + # GH5771 + s = Series(0, index=[4, 5, 6]) + s.iloc[1:2] += 1 + expected = Series([0, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_list(self): + + # setitem with an iloc list + df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"], + columns=["A", "B", "C"]) + df.iloc[[0, 1], [1, 2]] + df.iloc[[0, 1], [1, 2]] += 100 + + expected = DataFrame( + np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), + index=["A", "B", "C"], columns=["A", "B", "C"]) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_dups(self): + + # GH 6766 + # iloc with a mask aligning from another iloc + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) + df = concat([df1, df2], axis=1) + + expected = df.fillna(3) + expected['A'] = expected['A'].astype('float64') + inds = np.isnan(df.iloc[:, 0]) + mask = inds[inds].index + df.iloc[mask, 0] = df.iloc[mask, 2] + tm.assert_frame_equal(df, expected) + + # del a dup column across blocks + expected = DataFrame({0: [1, 2], 1: [3, 4]}) + 
expected.columns = ['B', 'B'] + del df['A'] + tm.assert_frame_equal(df, expected) + + # assign back to self + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] + tm.assert_frame_equal(df, expected) + + # reversed x 2 + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( + drop=True) + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( + drop=True) + tm.assert_frame_equal(df, expected) + + def test_iloc_getitem_frame(self): + df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), + columns=lrange(0, 8, 2)) + + result = df.iloc[2] + with catch_warnings(record=True): + exp = df.ix[4] + tm.assert_series_equal(result, exp) + + result = df.iloc[2, 2] + with catch_warnings(record=True): + exp = df.ix[4, 4] + self.assertEqual(result, exp) + + # slice + result = df.iloc[4:8] + with catch_warnings(record=True): + expected = df.ix[8:14] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 2:3] + with catch_warnings(record=True): + expected = df.ix[:, 4:5] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[0, 1, 3]] + with catch_warnings(record=True): + expected = df.ix[[0, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.iloc[[0, 1, 3], [0, 1]] + with catch_warnings(record=True): + expected = df.ix[[0, 2, 6], [0, 2]] + tm.assert_frame_equal(result, expected) + + # neg indicies + result = df.iloc[[-1, 1, 3], [-1, 1]] + with catch_warnings(record=True): + expected = df.ix[[18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # dups indicies + result = df.iloc[[-1, -1, 1, 3], [-1, 1]] + with catch_warnings(record=True): + expected = df.ix[[18, 18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # with index-like + s = Series(index=lrange(1, 5)) + result = df.iloc[s.index] + with catch_warnings(record=True): + expected = df.ix[[2, 4, 6, 8]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_labelled_frame(self): + # try with labelled frame + df = DataFrame(np.random.randn(10, 4), + index=list('abcdefghij'), columns=list('ABCD')) + + result = df.iloc[1, 1] + exp = df.loc['b', 'B'] + self.assertEqual(result, exp) + + result = df.iloc[:, 2:3] + expected = df.loc[:, ['C']] + tm.assert_frame_equal(result, expected) + + # negative indexing + result = df.iloc[-1, -1] + exp = df.loc['j', 'D'] + self.assertEqual(result, exp) + + # out-of-bounds exception + self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5])) + + # trying to use a label + self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) + + def test_iloc_getitem_doc_issue(self): + + # multi axis slicing issue with single block + # surfaced in GH 6059 + + arr = np.random.randn(6, 4) + index = date_range('20130101', periods=6) + columns = list('ABCD') + df = DataFrame(arr, index=index, columns=columns) + + # defines ref_locs + df.describe() + + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], + columns=columns[0:2]) + tm.assert_frame_equal(result, expected) + + # for dups + df.columns = list('aaaa') + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], + columns=list('aa')) + tm.assert_frame_equal(result, expected) + + # related + arr = np.random.randn(6, 4) + index = list(range(0, 12, 2)) + columns = list(range(0, 8, 2)) + df = DataFrame(arr, index=index, columns=columns) + + df._data.blocks[0].mgr_locs + result = df.iloc[1:5, 2:4] + str(result) + result.dtypes + 
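        # NOTE (illustrative, not from the original patch): this slice is
        # purely positional.  The frame's labels are even integers
        # (index 0..10, columns 0..6), yet ``df.iloc[1:5, 2:4]`` still means
        # rows 1-4 and columns 2-3 by position, i.e. exactly ``arr[1:5, 2:4]``
        # -- which is what the expected frame below is built from.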
expected = DataFrame(arr[1:5, 2:4], index=index[1:5], + columns=columns[2:4]) + tm.assert_frame_equal(result, expected) + + def test_iloc_setitem_series(self): + df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), + columns=list('ABCD')) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + self.assertEqual(result, 1) + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) + + s.iloc[1] = 1 + result = s.iloc[1] + self.assertEqual(result, 1) + + s.iloc[:4] = 0 + expected = s.iloc[:4] + result = s.iloc[:4] + tm.assert_series_equal(result, expected) + + s = Series([-1] * 6) + s.iloc[0::2] = [0, 2, 4] + s.iloc[1::2] = [1, 3, 5] + result = s + expected = Series([0, 1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + def test_iloc_setitem_list_of_lists(self): + + # GH 7551 + # list-of-list is set incorrectly in mixed vs. single dtyped frames + df = DataFrame(dict(A=np.arange(5, dtype='int64'), + B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [[10, 11], [12, 13]] + expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + df = DataFrame( + dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [['x', 11], ['y', 13]] + expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], + B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + def test_iloc_mask(self): + + # GH 3631, iloc with a mask (of a series) should raise + df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) + mask = (df.a % 2 == 0) + self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) + mask.index = lrange(len(mask)) + self.assertRaises(NotImplementedError, df.iloc.__getitem__, + tuple([mask])) + + # ndarray ok + result = df.iloc[np.array([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + # the possibilities + locs = np.arange(4) + nums = 2 ** locs + reps = lmap(bin, nums) + df = DataFrame({'locs': locs, 'nums': nums}, reps) + + expected = { + (None, ''): '0b1100', + (None, '.loc'): '0b1100', + (None, '.iloc'): '0b1100', + ('index', ''): '0b11', + ('index', '.loc'): '0b11', + ('index', '.iloc'): ('iLocation based boolean indexing ' + 'cannot use an indexable as a mask'), + ('locs', ''): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the indexed ' + 'object do not match', + ('locs', '.loc'): 'Unalignable boolean Series provided as indexer ' + '(index of the boolean Series and of the ' + 'indexed object do not match', + ('locs', '.iloc'): ('iLocation based boolean indexing on an ' + 'integer type is not available'), + } + + # UserWarnings from reindex of a boolean mask + with catch_warnings(record=True): + result = dict() + for idx in [None, 'index', 'locs']: + mask = (df.nums > 2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in ['', '.loc', '.iloc']: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + ans = str(bin(accessor[mask]['nums'].sum())) + except Exception as e: + ans = str(e) + + key = tuple([idx, method]) + r = expected.get(key) + if r != ans: + raise AssertionError( + "[%s] does not match [%s], received [%s]" + % (key, ans, r)) + + def test_iloc_non_unique_indexing(self): + + # GH 4017, non-unique indexing (on the axis) + df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000}) + idx = np.array(lrange(30)) * 99 + expected = df.iloc[idx] + + df3 = 
pd.concat([df, 2 * df, 3 * df]) + result = df3.iloc[idx] + + tm.assert_frame_equal(result, expected) + + df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000}) + df2 = pd.concat([df2, 2 * df2, 3 * df2]) + + sidx = df2.index.to_series() + expected = df2.iloc[idx[idx <= sidx.max()]] + + new_list = [] + for r, s in expected.iterrows(): + new_list.append(s) + new_list.append(s * 2) + new_list.append(s * 3) + + expected = DataFrame(new_list) + expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) + ]) + result = df2.loc[idx] + tm.assert_frame_equal(result, expected, check_index_type=False) + + def test_iloc_empty_list_indexer_is_ok(self): + from pandas.util.testing import makeCustomDataframe as mkdf + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 4502e0171dfbe..0d6ca383a1be1 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,1648 +1,64 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -import itertools -import warnings + +""" test fancy indexing & misc """ + from warnings import catch_warnings from datetime import datetime from pandas.types.common import (is_integer_dtype, - is_float_dtype, - is_scalar) -from pandas.compat import range, lrange, lzip, StringIO, lmap -from pandas._libs.tslib import NaT -from numpy import nan -from numpy.random import randn + is_float_dtype) +from pandas.compat import range, lrange, lzip, StringIO import numpy as np import pandas as pd -from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice -from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, - MultiIndex, Timestamp, Timedelta, UInt64Index) -from pandas.formats.printing import pprint_thing -from pandas import concat -from pandas.core.common import PerformanceWarning -from pandas.tests.indexing.common import _mklbl +from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm -from pandas import date_range +from pandas.tests.indexing.common import Base, _mklbl -_verbose = False # ------------------------------------------------------------------------ # Indexing test cases -def _generate_indices(f, values=False): - """ generate the indicies - if values is True , use the axis values - is False, use the range - """ - - axes = f.axes - if values: - axes = [lrange(len(a)) for a in axes] - - return itertools.product(*axes) - - -def _get_value(f, i, values=False): - """ return the value for the location i """ - - # check agains values - if values: - return f.values[i] - - # this is equiv of f[col][row]..... 
- # v = f - # for a in reversed(i): - # v = v.__getitem__(a) - # return v - with catch_warnings(record=True): - return f.ix[i] - - -def _get_result(obj, method, key, axis): - """ return the result for this obj with this key and this axis """ - - if isinstance(key, dict): - key = key[axis] - - # use an artifical conversion to map the key as integers to the labels - # so ix can work for comparisions - if method == 'indexer': - method = 'ix' - key = obj._get_axis(axis)[key] - - # in case we actually want 0 index slicing - try: - xp = getattr(obj, method).__getitem__(_axify(obj, key, axis)) - except: - xp = getattr(obj, method).__getitem__(key) - - return xp - - -def _axify(obj, key, axis): - # create a tuple accessor - axes = [slice(None)] * obj.ndim - axes[axis] = key - return tuple(axes) - - -class TestIndexing(tm.TestCase): - - _objs = set(['series', 'frame', 'panel']) - _typs = set(['ints', 'uints', 'labels', 'mixed', - 'ts', 'floats', 'empty', 'ts_rev']) - - def setUp(self): - - self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) - self.frame_ints = DataFrame(np.random.randn(4, 4), - index=lrange(0, 8, 2), - columns=lrange(0, 12, 3)) - self.panel_ints = Panel(np.random.rand(4, 4, 4), - items=lrange(0, 8, 2), - major_axis=lrange(0, 12, 3), - minor_axis=lrange(0, 16, 4)) - - self.series_uints = Series(np.random.rand(4), - index=UInt64Index(lrange(0, 8, 2))) - self.frame_uints = DataFrame(np.random.randn(4, 4), - index=UInt64Index(lrange(0, 8, 2)), - columns=UInt64Index(lrange(0, 12, 3))) - self.panel_uints = Panel(np.random.rand(4, 4, 4), - items=UInt64Index(lrange(0, 8, 2)), - major_axis=UInt64Index(lrange(0, 12, 3)), - minor_axis=UInt64Index(lrange(0, 16, 4))) - - self.series_labels = Series(np.random.randn(4), index=list('abcd')) - self.frame_labels = DataFrame(np.random.randn(4, 4), - index=list('abcd'), columns=list('ABCD')) - self.panel_labels = Panel(np.random.randn(4, 4, 4), - items=list('abcd'), - major_axis=list('ABCD'), - minor_axis=list('ZYXW')) - - self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) - self.frame_mixed = DataFrame(np.random.randn(4, 4), - index=[2, 4, 'null', 8]) - self.panel_mixed = Panel(np.random.randn(4, 4, 4), - items=[2, 4, 'null', 8]) - - self.series_ts = Series(np.random.randn(4), - index=date_range('20130101', periods=4)) - self.frame_ts = DataFrame(np.random.randn(4, 4), - index=date_range('20130101', periods=4)) - self.panel_ts = Panel(np.random.randn(4, 4, 4), - items=date_range('20130101', periods=4)) - - dates_rev = (date_range('20130101', periods=4) - .sort_values(ascending=False)) - self.series_ts_rev = Series(np.random.randn(4), - index=dates_rev) - self.frame_ts_rev = DataFrame(np.random.randn(4, 4), - index=dates_rev) - self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), - items=dates_rev) - - self.frame_empty = DataFrame({}) - self.series_empty = Series({}) - self.panel_empty = Panel({}) - - # form agglomerates - for o in self._objs: - - d = dict() - for t in self._typs: - d[t] = getattr(self, '%s_%s' % (o, t), None) - - setattr(self, o, d) - - def check_values(self, f, func, values=False): - - if f is None: - return - axes = f.axes - indicies = itertools.product(*axes) - - for i in indicies: - result = getattr(f, func)[i] - - # check agains values - if values: - expected = f.values[i] - else: - expected = f - for a in reversed(i): - expected = expected.__getitem__(a) - - tm.assert_almost_equal(result, expected) - - def check_result(self, name, method1, key1, method2, key2, typs=None, - objs=None, 
axes=None, fails=None): - def _eq(t, o, a, obj, k1, k2): - """ compare equal for these 2 keys """ - - if a is not None and a > obj.ndim - 1: - return - - def _print(result, error=None): - if error is not None: - error = str(error) - v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s," - "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % - (name, result, t, o, method1, method2, a, error or '')) - if _verbose: - pprint_thing(v) - - try: - rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) - - try: - xp = _get_result(obj, method2, k2, a) - except: - result = 'no comp' - _print(result) - return - - detail = None - - try: - if is_scalar(rs) and is_scalar(xp): - self.assertEqual(rs, xp) - elif xp.ndim == 1: - tm.assert_series_equal(rs, xp) - elif xp.ndim == 2: - tm.assert_frame_equal(rs, xp) - elif xp.ndim == 3: - tm.assert_panel_equal(rs, xp) - result = 'ok' - except AssertionError as e: - detail = str(e) - result = 'fail' - - # reverse the checks - if fails is True: - if result == 'fail': - result = 'ok (fail)' - - _print(result) - if not result.startswith('ok'): - raise AssertionError(detail) - - except AssertionError: - raise - except Exception as detail: - - # if we are in fails, the ok, otherwise raise it - if fails is not None: - if isinstance(detail, fails): - result = 'ok (%s)' % type(detail).__name__ - _print(result) - return - - result = type(detail).__name__ - raise AssertionError(_print(result, error=detail)) - - if typs is None: - typs = self._typs - - if objs is None: - objs = self._objs - - if axes is not None: - if not isinstance(axes, (tuple, list)): - axes = [axes] - else: - axes = list(axes) - else: - axes = [0, 1, 2] - - # check - for o in objs: - if o not in self._objs: - continue - - d = getattr(self, o) - for a in axes: - for t in typs: - if t not in self._typs: - continue - - obj = d[t] - if obj is not None: - obj = obj.copy() - - k2 = key2 - _eq(t, o, a, obj, key1, k2) - - def test_ix_deprecation(self): - # GH 15114 - - df = DataFrame({'A': [1, 2, 3]}) - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - df.ix[1, 'A'] - - def test_indexer_caching(self): - # GH5727 - # make sure that indexers are in the _internal_names_set - n = 1000001 - arrays = [lrange(n), lrange(n)] - index = MultiIndex.from_tuples(lzip(*arrays)) - s = Series(np.zeros(n), index=index) - str(s) - - # setitem - expected = Series(np.ones(n), index=index) - s = Series(np.zeros(n), index=index) - s[s == 0] = 1 - tm.assert_series_equal(s, expected) - - def test_at_and_iat_get(self): - def _check(f, func, values=False): - - if f is not None: - indicies = _generate_indices(f, values) - for i in indicies: - result = getattr(f, func)[i] - expected = _get_value(f, i, values) - tm.assert_almost_equal(result, expected) - - for o in self._objs: - - d = getattr(self, o) - - # iat - for f in [d['ints'], d['uints']]: - _check(f, 'iat', values=True) - - for f in [d['labels'], d['ts'], d['floats']]: - if f is not None: - self.assertRaises(ValueError, self.check_values, f, 'iat') - - # at - for f in [d['ints'], d['uints'], d['labels'], - d['ts'], d['floats']]: - _check(f, 'at') - - def test_at_and_iat_set(self): - def _check(f, func, values=False): - - if f is not None: - indicies = _generate_indices(f, values) - for i in indicies: - getattr(f, func)[i] = 1 - expected = _get_value(f, i, values) - tm.assert_almost_equal(expected, 1) - - for t in self._objs: - - d = getattr(self, t) - - # iat - for f in [d['ints'], d['uints']]: - _check(f, 'iat', values=True) - - for f in [d['labels'], 
d['ts'], d['floats']]: - if f is not None: - self.assertRaises(ValueError, _check, f, 'iat') - - # at - for f in [d['ints'], d['uints'], d['labels'], - d['ts'], d['floats']]: - _check(f, 'at') - - def test_at_iat_coercion(self): - - # as timestamp is not a tuple! - dates = date_range('1/1/2000', periods=8) - df = DataFrame(randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) - s = df['A'] - - result = s.at[dates[5]] - xp = s.values[5] - self.assertEqual(result, xp) - - # GH 7729 - # make sure we are boxing the returns - s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]') - expected = Timestamp('2014-02-02') - - for r in [lambda: s.iat[1], lambda: s.iloc[1]]: - result = r() - self.assertEqual(result, expected) - - s = Series(['1 days', '2 days'], dtype='timedelta64[ns]') - expected = Timedelta('2 days') - - for r in [lambda: s.iat[1], lambda: s.iloc[1]]: - result = r() - self.assertEqual(result, expected) - - def test_iat_invalid_args(self): - pass - - def test_imethods_with_dups(self): - - # GH6493 - # iat/iloc with dups - - s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64') - result = s.iloc[2] - self.assertEqual(result, 2) - result = s.iat[2] - self.assertEqual(result, 2) - - self.assertRaises(IndexError, lambda: s.iat[10]) - self.assertRaises(IndexError, lambda: s.iat[-10]) - - result = s.iloc[[2, 3]] - expected = Series([2, 3], [2, 2], dtype='int64') - tm.assert_series_equal(result, expected) - - df = s.to_frame() - result = df.iloc[2] - expected = Series(2, index=[0], name=2) - tm.assert_series_equal(result, expected) - - result = df.iat[2, 0] - expected = 2 - self.assertEqual(result, 2) - - def test_repeated_getitem_dups(self): - # GH 5678 - # repeated gettitems on a dup index returing a ndarray - df = DataFrame( - np.random.random_sample((20, 5)), - index=['ABCDE' [x % 5] for x in range(20)]) - expected = df.loc['A', 0] - result = df.loc[:, 0].loc['A'] - tm.assert_series_equal(result, expected) - - def test_iloc_exceeds_bounds(self): - - # GH6296 - # iloc should allow indexers that exceed the bounds - df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) - expected = df - - # lists of positions should raise IndexErrror! 
- with tm.assertRaisesRegexp(IndexError, - 'positional indexers are out-of-bounds'): - df.iloc[:, [0, 1, 2, 3, 4, 5]] - self.assertRaises(IndexError, lambda: df.iloc[[1, 30]]) - self.assertRaises(IndexError, lambda: df.iloc[[1, -30]]) - self.assertRaises(IndexError, lambda: df.iloc[[100]]) - - s = df['A'] - self.assertRaises(IndexError, lambda: s.iloc[[100]]) - self.assertRaises(IndexError, lambda: s.iloc[[-100]]) - - # still raise on a single indexer - msg = 'single positional indexer is out-of-bounds' - with tm.assertRaisesRegexp(IndexError, msg): - df.iloc[30] - self.assertRaises(IndexError, lambda: df.iloc[-30]) - - # GH10779 - # single positive/negative indexer exceeding Series bounds should raise - # an IndexError - with tm.assertRaisesRegexp(IndexError, msg): - s.iloc[30] - self.assertRaises(IndexError, lambda: s.iloc[-30]) - - # slices are ok - result = df.iloc[:, 4:10] # 0 < start < len < stop - expected = df.iloc[:, 4:] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -4:-10] # stop < 0 < start < len - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) - expected = df.iloc[:, :4:-1] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) - expected = df.iloc[:, 4::-1] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -10:4] # start < 0 < stop < len - expected = df.iloc[:, :4] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:4] # 0 < stop < len < start - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 10:11] # 0 < len < start < stop - expected = df.iloc[:, :0] - tm.assert_frame_equal(result, expected) - - # slice bounds exceeding is ok - result = s.iloc[18:30] - expected = s.iloc[18:] - tm.assert_series_equal(result, expected) - - result = s.iloc[30:] - expected = s.iloc[:0] - tm.assert_series_equal(result, expected) - - result = s.iloc[30::-1] - expected = s.iloc[::-1] - tm.assert_series_equal(result, expected) - - # doc example - def check(result, expected): - str(result) - result.dtypes - tm.assert_frame_equal(result, expected) - - dfl = DataFrame(np.random.randn(5, 2), columns=list('AB')) - check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) - check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) - check(dfl.iloc[4:6], dfl.iloc[[4]]) - - self.assertRaises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) - self.assertRaises(IndexError, lambda: dfl.iloc[:, 4]) - - def test_iloc_getitem_int(self): - - # integer - self.check_result('integer', 'iloc', 2, 'ix', - {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints']) - self.check_result('integer', 'iloc', 2, 'indexer', 2, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_neg_int(self): - - # neg integer - self.check_result('neg int', 'iloc', -1, 'ix', - {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints']) - self.check_result('neg int', 'iloc', -1, 'indexer', -1, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_list_int(self): - - # list of ints - self.check_result('list int', 'iloc', [0, 1, 2], 'ix', - {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}, - typs=['ints', 'uints']) - self.check_result('list int', 'iloc', [2], 'ix', - {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) - self.check_result('list int', 'iloc', 
[0, 1, 2], 'indexer', [0, 1, 2], - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - # array of ints (GH5006), make sure that a single indexer is returning - # the correct type - self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', - {0: [0, 2, 4], - 1: [0, 3, 6], - 2: [0, 4, 8]}, typs=['ints', 'uints']) - self.check_result('array int', 'iloc', np.array([2]), 'ix', - {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) - self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', - [0, 1, 2], - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_neg_int_can_reach_first_index(self): - # GH10547 and GH10779 - # negative integers should be able to reach index 0 - df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]}) - s = df['A'] - - expected = df.iloc[0] - result = df.iloc[-3] - tm.assert_series_equal(result, expected) - - expected = df.iloc[[0]] - result = df.iloc[[-3]] - tm.assert_frame_equal(result, expected) - - expected = s.iloc[0] - result = s.iloc[-3] - self.assertEqual(result, expected) - - expected = s.iloc[[0]] - result = s.iloc[[-3]] - tm.assert_series_equal(result, expected) - - # check the length 1 Series case highlighted in GH10547 - expected = pd.Series(['a'], index=['A']) - result = expected.iloc[[-1]] - tm.assert_series_equal(result, expected) - - def test_iloc_getitem_dups(self): - - # no dups in panel (bug?) - self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', - {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}, - objs=['series', 'frame'], typs=['ints', 'uints']) - - # GH 6766 - df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) - df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) - df = concat([df1, df2], axis=1) - - # cross-sectional indexing - result = df.iloc[0, 0] - self.assertTrue(isnull(result)) - - result = df.iloc[0, :] - expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], - name=0) - tm.assert_series_equal(result, expected) - - def test_iloc_getitem_array(self): - - # array like - s = Series(index=lrange(1, 4)) - self.check_result('array like', 'iloc', s.index, 'ix', - {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]}, - typs=['ints', 'uints']) - - def test_iloc_getitem_bool(self): - - # boolean indexers - b = [True, False, True, False, ] - self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints']) - self.check_result('bool', 'iloc', b, 'ix', b, - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_slice(self): - - # slices - self.check_result('slice', 'iloc', slice(1, 3), 'ix', - {0: [2, 4], 1: [3, 6], 2: [4, 8]}, - typs=['ints', 'uints']) - self.check_result('slice', 'iloc', slice(1, 3), 'indexer', - slice(1, 3), - typs=['labels', 'mixed', 'ts', 'floats', 'empty'], - fails=IndexError) - - def test_iloc_getitem_slice_dups(self): - - df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']) - df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2), - columns=['A', 'C']) - - # axis=1 - df = concat([df1, df2], axis=1) - tm.assert_frame_equal(df.iloc[:, :4], df1) - tm.assert_frame_equal(df.iloc[:, 4:], df2) - - df = concat([df2, df1], axis=1) - tm.assert_frame_equal(df.iloc[:, :2], df2) - tm.assert_frame_equal(df.iloc[:, 2:], df1) - - exp = concat([df2, df1.iloc[:, [0]]], axis=1) - tm.assert_frame_equal(df.iloc[:, 0:3], exp) - - # axis=0 - df = concat([df, df], axis=0) - tm.assert_frame_equal(df.iloc[0:10, :2], df2) - tm.assert_frame_equal(df.iloc[0:10, 2:], df1) - 
tm.assert_frame_equal(df.iloc[10:, :2], df2) - tm.assert_frame_equal(df.iloc[10:, 2:], df1) - - def test_iloc_setitem(self): - df = self.frame_ints - - df.iloc[1, 1] = 1 - result = df.iloc[1, 1] - self.assertEqual(result, 1) - - df.iloc[:, 2:3] = 0 - expected = df.iloc[:, 2:3] - result = df.iloc[:, 2:3] - tm.assert_frame_equal(result, expected) - - # GH5771 - s = Series(0, index=[4, 5, 6]) - s.iloc[1:2] += 1 - expected = Series([0, 1, 0], index=[4, 5, 6]) - tm.assert_series_equal(s, expected) - - def test_loc_setitem_slice(self): - # GH10503 - - # assigning the same type should not change the type - df1 = DataFrame({'a': [0, 1, 1], - 'b': Series([100, 200, 300], dtype='uint32')}) - ix = df1['a'] == 1 - newb1 = df1.loc[ix, 'b'] + 1 - df1.loc[ix, 'b'] = newb1 - expected = DataFrame({'a': [0, 1, 1], - 'b': Series([100, 201, 301], dtype='uint32')}) - tm.assert_frame_equal(df1, expected) - - # assigning a new type should get the inferred type - df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, - dtype='uint64') - ix = df1['a'] == 1 - newb2 = df2.loc[ix, 'b'] - df1.loc[ix, 'b'] = newb2 - expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, - dtype='uint64') - tm.assert_frame_equal(df2, expected) - - def test_ix_loc_setitem_consistency(self): - - # GH 5771 - # loc with slice and series - s = Series(0, index=[4, 5, 6]) - s.loc[4:5] += 1 - expected = Series([1, 1, 0], index=[4, 5, 6]) - tm.assert_series_equal(s, expected) - - # GH 5928 - # chained indexing assignment - df = DataFrame({'a': [0, 1, 2]}) - expected = df.copy() - with catch_warnings(record=True): - expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a'] - - with catch_warnings(record=True): - df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]] - tm.assert_frame_equal(df, expected) - - df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]}) - with catch_warnings(record=True): - df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype( - 'float64') + 0.5 - expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]}) - tm.assert_frame_equal(df, expected) - - # GH 8607 - # ix setitem consistency - df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - expected = DataFrame({'timestamp': pd.to_datetime( - [1413840976, 1413842580, 1413760580], unit='s'), - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - - df2 = df.copy() - df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - df2 = df.copy() - with catch_warnings(record=True): - df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s') - tm.assert_frame_equal(df2, expected) - - def test_ix_loc_consistency(self): - - # GH 8613 - # some edge cases where ix/loc should return the same - # this is not an exhaustive case - - def compare(result, expected): - if is_scalar(expected): - self.assertEqual(result, expected) - else: - self.assertTrue(expected.equals(result)) - - # failure cases for .loc, but these work for .ix - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) - for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), - tuple([slice(0, 2), df.columns[0:2]])]: - - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, - tm.makeTimedeltaIndex]: - df.index = index(len(df.index)) - with catch_warnings(record=True): - df.ix[key] - - self.assertRaises(TypeError, lambda: 
df.loc[key]) - - df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'), - index=pd.date_range('2012-01-01', periods=5)) - - for key in ['2012-01-03', - '2012-01-31', - slice('2012-01-03', '2012-01-03'), - slice('2012-01-03', '2012-01-04'), - slice('2012-01-03', '2012-01-06', 2), - slice('2012-01-03', '2012-01-31'), - tuple([[True, True, True, False, True]]), ]: - - # getitem - - # if the expected raises, then compare the exceptions - try: - with catch_warnings(record=True): - expected = df.ix[key] - except KeyError: - self.assertRaises(KeyError, lambda: df.loc[key]) - continue - - result = df.loc[key] - compare(result, expected) - - # setitem - df1 = df.copy() - df2 = df.copy() - - with catch_warnings(record=True): - df1.ix[key] = 10 - df2.loc[key] = 10 - compare(df2, df1) - - # edge cases - s = Series([1, 2, 3, 4], index=list('abde')) - - result1 = s['a':'c'] - with catch_warnings(record=True): - result2 = s.ix['a':'c'] - result3 = s.loc['a':'c'] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - - # now work rather than raising KeyError - s = Series(range(5), [-2, -1, 1, 2, 3]) - - with catch_warnings(record=True): - result1 = s.ix[-10:3] - result2 = s.loc[-10:3] - tm.assert_series_equal(result1, result2) - - with catch_warnings(record=True): - result1 = s.ix[0:3] - result2 = s.loc[0:3] - tm.assert_series_equal(result1, result2) - - def test_loc_setitem_dups(self): - - # GH 6541 - df_orig = DataFrame( - {'me': list('rttti'), - 'foo': list('aaade'), - 'bar': np.arange(5, dtype='float64') * 1.34 + 2, - 'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me') - - indexer = tuple(['r', ['bar', 'bar2']]) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - indexer = tuple(['r', 'bar']) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - self.assertEqual(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - indexer = tuple(['t', ['bar', 'bar2']]) - df = df_orig.copy() - df.loc[indexer] *= 2.0 - tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - - def test_iloc_setitem_dups(self): - - # GH 6766 - # iloc with a mask aligning from another iloc - df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) - df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) - df = concat([df1, df2], axis=1) - - expected = df.fillna(3) - expected['A'] = expected['A'].astype('float64') - inds = np.isnan(df.iloc[:, 0]) - mask = inds[inds].index - df.iloc[mask, 0] = df.iloc[mask, 2] - tm.assert_frame_equal(df, expected) - - # del a dup column across blocks - expected = DataFrame({0: [1, 2], 1: [3, 4]}) - expected.columns = ['B', 'B'] - del df['A'] - tm.assert_frame_equal(df, expected) - - # assign back to self - df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] - tm.assert_frame_equal(df, expected) - - # reversed x 2 - df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( - drop=True) - df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( - drop=True) - tm.assert_frame_equal(df, expected) - - def test_chained_getitem_with_lists(self): - - # GH6394 - # Regression in chained getitem indexing with embedded list-like from - # 0.12 - def check(result, expected): - tm.assert_numpy_array_equal(result, expected) - tm.assertIsInstance(result, np.ndarray) - - df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]}) - expected = df['A'].iloc[2] - result = df.loc[2, 'A'] - check(result, expected) - result2 = df.iloc[2]['A'] - check(result2, expected) - result3 = 
df['A'].loc[2] - check(result3, expected) - result4 = df['A'].iloc[2] - check(result4, expected) - - def test_loc_getitem_int(self): - - # int label - self.check_result('int label', 'loc', 2, 'ix', 2, - typs=['ints', 'uints'], axes=0) - self.check_result('int label', 'loc', 3, 'ix', 3, - typs=['ints', 'uints'], axes=1) - self.check_result('int label', 'loc', 4, 'ix', 4, - typs=['ints', 'uints'], axes=2) - self.check_result('int label', 'loc', 2, 'ix', 2, - typs=['label'], fails=KeyError) - - def test_loc_getitem_label(self): - - # label - self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'], - axes=0) - self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'], - axes=0) - self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0) - self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, - typs=['ts'], axes=0) - self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'], - fails=KeyError) - - def test_loc_getitem_label_out_of_range(self): - - # out of range label - self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['ints', 'uints', 'labels', 'mixed', 'ts'], - fails=KeyError) - self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['floats'], fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, - typs=['ints', 'uints', 'mixed'], fails=KeyError) - self.check_result('label range', 'loc', 20, 'ix', 20, - typs=['labels'], fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], - axes=0, fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], - axes=0, fails=TypeError) - - def test_loc_getitem_label_list(self): - - # list of labels - self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], - typs=['ints', 'uints'], axes=0) - self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], - typs=['ints', 'uints'], axes=1) - self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], - typs=['ints', 'uints'], axes=2) - self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', - ['a', 'b', 'd'], typs=['labels'], axes=0) - self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', - ['A', 'B', 'C'], typs=['labels'], axes=1) - self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix', - ['Z', 'Y', 'W'], typs=['labels'], axes=2) - self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix', - [2, 8, 'null'], typs=['mixed'], axes=0) - self.check_result('list lbl', 'loc', - [Timestamp('20130102'), Timestamp('20130103')], 'ix', - [Timestamp('20130102'), Timestamp('20130103')], - typs=['ts'], axes=0) - - self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], - typs=['empty'], fails=KeyError) - self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], - typs=['ints', 'uints'], axes=0, fails=KeyError) - self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], - typs=['ints', 'uints'], axes=1, fails=KeyError) - self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], - typs=['ints', 'uints'], axes=2, fails=KeyError) - - def test_loc_getitem_label_list_fails(self): - # fails - self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints', 'uints'], axes=1, fails=KeyError) - self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints', 'uints'], axes=2, fails=KeyError) - - def test_loc_getitem_label_array_like(self): - # array like - self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, - 'ix', [0, 2, 4], typs=['ints', 'uints'], 
axes=0) - self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, - 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1) - self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, - 'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2) - - def test_loc_getitem_bool(self): - # boolean indexers - b = [True, False, True, False] - self.check_result('bool', 'loc', b, 'ix', b, - typs=['ints', 'uints', 'labels', - 'mixed', 'ts', 'floats']) - self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], - fails=KeyError) - - def test_loc_getitem_int_slice(self): - - # ok - self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], - typs=['ints', 'uints'], axes=0) - self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], - typs=['ints', 'uints'], axes=1) - self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], - typs=['ints', 'uints'], axes=2) - - # GH 3053 - # loc should treat integer slices like label slices - from itertools import product - - index = MultiIndex.from_tuples([t for t in product( - [6, 7, 8], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[6:8, :] - with catch_warnings(record=True): - expected = df.ix[6:8, :] - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_tuples([t - for t in product( - [10, 20, 30], ['a', 'b'])]) - df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[20:30, :] - with catch_warnings(record=True): - expected = df.ix[20:30, :] - tm.assert_frame_equal(result, expected) - - # doc examples - result = df.loc[10, :] - with catch_warnings(record=True): - expected = df.ix[10, :] - tm.assert_frame_equal(result, expected) - - result = df.loc[:, 10] - # expected = df.ix[:,10] (this fails) - expected = df[10] - tm.assert_frame_equal(result, expected) - - def test_loc_to_fail(self): - - # GH3449 - df = DataFrame(np.random.random((3, 3)), - index=['a', 'b', 'c'], - columns=['e', 'f', 'g']) - - # raise a KeyError? 
- self.assertRaises(KeyError, df.loc.__getitem__, - tuple([[1, 2], [1, 2]])) - - # GH 7496 - # loc should not fallback - - s = Series() - s.loc[1] = 1 - s.loc['a'] = 2 - - self.assertRaises(KeyError, lambda: s.loc[-1]) - self.assertRaises(KeyError, lambda: s.loc[[-1, -2]]) - - self.assertRaises(KeyError, lambda: s.loc[['4']]) - - s.loc[-1] = 3 - result = s.loc[[-1, -2]] - expected = Series([3, np.nan], index=[-1, -2]) - tm.assert_series_equal(result, expected) - - s['a'] = 2 - self.assertRaises(KeyError, lambda: s.loc[[-2]]) - - del s['a'] - - def f(): - s.loc[[-2]] = 0 - - self.assertRaises(KeyError, f) - - # inconsistency between .loc[values] and .loc[values,:] - # GH 7999 - df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value']) - - def f(): - df.loc[[3], :] - - self.assertRaises(KeyError, f) - - def f(): - df.loc[[3]] - - self.assertRaises(KeyError, f) - - def test_at_to_fail(self): - # at should not fallback - # GH 7814 - s = Series([1, 2, 3], index=list('abc')) - result = s.at['a'] - self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda: s.at[0]) - - df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) - result = df.at['a', 'A'] - self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda: df.at['a', 0]) - - s = Series([1, 2, 3], index=[3, 2, 1]) - result = s.at[1] - self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda: s.at['a']) - - df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) - result = df.at[1, 0] - self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda: df.at['a', 0]) - - # GH 13822, incorrect error string with non-unique columns when missing - # column is accessed - df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]}) - df.columns = ['x', 'x', 'z'] - - # Check that we get the correct value in the KeyError - self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", - lambda: df[['x', 'y', 'z']]) - - def test_loc_getitem_label_slice(self): - - # label slices (with ints) - self.check_result('lab slice', 'loc', slice(1, 3), - 'ix', slice(1, 3), - typs=['labels', 'mixed', 'empty', 'ts', 'floats'], - fails=TypeError) - - # real label slices - self.check_result('lab slice', 'loc', slice('a', 'c'), - 'ix', slice('a', 'c'), typs=['labels'], axes=0) - self.check_result('lab slice', 'loc', slice('A', 'C'), - 'ix', slice('A', 'C'), typs=['labels'], axes=1) - self.check_result('lab slice', 'loc', slice('W', 'Z'), - 'ix', slice('W', 'Z'), typs=['labels'], axes=2) - - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=0) - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=1, fails=TypeError) - self.check_result('ts slice', 'loc', slice('20130102', '20130104'), - 'ix', slice('20130102', '20130104'), - typs=['ts'], axes=2, fails=TypeError) - - # GH 14316 - self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'), - 'indexer', [0, 1, 2], typs=['ts_rev'], axes=0) - - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=0, fails=TypeError) - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=1, fails=KeyError) - self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), - typs=['mixed'], axes=2, fails=KeyError) - - self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( - 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) - - def test_loc_general(self): - - df = 
DataFrame( - np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'], - index=['A', 'B', 'C', 'D']) - - # want this to work - result = df.loc[:, "A":"B"].iloc[0:2, :] - self.assertTrue((result.columns == ['A', 'B']).all()) - self.assertTrue((result.index == ['A', 'B']).all()) - - # mixed type - result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] - expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0) - tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, object) - - def test_loc_setitem_consistency(self): - # GH 6149 - # coerce similary for setitem and loc when rows have a null-slice - expected = DataFrame({'date': Series(0, index=range(5), - dtype=np.int64), - 'val': Series(range(5), dtype=np.int64)}) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series( - range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 0 - tm.assert_frame_equal(df, expected) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = np.array(0, dtype=np.int64) - tm.assert_frame_equal(df, expected) - - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64) - tm.assert_frame_equal(df, expected) - - expected = DataFrame({'date': Series('foo', index=range(5)), - 'val': Series(range(5), dtype=np.int64)}) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 'foo' - tm.assert_frame_equal(df, expected) - - expected = DataFrame({'date': Series(1.0, index=range(5)), - 'val': Series(range(5), dtype=np.int64)}) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series(range(5), dtype=np.int64)}) - df.loc[:, 'date'] = 1.0 - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_consistency_empty(self): - # empty (essentially noops) - expected = DataFrame(columns=['x', 'y']) - expected['x'] = expected['x'].astype(np.int64) - df = DataFrame(columns=['x', 'y']) - df.loc[:, 'x'] = 1 - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=['x', 'y']) - df['x'] = 1 - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_consistency_slice_column_len(self): - # .loc[:,column] setting with slice == len of the column - # GH10408 - data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat -Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse -Region,Site,RespondentID,,,,, -Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, -Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes -Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, -Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" - - df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) - df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, ( - 'Respondent', 'StartDate')]) - df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, ( - 'Respondent', 'EndDate')]) - df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( - 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')] - - df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( - 'Respondent', 'Duration')].astype('timedelta64[s]') - expected = Series([1380, 720, 840, 2160.], index=df.index, - name=('Respondent', 'Duration')) - tm.assert_series_equal(df[('Respondent', 'Duration')], expected) - - def test_loc_setitem_frame(self): - df = 
self.frame_labels - - result = df.iloc[0, 0] - - df.loc['a', 'A'] = 1 - result = df.loc['a', 'A'] - self.assertEqual(result, 1) - - result = df.iloc[0, 0] - self.assertEqual(result, 1) - - df.loc[:, 'B':'D'] = 0 - expected = df.loc[:, 'B':'D'] - with catch_warnings(record=True): - result = df.ix[:, 1:] - tm.assert_frame_equal(result, expected) - - # GH 6254 - # setting issue - df = DataFrame(index=[3, 5, 4], columns=['A']) - df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64') - expected = DataFrame(dict(A=Series( - [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4]) - tm.assert_frame_equal(df, expected) - - # GH 6252 - # setting with an empty frame - keys1 = ['@' + str(i) for i in range(5)] - val1 = np.arange(5, dtype='int64') - - keys2 = ['@' + str(i) for i in range(4)] - val2 = np.arange(4, dtype='int64') - - index = list(set(keys1).union(keys2)) - df = DataFrame(index=index) - df['A'] = nan - df.loc[keys1, 'A'] = val1 - - df['B'] = nan - df.loc[keys2, 'B'] = val2 - - expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series( - val2, index=keys2))).reindex(index=index) - tm.assert_frame_equal(df, expected) - - # GH 8669 - # invalid coercion of nan -> int - df = DataFrame({'A': [1, 2, 3], 'B': np.nan}) - df.loc[df.B > df.A, 'B'] = df.A - expected = DataFrame({'A': [1, 2, 3], 'B': np.nan}) - tm.assert_frame_equal(df, expected) - - # GH 6546 - # setting with mixed labels - df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']}) - - result = df.loc[0, [1, 2]] - expected = Series([1, 3], index=[1, 2], dtype=object, name=0) - tm.assert_series_equal(result, expected) - - expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']}) - df.loc[0, [1, 2]] = [5, 6] - tm.assert_frame_equal(df, expected) - - def test_loc_setitem_frame_multiples(self): - # multiple setting - df = DataFrame({'A': ['foo', 'bar', 'baz'], - 'B': Series( - range(3), dtype=np.int64)}) - rhs = df.loc[1:2] - rhs.index = df.index[0:2] - df.loc[0:1] = rhs - expected = DataFrame({'A': ['bar', 'baz', 'baz'], - 'B': Series( - [1, 2, 2], dtype=np.int64)}) - tm.assert_frame_equal(df, expected) - - # multiple setting with frame on rhs (with M8) - df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), - 'val': Series( - range(5), dtype=np.int64)}) - expected = DataFrame({'date': [Timestamp('20000101'), Timestamp( - '20000102'), Timestamp('20000101'), Timestamp('20000102'), - Timestamp('20000103')], - 'val': Series( - [0, 1, 0, 1, 2], dtype=np.int64)}) - rhs = df.loc[0:2] - rhs.index = df.index[2:5] - df.loc[2:4] = rhs - tm.assert_frame_equal(df, expected) - - def test_iloc_getitem_frame(self): - df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), - columns=lrange(0, 8, 2)) - - result = df.iloc[2] - with catch_warnings(record=True): - exp = df.ix[4] - tm.assert_series_equal(result, exp) - - result = df.iloc[2, 2] - with catch_warnings(record=True): - exp = df.ix[4, 4] - self.assertEqual(result, exp) - - # slice - result = df.iloc[4:8] - with catch_warnings(record=True): - expected = df.ix[8:14] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:, 2:3] - with catch_warnings(record=True): - expected = df.ix[:, 4:5] - tm.assert_frame_equal(result, expected) - - # list of integers - result = df.iloc[[0, 1, 3]] - with catch_warnings(record=True): - expected = df.ix[[0, 2, 6]] - tm.assert_frame_equal(result, expected) - - result = df.iloc[[0, 1, 3], [0, 1]] - with catch_warnings(record=True): - expected = df.ix[[0, 2, 6], [0, 2]] - tm.assert_frame_equal(result, expected) - - # neg indicies - 
result = df.iloc[[-1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - expected = df.ix[[18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # dups indicies - result = df.iloc[[-1, -1, 1, 3], [-1, 1]] - with catch_warnings(record=True): - expected = df.ix[[18, 18, 2, 6], [6, 2]] - tm.assert_frame_equal(result, expected) - - # with index-like - s = Series(index=lrange(1, 5)) - result = df.iloc[s.index] - with catch_warnings(record=True): - expected = df.ix[[2, 4, 6, 8]] - tm.assert_frame_equal(result, expected) - - def test_iloc_getitem_labelled_frame(self): - # try with labelled frame - df = DataFrame(np.random.randn(10, 4), - index=list('abcdefghij'), columns=list('ABCD')) - - result = df.iloc[1, 1] - exp = df.loc['b', 'B'] - self.assertEqual(result, exp) - - result = df.iloc[:, 2:3] - expected = df.loc[:, ['C']] - tm.assert_frame_equal(result, expected) - - # negative indexing - result = df.iloc[-1, -1] - exp = df.loc['j', 'D'] - self.assertEqual(result, exp) - - # out-of-bounds exception - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5])) - - # trying to use a label - self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) - - def test_iloc_getitem_doc_issue(self): - - # multi axis slicing issue with single block - # surfaced in GH 6059 - - arr = np.random.randn(6, 4) - index = date_range('20130101', periods=6) - columns = list('ABCD') - df = DataFrame(arr, index=index, columns=columns) - - # defines ref_locs - df.describe() - - result = df.iloc[3:5, 0:2] - str(result) - result.dtypes - - expected = DataFrame(arr[3:5, 0:2], index=index[3:5], - columns=columns[0:2]) - tm.assert_frame_equal(result, expected) - - # for dups - df.columns = list('aaaa') - result = df.iloc[3:5, 0:2] - str(result) - result.dtypes - - expected = DataFrame(arr[3:5, 0:2], index=index[3:5], - columns=list('aa')) - tm.assert_frame_equal(result, expected) - - # related - arr = np.random.randn(6, 4) - index = list(range(0, 12, 2)) - columns = list(range(0, 8, 2)) - df = DataFrame(arr, index=index, columns=columns) - - df._data.blocks[0].mgr_locs - result = df.iloc[1:5, 2:4] - str(result) - result.dtypes - expected = DataFrame(arr[1:5, 2:4], index=index[1:5], - columns=columns[2:4]) - tm.assert_frame_equal(result, expected) +class TestFancy(Base, tm.TestCase): + """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): # GH5508 - # len of indexer vs length of the 1d ndarray - df = DataFrame(index=Index(lrange(1, 11))) - df['foo'] = np.zeros(10, dtype=np.float64) - df['bar'] = np.zeros(10, dtype=np.complex) - - # invalid - def f(): - with catch_warnings(record=True): - df.ix[2:5, 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2]) - - self.assertRaises(ValueError, f) - - def f(): - df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, - 2.2, 1.0]) - - self.assertRaises(ValueError, f) - - # valid - df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, - 2.2, 1.0]) - - result = df.loc[df.index[2:6], 'bar'] - expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], - name='bar') - tm.assert_series_equal(result, expected) - - # dtype getting changed? 
- df = DataFrame(index=Index(lrange(1, 11))) - df['foo'] = np.zeros(10, dtype=np.float64) - df['bar'] = np.zeros(10, dtype=np.complex) - - def f(): - df[2:5] = np.arange(1, 4) * 1j - - self.assertRaises(ValueError, f) - - def test_iloc_setitem_series(self): - df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), - columns=list('ABCD')) - - df.iloc[1, 1] = 1 - result = df.iloc[1, 1] - self.assertEqual(result, 1) - - df.iloc[:, 2:3] = 0 - expected = df.iloc[:, 2:3] - result = df.iloc[:, 2:3] - tm.assert_frame_equal(result, expected) - - s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - - s.iloc[1] = 1 - result = s.iloc[1] - self.assertEqual(result, 1) - - s.iloc[:4] = 0 - expected = s.iloc[:4] - result = s.iloc[:4] - tm.assert_series_equal(result, expected) - - s = Series([-1] * 6) - s.iloc[0::2] = [0, 2, 4] - s.iloc[1::2] = [1, 3, 5] - result = s - expected = Series([0, 1, 2, 3, 4, 5]) - tm.assert_series_equal(result, expected) - - def test_iloc_setitem_list_of_lists(self): - - # GH 7551 - # list-of-list is set incorrectly in mixed vs. single dtyped frames - df = DataFrame(dict(A=np.arange(5, dtype='int64'), - B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [[10, 11], [12, 13]] - expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - df = DataFrame( - dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) - df.iloc[2:4] = [['x', 11], ['y', 13]] - expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], - B=[5, 6, 11, 13, 9])) - tm.assert_frame_equal(df, expected) - - def test_ix_general(self): - - # ix general issues - - # GH 2817 - data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} - df = DataFrame(data).set_index(keys=['col', 'year']) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with self.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - df.sort_index(inplace=True) - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3], - names=['col', 'year']) - expected = DataFrame({'amount': [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - - def test_ix_weird_slicing(self): - # http://stackoverflow.com/q/17056560/1240268 - df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], - 'two': [1, 2, 3, 4, 5]}) - df.loc[df['one'] > 1, 'two'] = -df['two'] - - expected = DataFrame({'one': {0: 1.0, - 1: 2.0, - 2: 3.0, - 3: nan, - 4: nan}, - 'two': {0: 1, - 1: -2, - 2: -3, - 3: 4, - 4: 5}}) - tm.assert_frame_equal(df, expected) - - def test_loc_coerceion(self): - - # 12411 - df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), - pd.NaT]}) - expected = df.dtypes - - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) + # len of indexer vs length of the 1d ndarray + df = DataFrame(index=Index(lrange(1, 11))) + df['foo'] = np.zeros(10, dtype=np.float64) + df['bar'] = np.zeros(10, dtype=np.complex) - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) + # invalid + def f(): + df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, + 2.2, 1.0]) - # 12045 - import datetime - df = DataFrame({'date': [datetime.datetime(2012, 1, 1), - datetime.datetime(1012, 1, 2)]}) - expected = df.dtypes + self.assertRaises(ValueError, f) - result = df.iloc[[0]] - tm.assert_series_equal(result.dtypes, expected) + # valid + df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, + 2.2, 1.0]) - result = df.iloc[[1]] - tm.assert_series_equal(result.dtypes, expected) + result = df.loc[df.index[2:6], 'bar'] + expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], + name='bar') + tm.assert_series_equal(result, expected) - # 11594 - df = DataFrame({'text': ['some words'] + [None] * 9}) - expected = df.dtypes + # dtype getting changed? 
+ df = DataFrame(index=Index(lrange(1, 11))) + df['foo'] = np.zeros(10, dtype=np.float64) + df['bar'] = np.zeros(10, dtype=np.complex) - result = df.iloc[0:2] - tm.assert_series_equal(result.dtypes, expected) + def f(): + df[2:5] = np.arange(1, 4) * 1j - result = df.iloc[3:] - tm.assert_series_equal(result.dtypes, expected) + self.assertRaises(ValueError, f) def test_setitem_dtype_upcast(self): @@ -1683,19 +99,6 @@ def test_setitem_dtype_upcast(self): self.assertTrue(is_float_dtype(left['foo'])) self.assertTrue(is_float_dtype(left['baz'])) - def test_setitem_iloc(self): - - # setitem with an iloc list - df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"], - columns=["A", "B", "C"]) - df.iloc[[0, 1], [1, 2]] - df.iloc[[0, 1], [1, 2]] += 100 - - expected = DataFrame( - np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), - index=["A", "B", "C"], columns=["A", "B", "C"]) - tm.assert_frame_equal(df, expected) - def test_dups_fancy_indexing(self): # GH 3455 @@ -1757,23 +160,24 @@ def test_dups_fancy_indexing(self): # inconsistent returns for unique/duplicate indices when values are # missing - df = DataFrame(randn(4, 3), index=list('ABCD')) - expected = df.ix[['E']] + df = DataFrame(np.random.randn(4, 3), index=list('ABCD')) + expected = df.reindex(['E']) - dfnu = DataFrame(randn(5, 3), index=list('AABCD')) - result = dfnu.ix[['E']] + dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD')) + with catch_warnings(record=True): + result = dfnu.ix[['E']] tm.assert_frame_equal(result, expected) # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - result = df.ix[[0, 8, 0]] + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) - result = df.ix[[0, 8, 0]] + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) @@ -1781,7 +185,7 @@ def test_dups_fancy_indexing(self): df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) expected = DataFrame( {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) - result = df.ix[['A', 'A', 'E']] + result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) # GH 5835 @@ -1790,9 +194,9 @@ def test_dups_fancy_indexing(self): np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) expected = pd.concat( - [df.ix[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], - index=df.index)], axis=1) - result = df.ix[:, ['A', 'B', 'C']] + [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], + index=df.index)], axis=1) + result = df.loc[:, ['A', 'B', 'C']] tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing @@ -1822,8 +226,8 @@ def test_indexing_mixed_frame_bug(self): # this does not work, ie column test is not changed idx = df['test'] == '_' - temp = df.ix[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) - df.ix[idx, 'test'] = temp + temp = df.loc[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) + df.loc[idx, 'test'] = temp self.assertEqual(df.iloc[0, 2], '-----') # if I look at df, then element [0,2] equals '_'. 
If instead I type @@ -1859,17 +263,17 @@ def test_set_index_nan(self): 'QC': {17: 0.0, 18: 0.0, 19: 0.0, - 20: nan, - 21: nan, - 22: nan, - 23: nan, + 20: np.nan, + 21: np.nan, + 22: np.nan, + 23: np.nan, 24: 1.0, - 25: nan, - 26: nan, - 27: nan, - 28: nan, - 29: nan, - 30: nan}, + 25: np.nan, + 26: np.nan, + 27: np.nan, + 28: np.nan, + 29: np.nan, + 30: np.nan}, 'data': {17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, @@ -1925,14 +329,14 @@ def test_multi_assign(self): 'PF': [0, 0, 0, 0, 1, 1], 'col1': lrange(6), 'col2': lrange(6, 12)}) - df.ix[1, 0] = np.nan + df.iloc[1, 0] = np.nan df2 = df.copy() mask = ~df2.FC.isnull() cols = ['col1', 'col2'] dft = df2 * 2 - dft.ix[3, 3] = np.nan + dft.iloc[3, 3] = np.nan expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], @@ -1940,17 +344,17 @@ def test_multi_assign(self): 'col2': [12, 7, 16, np.nan, 20, 22]}) # frame on rhs - df2.ix[mask, cols] = dft.ix[mask, cols] + df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) - df2.ix[mask, cols] = dft.ix[mask, cols] + df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) # with an ndarray on rhs df2 = df.copy() - df2.ix[mask, cols] = dft.ix[mask, cols].values + df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) - df2.ix[mask, cols] = dft.ix[mask, cols].values + df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) # broadcasting on the rhs is required @@ -1965,79 +369,18 @@ def test_multi_assign(self): df.loc[df['A'] == 0, ['A', 'B']] = df['D'] tm.assert_frame_equal(df, expected) - def test_ix_assign_column_mixed(self): - # GH #1142 - df = DataFrame(tm.getSeriesData()) - df['foo'] = 'bar' - - orig = df.ix[:, 'B'].copy() - df.ix[:, 'B'] = df.ix[:, 'B'] + 1 - tm.assert_series_equal(df.B, orig + 1) - - # GH 3668, mixed frame with series value - df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'}) - expected = df.copy() - - for i in range(5): - indexer = i * 2 - v = 1000 + i * 200 - expected.ix[indexer, 'y'] = v - self.assertEqual(expected.ix[indexer, 'y'], v) - - df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100 - tm.assert_frame_equal(df, expected) - - # GH 4508, making sure consistency of assignments - df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]}) - df.ix[[0, 2, ], 'b'] = [100, -100] - expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]}) - tm.assert_frame_equal(df, expected) - - df = pd.DataFrame({'a': lrange(4)}) - df['b'] = np.nan - df.ix[[1, 3], 'b'] = [100, -100] - expected = DataFrame({'a': [0, 1, 2, 3], - 'b': [np.nan, 100, np.nan, -100]}) - tm.assert_frame_equal(df, expected) - - # ok, but chained assignments are dangerous - # if we turn off chained assignement it will work - with option_context('chained_assignment', None): - df = pd.DataFrame({'a': lrange(4)}) - df['b'] = np.nan - df['b'].ix[[1, 3]] = [100, -100] - tm.assert_frame_equal(df, expected) - - def test_ix_get_set_consistency(self): - - # GH 4544 - # ix/loc get/set not consistent when - # a mixed int/string index - df = DataFrame(np.arange(16).reshape((4, 4)), - columns=['a', 'b', 8, 'c'], - index=['e', 7, 'f', 'g']) - - self.assertEqual(df.ix['e', 8], 2) - self.assertEqual(df.loc['e', 8], 2) - - df.ix['e', 8] = 42 - self.assertEqual(df.ix['e', 8], 42) - self.assertEqual(df.loc['e', 8], 42) - - df.loc['e', 8] = 45 - self.assertEqual(df.ix['e', 8], 45) - self.assertEqual(df.loc['e', 8], 45) - def test_setitem_list(self): # GH 6043 
# ix with a list df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = [1, 2, 3] - df.ix[1, 0] = [1, 2] + with catch_warnings(record=True): + df.ix[1, 0] = [1, 2, 3] + df.ix[1, 0] = [1, 2] result = DataFrame(index=[0, 1], columns=[0]) - result.ix[1, 0] = [1, 2] + with catch_warnings(record=True): + result.ix[1, 0] = [1, 2] tm.assert_frame_equal(result, df) @@ -2059,187 +402,25 @@ def view(self): return self df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = TO(1) - df.ix[1, 0] = TO(2) + with catch_warnings(record=True): + df.ix[1, 0] = TO(1) + df.ix[1, 0] = TO(2) result = DataFrame(index=[0, 1], columns=[0]) - result.ix[1, 0] = TO(2) + with catch_warnings(record=True): + result.ix[1, 0] = TO(2) tm.assert_frame_equal(result, df) # remains object dtype even after setting it back df = DataFrame(index=[0, 1], columns=[0]) - df.ix[1, 0] = TO(1) - df.ix[1, 0] = np.nan + with catch_warnings(record=True): + df.ix[1, 0] = TO(1) + df.ix[1, 0] = np.nan result = DataFrame(index=[0, 1], columns=[0]) tm.assert_frame_equal(result, df) - def test_iloc_mask(self): - - # GH 3631, iloc with a mask (of a series) should raise - df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) - mask = (df.a % 2 == 0) - self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) - mask.index = lrange(len(mask)) - self.assertRaises(NotImplementedError, df.iloc.__getitem__, - tuple([mask])) - - # ndarray ok - result = df.iloc[np.array([True] * len(mask), dtype=bool)] - tm.assert_frame_equal(result, df) - - # the possibilities - locs = np.arange(4) - nums = 2 ** locs - reps = lmap(bin, nums) - df = DataFrame({'locs': locs, 'nums': nums}, reps) - - expected = { - (None, ''): '0b1100', - (None, '.loc'): '0b1100', - (None, '.iloc'): '0b1100', - ('index', ''): '0b11', - ('index', '.loc'): '0b11', - ('index', '.iloc'): ('iLocation based boolean indexing ' - 'cannot use an indexable as a mask'), - ('locs', ''): 'Unalignable boolean Series provided as indexer ' - '(index of the boolean Series and of the indexed ' - 'object do not match', - ('locs', '.loc'): 'Unalignable boolean Series provided as indexer ' - '(index of the boolean Series and of the ' - 'indexed object do not match', - ('locs', '.iloc'): ('iLocation based boolean indexing on an ' - 'integer type is not available'), - } - - # UserWarnings from reindex of a boolean mask - with warnings.catch_warnings(record=True): - result = dict() - for idx in [None, 'index', 'locs']: - mask = (df.nums > 2).values - if idx: - mask = Series(mask, list(reversed(getattr(df, idx)))) - for method in ['', '.loc', '.iloc']: - try: - if method: - accessor = getattr(df, method[1:]) - else: - accessor = df - ans = str(bin(accessor[mask]['nums'].sum())) - except Exception as e: - ans = str(e) - - key = tuple([idx, method]) - r = expected.get(key) - if r != ans: - raise AssertionError( - "[%s] does not match [%s], received [%s]" - % (key, ans, r)) - - def test_ix_slicing_strings(self): - # GH3836 - data = {'Classification': - ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'], - 'Random': [1, 2, 3, 4, 5], - 'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']} - df = DataFrame(data) - x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF' - ])] - df.ix[x.index, 'X'] = df['Classification'] - - expected = DataFrame({'Classification': {0: 'SA EQUITY CFD', - 1: 'bbb', - 2: 'SA EQUITY', - 3: 'SA SSF', - 4: 'aaa'}, - 'Random': {0: 1, - 1: 2, - 2: 3, - 3: 4, - 4: 5}, - 'X': {0: 'correct', - 1: 'bbb', - 2: 'correct', - 3: 'correct', - 4: 'aaa'}}) # bug was 4: 
'bbb' - - tm.assert_frame_equal(df, expected) - - def test_non_unique_loc(self): - # GH3659 - # non-unique indexer with loc slice - # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs - - # these are going to raise becuase the we are non monotonic - df = DataFrame({'A': [1, 2, 3, 4, 5, 6], - 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(1, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, - tuple([slice(0, None)])) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) - - # monotonic are ok - df = DataFrame({'A': [1, 2, 3, 4, 5, 6], - 'B': [3, 4, 5, 6, 7, 8]}, - index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0) - result = df.loc[1:] - expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]}, - index=[1, 1, 2, 3]) - tm.assert_frame_equal(result, expected) - - result = df.loc[0:] - tm.assert_frame_equal(result, df) - - result = df.loc[1:2] - expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]}, - index=[1, 1, 2]) - tm.assert_frame_equal(result, expected) - - def test_loc_name(self): - # GH 3880 - df = DataFrame([[1, 1], [1, 1]]) - df.index.name = 'index_name' - result = df.iloc[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - result = df.ix[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - result = df.loc[[0, 1]].index.name - self.assertEqual(result, 'index_name') - - def test_iloc_non_unique_indexing(self): - - # GH 4017, non-unique indexing (on the axis) - df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000}) - idx = np.array(lrange(30)) * 99 - expected = df.iloc[idx] - - df3 = pd.concat([df, 2 * df, 3 * df]) - result = df3.iloc[idx] - - tm.assert_frame_equal(result, expected) - - df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000}) - df2 = pd.concat([df2, 2 * df2, 3 * df2]) - - sidx = df2.index.to_series() - expected = df2.iloc[idx[idx <= sidx.max()]] - - new_list = [] - for r, s in expected.iterrows(): - new_list.append(s) - new_list.append(s * 2) - new_list.append(s * 3) - - expected = DataFrame(new_list) - expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) - ]) - result = df2.loc[idx] - tm.assert_frame_equal(result, expected, check_index_type=False) - def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object @@ -2300,43 +481,6 @@ def test_mi_access(self): result = df2['A']['B2'] tm.assert_frame_equal(result, expected) - def test_non_unique_loc_memory_error(self): - - # GH 4280 - # non_unique index with a large selection triggers a memory error - - columns = list('ABCDEFG') - - def gen_test(l, l2): - return pd.concat([DataFrame(randn(l, len(columns)), - index=lrange(l), columns=columns), - DataFrame(np.ones((l2, len(columns))), - index=[0] * l2, columns=columns)]) - - def gen_expected(df, mask): - l = len(mask) - return pd.concat([df.take([0], convert=False), - DataFrame(np.ones((l, len(columns))), - index=[0] * l, - columns=columns), - df.take(mask[1:], convert=False)]) - - df = gen_test(900, 100) - self.assertFalse(df.index.is_unique) - - mask = np.arange(100) - result = df.loc[mask] - expected = gen_expected(df, mask) - tm.assert_frame_equal(result, expected) - - df = gen_test(900000, 100000) - self.assertFalse(df.index.is_unique) - - mask = np.arange(100000) - result = df.loc[mask] - expected = gen_expected(df, mask) - tm.assert_frame_equal(result, expected) - def test_astype_assignment(self): # GH4312 (iloc) @@ -2395,745 +539,79 @@ def test_astype_assignment_with_dups(self): # result = 
df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() - def test_dups_loc(self): - - # GH4726 - # dup indexing with iloc/loc - df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]], - columns=['a', 'a', 'a', 'a', 'a'], index=[1]) - expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')], - index=['a', 'a', 'a', 'a', 'a'], name=1) - - result = df.iloc[0] - tm.assert_series_equal(result, expected) - - result = df.loc[1] - tm.assert_series_equal(result, expected) - - def test_partial_setting(self): - - # GH2578, allow ix and friends to partially set - - # series - s_orig = Series([1, 2, 3]) - - s = s_orig.copy() - s[5] = 5 - expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s.loc[5] = 5 - expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s[5] = 5. - expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - s = s_orig.copy() - s.loc[5] = 5. - expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) - tm.assert_series_equal(s, expected) - - # iloc/iat raise - s = s_orig.copy() - - def f(): - s.iloc[3] = 5. - - self.assertRaises(IndexError, f) - - def f(): - s.iat[3] = 5. - - self.assertRaises(IndexError, f) - - # ## frame ## - - df_orig = DataFrame( - np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64') - - # iloc/iat raise - df = df_orig.copy() - - def f(): - df.iloc[4, 2] = 5. - - self.assertRaises(IndexError, f) - - def f(): - df.iat[4, 2] = 5. - - self.assertRaises(IndexError, f) - - # row setting where it exists - expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) - df = df_orig.copy() - df.iloc[1] = df.iloc[2] - tm.assert_frame_equal(df, expected) - - expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) - df = df_orig.copy() - df.loc[1] = df.loc[2] - tm.assert_frame_equal(df, expected) - - # like 2578, partial setting with dtype preservation - expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]})) - df = df_orig.copy() - df.loc[3] = df.loc[2] - tm.assert_frame_equal(df, expected) - - # single dtype frame, overwrite - expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]})) - df = df_orig.copy() - df.ix[:, 'B'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # mixed dtype frame, overwrite - expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])})) - df = df_orig.copy() - df['B'] = df['B'].astype(np.float64) - df.ix[:, 'B'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # single dtype frame, partial setting - expected = df_orig.copy() - expected['C'] = df['A'] - df = df_orig.copy() - df.ix[:, 'C'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # mixed frame, partial setting - expected = df_orig.copy() - expected['C'] = df['A'] - df = df_orig.copy() - df.ix[:, 'C'] = df.ix[:, 'A'] - tm.assert_frame_equal(df, expected) - - # ## panel ## - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - - # panel setting via item - p_orig = Panel(np.arange(16).reshape(2, 4, 2), - items=['Item1', 'Item2'], - major_axis=pd.date_range('2001/1/12', periods=4), - minor_axis=['A', 'B'], dtype='float64') - expected = p_orig.copy() - expected['Item3'] = expected['Item1'] - p = p_orig.copy() - p.loc['Item3'] = p['Item1'] - tm.assert_panel_equal(p, expected) - - # panel with aligned series - expected 
= p_orig.copy() - expected = expected.transpose(2, 1, 0) - expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], - 'Item2': [32, 32, 32, 32]}, - index=p_orig.major_axis) - expected = expected.transpose(2, 1, 0) - p = p_orig.copy() - p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) - tm.assert_panel_equal(p, expected) - - # GH 8473 - dates = date_range('1/1/2000', periods=8) - df_orig = DataFrame(np.random.randn(8, 4), index=dates, - columns=['A', 'B', 'C', 'D']) - - expected = pd.concat([df_orig, DataFrame( - {'A': 7}, index=[dates[-1] + 1])]) - df = df_orig.copy() - df.loc[dates[-1] + 1, 'A'] = 7 - tm.assert_frame_equal(df, expected) - df = df_orig.copy() - df.at[dates[-1] + 1, 'A'] = 7 - tm.assert_frame_equal(df, expected) - - exp_other = DataFrame({0: 7}, index=[dates[-1] + 1]) - expected = pd.concat([df_orig, exp_other], axis=1) - - df = df_orig.copy() - df.loc[dates[-1] + 1, 0] = 7 - tm.assert_frame_equal(df, expected) - df = df_orig.copy() - df.at[dates[-1] + 1, 0] = 7 - tm.assert_frame_equal(df, expected) - - def test_partial_setting_mixed_dtype(self): - - # in a mixed dtype environment, try to preserve dtypes - # by appending - df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) - - s = df.loc[1].copy() - s.name = 2 - expected = df.append(s) - - df.loc[2] = df.loc[1] - tm.assert_frame_equal(df, expected) - - # columns will align - df = DataFrame(columns=['A', 'B']) - df.loc[0] = Series(1, index=range(4)) - tm.assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0])) - - # columns will align - df = DataFrame(columns=['A', 'B']) - df.loc[0] = Series(1, index=['B']) - - exp = DataFrame([[np.nan, 1]], columns=['A', 'B'], - index=[0], dtype='float64') - tm.assert_frame_equal(df, exp) - - # list-like must conform - df = DataFrame(columns=['A', 'B']) - - def f(): - df.loc[0] = [1, 2, 3] - - self.assertRaises(ValueError, f) - - # these are coerced to float unavoidably (as its a list-like to begin) - df = DataFrame(columns=['A', 'B']) - df.loc[3] = [6, 7] - - exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], - dtype='float64') - tm.assert_frame_equal(df, exp) - - def test_series_partial_set(self): - # partial set with new index - # Regression from GH4825 - ser = Series([0.1, 0.2], index=[1, 2]) - - # loc - expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) - result = ser.loc[[3, 2, 3, 'x']] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) - result = ser.loc[[2, 2, 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) - result = ser.loc[[2, 2, 'x', 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # raises as nothing in in the index - self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) - - expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 3, 3]] - tm.assert_series_equal(result, 
expected, check_index_type=True) - - expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[4, 5, 6, 7]).loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) - - expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # iloc - expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) - result = ser.iloc[[1, 1, 0, 0]] - tm.assert_series_equal(result, expected, check_index_type=True) - - def test_series_partial_set_with_name(self): - # GH 11497 - - idx = Index([1, 2], dtype='int64', name='idx') - ser = Series([0.1, 0.2], index=idx, name='s') - - # loc - exp_idx = Index([3, 2, 3], dtype='int64', name='idx') - expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') - expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, - name='s') - result = ser.loc[[3, 2, 3, 'x']] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([2, 2, 1], dtype='int64', name='idx') - expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') - expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 'x', 1]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # raises as nothing in in the index - self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) - - exp_idx = Index([2, 2, 3], dtype='int64', name='idx') - expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([3, 4, 4], dtype='int64', name='idx') - expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([1, 2, 3], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([5, 3, 3], dtype='int64', name='idx') - expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([5, 4, 4], dtype='int64', name='idx') - expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([7, 2, 2], dtype='int64', name='idx') - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([4, 5, 6, 7], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) - - exp_idx = Index([4, 5, 5], dtype='int64', 
name='idx') - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') - idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) - - # iloc - exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx') - expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s') - result = ser.iloc[[1, 1, 0, 0]] - tm.assert_series_equal(result, expected, check_index_type=True) - - def test_partial_set_invalid(self): - - # GH 4940 - # allow only setting of 'valid' values - - orig = tm.makeTimeDataFrame() - df = orig.copy() - - # don't allow not string inserts - def f(): - df.loc[100.0, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.loc[100, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.ix[100.0, :] = df.ix[0] - - self.assertRaises(TypeError, f) - - def f(): - df.ix[100, :] = df.ix[0] - - self.assertRaises(ValueError, f) - - # allow object conversion here - df = orig.copy() - df.loc['a', :] = df.ix[0] - exp = orig.append(pd.Series(df.ix[0], name='a')) - tm.assert_frame_equal(df, exp) - tm.assert_index_equal(df.index, - pd.Index(orig.index.tolist() + ['a'])) - self.assertEqual(df.index.dtype, 'object') - - def test_partial_set_empty_series(self): - - # GH5226 - - # partially set with an empty object series - s = Series() - s.loc[1] = 1 - tm.assert_series_equal(s, Series([1], index=[1])) - s.loc[3] = 3 - tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) - - s = Series() - s.loc[1] = 1. - tm.assert_series_equal(s, Series([1.], index=[1])) - s.loc[3] = 3. - tm.assert_series_equal(s, Series([1., 3.], index=[1, 3])) - - s = Series() - s.loc['foo'] = 1 - tm.assert_series_equal(s, Series([1], index=['foo'])) - s.loc['bar'] = 3 - tm.assert_series_equal(s, Series([1, 3], index=['foo', 'bar'])) - s.loc[3] = 4 - tm.assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3])) - - def test_partial_set_empty_frame(self): - - # partially set with an empty object - # frame - df = DataFrame() - - def f(): - df.loc[1] = 1 - - self.assertRaises(ValueError, f) - - def f(): - df.loc[1] = Series([1], index=['foo']) - - self.assertRaises(ValueError, f) - - def f(): - df.loc[:, 1] = 1 - - self.assertRaises(ValueError, f) - - # these work as they don't really change - # anything but the index - # GH5632 - expected = DataFrame(columns=['foo'], index=pd.Index( - [], dtype='int64')) - - def f(): - df = DataFrame() - df['foo'] = Series([], dtype='object') - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = Series(df.index) - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = df.index - return df - - tm.assert_frame_equal(f(), expected) - - expected = DataFrame(columns=['foo'], - index=pd.Index([], dtype='int64')) - expected['foo'] = expected['foo'].astype('float64') - - def f(): - df = DataFrame() - df['foo'] = [] - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - df['foo'] = Series(range(len(df))) - return df - - tm.assert_frame_equal(f(), expected) - - def f(): - df = DataFrame() - tm.assert_index_equal(df.index, pd.Index([], dtype='object')) - df['foo'] = range(len(df)) - return df - - expected = DataFrame(columns=['foo'], - index=pd.Index([], dtype='int64')) - expected['foo'] = expected['foo'].astype('float64') - tm.assert_frame_equal(f(), expected) - - df = DataFrame() - 
tm.assert_index_equal(df.columns, pd.Index([], dtype=object)) - df2 = DataFrame() - df2[1] = Series([1], index=['foo']) - df.loc[:, 1] = Series([1], index=['foo']) - tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1])) - tm.assert_frame_equal(df, df2) - - # no index to start - expected = DataFrame({0: Series(1, index=range(4))}, - columns=['A', 'B', 0]) - - df = DataFrame(columns=['A', 'B']) - df[0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - df = DataFrame(columns=['A', 'B']) - df.loc[:, 0] = Series(1, index=range(4)) - df.dtypes - str(df) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_row(self): - # GH5720, GH5744 - # don't create rows when empty - expected = DataFrame(columns=['A', 'B', 'New'], - index=pd.Index([], dtype='int64')) - expected['A'] = expected['A'].astype('int64') - expected['B'] = expected['B'].astype('float64') - expected['New'] = expected['New'].astype('float64') - - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - y['New'] = np.nan - tm.assert_frame_equal(y, expected) - # tm.assert_frame_equal(y,expected) - - expected = DataFrame(columns=['a', 'b', 'c c', 'd']) - expected['d'] = expected['d'].astype('int64') - df = DataFrame(columns=['a', 'b', 'c c']) - df['d'] = 3 - tm.assert_frame_equal(df, expected) - tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object)) - - # reindex columns is ok - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) - y = df[df.A > 5] - result = y.reindex(columns=['A', 'B', 'C']) - expected = DataFrame(columns=['A', 'B', 'C'], - index=pd.Index([], dtype='int64')) - expected['A'] = expected['A'].astype('int64') - expected['B'] = expected['B'].astype('float64') - expected['C'] = expected['C'].astype('float64') - tm.assert_frame_equal(result, expected) - - def test_partial_set_empty_frame_set_series(self): - # GH 5756 - # setting with empty Series - df = DataFrame(Series()) - tm.assert_frame_equal(df, DataFrame({0: Series()})) - - df = DataFrame(Series(name='foo')) - tm.assert_frame_equal(df, DataFrame({'foo': Series()})) - - def test_partial_set_empty_frame_empty_copy_assignment(self): - # GH 5932 - # copy on empty with assignment fails - df = DataFrame(index=[0]) - df = df.copy() - df['a'] = 0 - expected = DataFrame(0, index=[0], columns=['a']) - tm.assert_frame_equal(df, expected) - - def test_partial_set_empty_frame_empty_consistencies(self): - # GH 6171 - # consistency on empty frames - df = DataFrame(columns=['x', 'y']) - df['x'] = [1, 2] - expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) - tm.assert_frame_equal(df, expected, check_dtype=False) + def test_index_type_coercion(self): - df = DataFrame(columns=['x', 'y']) - df['x'] = ['1', '2'] - expected = DataFrame( - dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object) - tm.assert_frame_equal(df, expected) + with catch_warnings(record=True): - df = DataFrame(columns=['x', 'y']) - df.loc[0, 'x'] = 1 - expected = DataFrame(dict(x=[1], y=[np.nan])) - tm.assert_frame_equal(df, expected, check_dtype=False) - - def test_cache_updating(self): - # GH 4939, make sure to update the cache on setitem - - df = tm.makeDataFrame() - df['A'] # cache series - df.ix["Hello Friend"] = df.ix[0] - self.assertIn("Hello Friend", df['A'].index) - self.assertIn("Hello Friend", df['B'].index) - - panel = tm.makePanel() - panel.ix[0] # get first item into cache - panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 - self.assertIn("A+1", panel.ix[0].columns) - self.assertIn("A+1", 
panel.ix[1].columns) - - # 5216 - # make sure that we don't try to set a dead cache - a = np.random.rand(10, 3) - df = DataFrame(a, columns=['x', 'y', 'z']) - tuples = [(i, j) for i in range(5) for j in range(2)] - index = MultiIndex.from_tuples(tuples) - df.index = index - - # setting via chained assignment - # but actually works, since everything is a view - df.loc[0]['z'].iloc[0] = 1. - result = df.loc[(0, 0), 'z'] - self.assertEqual(result, 1) - - # correct setting - df.loc[(0, 0), 'z'] = 2 - result = df.loc[(0, 0), 'z'] - self.assertEqual(result, 2) - - # 10264 - df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ - 'a', 'b', 'c', 'd', 'e'], index=range(5)) - df['f'] = 0 - df.f.values[3] = 1 + # GH 11836 + # if we have an index type and set it with something that looks + # to numpy like the same, but is actually, not + # (e.g. setting with a float or string '0') + # then we need to coerce to object - # TODO(wesm): unused? - # y = df.iloc[np.arange(2, len(df))] + # integer indexes + for s in [Series(range(5)), + Series(range(5), index=range(1, 6))]: - df.f.values[3] = 2 - expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[ - 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5)) - expected.at[3, 'f'] = 2 - tm.assert_frame_equal(df, expected) - expected = Series([0, 0, 0, 2, 0], name='f') - tm.assert_series_equal(df.f, expected) - - def test_set_ix_out_of_bounds_axis_0(self): - df = pd.DataFrame( - randn(2, 5), index=["row%s" % i for i in range(2)], - columns=["col%s" % i for i in range(5)]) - self.assertRaises(ValueError, df.ix.__setitem__, (2, 0), 100) - - def test_set_ix_out_of_bounds_axis_1(self): - df = pd.DataFrame( - randn(5, 2), index=["row%s" % i for i in range(5)], - columns=["col%s" % i for i in range(2)]) - self.assertRaises(ValueError, df.ix.__setitem__, (0, 2), 100) - - def test_iloc_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - - def test_loc_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :], - check_index_type=True, check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - - def test_ix_empty_list_indexer_is_ok(self): - from pandas.util.testing import makeCustomDataframe as mkdf - df = mkdf(5, 2) - # vertical empty - tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0], - check_index_type=True, - check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) - # horizontal empty - tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :], - check_index_type=True, - check_column_type=True) + self.assertTrue(s.index.is_integer()) - def test_index_type_coercion(self): + for indexer in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: + s2 = s.copy() + indexer(s2)[0.1] = 0 + 
self.assertTrue(s2.index.is_floating()) + self.assertTrue(indexer(s2)[0.1] == 0) - # GH 11836 - # if we have an index type and set it with something that looks - # to numpy like the same, but is actually, not - # (e.g. setting with a float or string '0') - # then we need to coerce to object + s2 = s.copy() + indexer(s2)[0.0] = 0 + exp = s.index + if 0 not in s: + exp = Index(s.index.tolist() + [0]) + tm.assert_index_equal(s2.index, exp) - # integer indexes - for s in [Series(range(5)), - Series(range(5), index=range(1, 6))]: + s2 = s.copy() + indexer(s2)['0'] = 0 + self.assertTrue(s2.index.is_object()) - self.assertTrue(s.index.is_integer()) + for s in [Series(range(5), index=np.arange(5.))]: - for indexer in [lambda x: x.ix, - lambda x: x.loc, - lambda x: x]: - s2 = s.copy() - indexer(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(indexer(s2)[0.1] == 0) + self.assertTrue(s.index.is_floating()) - s2 = s.copy() - indexer(s2)[0.0] = 0 - exp = s.index - if 0 not in s: - exp = Index(s.index.tolist() + [0]) - tm.assert_index_equal(s2.index, exp) + for idxr in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: - s2 = s.copy() - indexer(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + s2 = s.copy() + idxr(s2)[0.1] = 0 + self.assertTrue(s2.index.is_floating()) + self.assertTrue(idxr(s2)[0.1] == 0) - for s in [Series(range(5), index=np.arange(5.))]: + s2 = s.copy() + idxr(s2)[0.0] = 0 + tm.assert_index_equal(s2.index, s.index) - self.assertTrue(s.index.is_floating()) + s2 = s.copy() + idxr(s2)['0'] = 0 + self.assertTrue(s2.index.is_object()) - for idxr in [lambda x: x.ix, - lambda x: x.loc, - lambda x: x]: - s2 = s.copy() - idxr(s2)[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(idxr(s2)[0.1] == 0) +class TestMisc(Base, tm.TestCase): - s2 = s.copy() - idxr(s2)[0.0] = 0 - tm.assert_index_equal(s2.index, s.index) + def test_indexer_caching(self): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = [lrange(n), lrange(n)] + index = MultiIndex.from_tuples(lzip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) - s2 = s.copy() - idxr(s2)['0'] = 0 - self.assertTrue(s2.index.is_object()) + # setitem + expected = Series(np.ones(n), index=index) + s = Series(np.zeros(n), index=index) + s[s == 0] = 1 + tm.assert_series_equal(s, expected) def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) @@ -3143,13 +621,6 @@ def test_float_index_to_mixed(self): 'a': [10] * 10}), df) - def test_duplicate_ix_returns_series(self): - df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2], - columns=list('abc')) - r = df.ix[0.2, 'a'] - e = df.loc[0.2, 'a'] - tm.assert_series_equal(r, e) - def test_float_index_non_scalar_assignment(self): df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.]) df.loc[df.index[:2]] = 1 @@ -3185,15 +656,18 @@ def run_tests(df, rhs, right): tm.assert_frame_equal(left, right) left = df.copy() - left.ix[s, l] = rhs + with catch_warnings(record=True): + left.ix[s, l] = rhs tm.assert_frame_equal(left, right) left = df.copy() - left.ix[i, j] = rhs + with catch_warnings(record=True): + left.ix[i, j] = rhs tm.assert_frame_equal(left, right) left = df.copy() - left.ix[r, c] = rhs + with catch_warnings(record=True): + left.ix[r, c] = rhs tm.assert_frame_equal(left, right) xs = np.arange(20).reshape(5, 4) @@ -3226,7 +700,7 @@ def assert_slices_equivalent(l_slc, i_slc): if not idx.is_integer: # For integer indices, ix and plain getitem 
are position-based. tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) for idx in [_mklbl('A', 20), np.arange(20) + 100, np.linspace(100, 150, 20)]: @@ -3243,8 +717,9 @@ def test_slice_with_zero_step_raises(self): lambda: s[::0]) self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', lambda: s.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: s.ix[::0]) + with catch_warnings(record=True): + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: s.ix[::0]) def test_indexing_assignment_dict_already_exists(self): df = pd.DataFrame({'x': [1, 2, 6], @@ -3259,11 +734,13 @@ def test_indexing_assignment_dict_already_exists(self): def test_indexing_dtypes_on_empty(self): # Check that .iloc and .ix return correct dtypes GH9983 df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']}) - df2 = df.ix[[], :] + with catch_warnings(record=True): + df2 = df.ix[[], :] self.assertEqual(df2.loc[:, 'a'].dtype, np.int64) tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0]) - tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) + with catch_warnings(record=True): + tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) def test_range_in_series_indexing(self): # range can cause an indexing error diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py new file mode 100644 index 0000000000000..e68e8015a2f39 --- /dev/null +++ b/pandas/tests/indexing/test_ix.py @@ -0,0 +1,333 @@ +""" test indexing with ix """ + +from warnings import catch_warnings + +import numpy as np +import pandas as pd + +from pandas.types.common import is_scalar +from pandas.compat import lrange +from pandas import Series, DataFrame, option_context, MultiIndex +from pandas.util import testing as tm +from pandas.core.common import PerformanceWarning + + +class TestIX(tm.TestCase): + + def test_ix_deprecation(self): + # GH 15114 + + df = DataFrame({'A': [1, 2, 3]}) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + df.ix[1, 'A'] + + def test_ix_loc_setitem_consistency(self): + + # GH 5771 + # loc with slice and series + s = Series(0, index=[4, 5, 6]) + s.loc[4:5] += 1 + expected = Series([1, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + # GH 5928 + # chained indexing assignment + df = DataFrame({'a': [0, 1, 2]}) + expected = df.copy() + with catch_warnings(record=True): + expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a'] + + with catch_warnings(record=True): + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]] + tm.assert_frame_equal(df, expected) + + df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]}) + with catch_warnings(record=True): + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype( + 'float64') + 0.5 + expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]}) + tm.assert_frame_equal(df, expected) + + # GH 8607 + # ix setitem consistency + df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], + 'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470]}) + expected = DataFrame({'timestamp': pd.to_datetime( + [1413840976, 1413842580, 1413760580], unit='s'), + 'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470]}) + + df2 = df.copy() + df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + df2 = df.copy() + df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + df2 
= df.copy() + with catch_warnings(record=True): + df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s') + tm.assert_frame_equal(df2, expected) + + def test_ix_loc_consistency(self): + + # GH 8613 + # some edge cases where ix/loc should return the same + # this is not an exhaustive case + + def compare(result, expected): + if is_scalar(expected): + self.assertEqual(result, expected) + else: + self.assertTrue(expected.equals(result)) + + # failure cases for .loc, but these work for .ix + df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) + for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), + tuple([slice(0, 2), df.columns[0:2]])]: + + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, + tm.makeTimedeltaIndex]: + df.index = index(len(df.index)) + with catch_warnings(record=True): + df.ix[key] + + self.assertRaises(TypeError, lambda: df.loc[key]) + + df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'), + index=pd.date_range('2012-01-01', periods=5)) + + for key in ['2012-01-03', + '2012-01-31', + slice('2012-01-03', '2012-01-03'), + slice('2012-01-03', '2012-01-04'), + slice('2012-01-03', '2012-01-06', 2), + slice('2012-01-03', '2012-01-31'), + tuple([[True, True, True, False, True]]), ]: + + # getitem + + # if the expected raises, then compare the exceptions + try: + with catch_warnings(record=True): + expected = df.ix[key] + except KeyError: + self.assertRaises(KeyError, lambda: df.loc[key]) + continue + + result = df.loc[key] + compare(result, expected) + + # setitem + df1 = df.copy() + df2 = df.copy() + + with catch_warnings(record=True): + df1.ix[key] = 10 + df2.loc[key] = 10 + compare(df2, df1) + + # edge cases + s = Series([1, 2, 3, 4], index=list('abde')) + + result1 = s['a':'c'] + with catch_warnings(record=True): + result2 = s.ix['a':'c'] + result3 = s.loc['a':'c'] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + # now work rather than raising KeyError + s = Series(range(5), [-2, -1, 1, 2, 3]) + + with catch_warnings(record=True): + result1 = s.ix[-10:3] + result2 = s.loc[-10:3] + tm.assert_series_equal(result1, result2) + + with catch_warnings(record=True): + result1 = s.ix[0:3] + result2 = s.loc[0:3] + tm.assert_series_equal(result1, result2) + + def test_ix_weird_slicing(self): + # http://stackoverflow.com/q/17056560/1240268 + df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], + 'two': [1, 2, 3, 4, 5]}) + df.loc[df['one'] > 1, 'two'] = -df['two'] + + expected = DataFrame({'one': {0: 1.0, + 1: 2.0, + 2: 3.0, + 3: np.nan, + 4: np.nan}, + 'two': {0: 1, + 1: -2, + 2: -3, + 3: 4, + 4: 5}}) + tm.assert_frame_equal(df, expected) + + def test_ix_general(self): + + # ix general issues + + # GH 2817 + data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} + df = DataFrame(data).set_index(keys=['col', 'year']) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with self.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + df.sort_index(inplace=True) + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.] 
* 3, [2012] * 3], + names=['col', 'year']) + expected = DataFrame({'amount': [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) + + def test_ix_assign_column_mixed(self): + # GH #1142 + df = DataFrame(tm.getSeriesData()) + df['foo'] = 'bar' + + orig = df.loc[:, 'B'].copy() + df.loc[:, 'B'] = df.loc[:, 'B'] + 1 + tm.assert_series_equal(df.B, orig + 1) + + # GH 3668, mixed frame with series value + df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'}) + expected = df.copy() + + for i in range(5): + indexer = i * 2 + v = 1000 + i * 200 + expected.loc[indexer, 'y'] = v + self.assertEqual(expected.loc[indexer, 'y'], v) + + df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100 + tm.assert_frame_equal(df, expected) + + # GH 4508, making sure consistency of assignments + df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]}) + df.loc[[0, 2, ], 'b'] = [100, -100] + expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]}) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame({'a': lrange(4)}) + df['b'] = np.nan + df.loc[[1, 3], 'b'] = [100, -100] + expected = DataFrame({'a': [0, 1, 2, 3], + 'b': [np.nan, 100, np.nan, -100]}) + tm.assert_frame_equal(df, expected) + + # ok, but chained assignments are dangerous + # if we turn off chained assignement it will work + with option_context('chained_assignment', None): + df = pd.DataFrame({'a': lrange(4)}) + df['b'] = np.nan + df['b'].loc[[1, 3]] = [100, -100] + tm.assert_frame_equal(df, expected) + + def test_ix_get_set_consistency(self): + + # GH 4544 + # ix/loc get/set not consistent when + # a mixed int/string index + df = DataFrame(np.arange(16).reshape((4, 4)), + columns=['a', 'b', 8, 'c'], + index=['e', 7, 'f', 'g']) + + with catch_warnings(record=True): + self.assertEqual(df.ix['e', 8], 2) + self.assertEqual(df.loc['e', 8], 2) + + with catch_warnings(record=True): + df.ix['e', 8] = 42 + self.assertEqual(df.ix['e', 8], 42) + self.assertEqual(df.loc['e', 8], 42) + + df.loc['e', 8] = 45 + with catch_warnings(record=True): + self.assertEqual(df.ix['e', 8], 45) + self.assertEqual(df.loc['e', 8], 45) + + def test_ix_slicing_strings(self): + # GH3836 + data = {'Classification': + ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'], + 'Random': [1, 2, 3, 4, 5], + 'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']} + df = DataFrame(data) + x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF' + ])] + with catch_warnings(record=True): + df.ix[x.index, 'X'] = df['Classification'] + + expected = DataFrame({'Classification': {0: 'SA EQUITY CFD', + 1: 'bbb', + 2: 'SA EQUITY', + 3: 'SA SSF', + 4: 'aaa'}, + 'Random': {0: 1, + 1: 2, + 2: 3, + 3: 4, + 4: 5}, + 'X': {0: 'correct', + 1: 'bbb', + 2: 'correct', + 3: 'correct', + 4: 'aaa'}}) # bug was 4: 'bbb' + + tm.assert_frame_equal(df, expected) + + def test_ix_setitem_out_of_bounds_axis_0(self): + df = pd.DataFrame( + np.random.randn(2, 5), index=["row%s" % i for i in range(2)], + columns=["col%s" % i for i in range(5)]) + with catch_warnings(record=True): + self.assertRaises(ValueError, df.ix.__setitem__, (2, 0), 100) + + def test_ix_setitem_out_of_bounds_axis_1(self): + df = pd.DataFrame( + np.random.randn(5, 2), index=["row%s" % i for i in range(5)], + columns=["col%s" % i for i in range(2)]) + with catch_warnings(record=True): + self.assertRaises(ValueError, df.ix.__setitem__, (0, 2), 100) + + def test_ix_empty_list_indexer_is_ok(self): + with catch_warnings(record=True): + from pandas.util.testing import makeCustomDataframe as mkdf + df = 
mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0], + check_index_type=True, + check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) + + def test_ix_duplicate_returns_series(self): + df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2], + columns=list('abc')) + with catch_warnings(record=True): + r = df.ix[0.2, 'a'] + e = df.loc[0.2, 'a'] + tm.assert_series_equal(r, e) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py new file mode 100644 index 0000000000000..af9d3ffdf6671 --- /dev/null +++ b/pandas/tests/indexing/test_loc.py @@ -0,0 +1,630 @@ +""" test label based indexing with loc """ + +import itertools +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas.compat import lrange, StringIO +from pandas import (Series, DataFrame, Timestamp, + date_range, MultiIndex) +from pandas.util import testing as tm +from pandas.tests.indexing.common import Base + + +class TestLoc(Base, tm.TestCase): + + def test_loc_getitem_dups(self): + # GH 5678 + # repeated gettitems on a dup index returing a ndarray + df = DataFrame( + np.random.random_sample((20, 5)), + index=['ABCDE' [x % 5] for x in range(20)]) + expected = df.loc['A', 0] + result = df.loc[:, 0].loc['A'] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_dups2(self): + + # GH4726 + # dup indexing with iloc/loc + df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]], + columns=['a', 'a', 'a', 'a', 'a'], index=[1]) + expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')], + index=['a', 'a', 'a', 'a', 'a'], name=1) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + result = df.loc[1] + tm.assert_series_equal(result, expected) + + def test_loc_setitem_dups(self): + + # GH 6541 + df_orig = DataFrame( + {'me': list('rttti'), + 'foo': list('aaade'), + 'bar': np.arange(5, dtype='float64') * 1.34 + 2, + 'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me') + + indexer = tuple(['r', ['bar', 'bar2']]) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + indexer = tuple(['r', 'bar']) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + self.assertEqual(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + indexer = tuple(['t', ['bar', 'bar2']]) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + def test_loc_setitem_slice(self): + # GH10503 + + # assigning the same type should not change the type + df1 = DataFrame({'a': [0, 1, 1], + 'b': Series([100, 200, 300], dtype='uint32')}) + ix = df1['a'] == 1 + newb1 = df1.loc[ix, 'b'] + 1 + df1.loc[ix, 'b'] = newb1 + expected = DataFrame({'a': [0, 1, 1], + 'b': Series([100, 201, 301], dtype='uint32')}) + tm.assert_frame_equal(df1, expected) + + # assigning a new type should get the inferred type + df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, + dtype='uint64') + ix = df1['a'] == 1 + newb2 = df2.loc[ix, 'b'] + df1.loc[ix, 'b'] = newb2 + expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]}, + dtype='uint64') + tm.assert_frame_equal(df2, expected) + + def test_loc_getitem_int(self): + + # int label + self.check_result('int label', 'loc', 2, 'ix', 2, + typs=['ints', 'uints'], axes=0) + 
self.check_result('int label', 'loc', 3, 'ix', 3, + typs=['ints', 'uints'], axes=1) + self.check_result('int label', 'loc', 4, 'ix', 4, + typs=['ints', 'uints'], axes=2) + self.check_result('int label', 'loc', 2, 'ix', 2, + typs=['label'], fails=KeyError) + + def test_loc_getitem_label(self): + + # label + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'], + axes=0) + self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'], + axes=0) + self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0) + self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, + typs=['ts'], axes=0) + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'], + fails=KeyError) + + def test_loc_getitem_label_out_of_range(self): + + # out of range label + self.check_result('label range', 'loc', 'f', 'ix', 'f', + typs=['ints', 'uints', 'labels', 'mixed', 'ts'], + fails=KeyError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', + typs=['floats'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, + typs=['ints', 'uints', 'mixed'], fails=KeyError) + self.check_result('label range', 'loc', 20, 'ix', 20, + typs=['labels'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], + axes=0, fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], + axes=0, fails=TypeError) + + def test_loc_getitem_label_list(self): + + # list of labels + self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], + typs=['ints', 'uints'], axes=0) + self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], + typs=['ints', 'uints'], axes=1) + self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], + typs=['ints', 'uints'], axes=2) + self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', + ['a', 'b', 'd'], typs=['labels'], axes=0) + self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', + ['A', 'B', 'C'], typs=['labels'], axes=1) + self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix', + ['Z', 'Y', 'W'], typs=['labels'], axes=2) + self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix', + [2, 8, 'null'], typs=['mixed'], axes=0) + self.check_result('list lbl', 'loc', + [Timestamp('20130102'), Timestamp('20130103')], 'ix', + [Timestamp('20130102'), Timestamp('20130103')], + typs=['ts'], axes=0) + + self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['empty'], fails=KeyError) + self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], + typs=['ints', 'uints'], axes=0, fails=KeyError) + self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], + typs=['ints', 'uints'], axes=1, fails=KeyError) + self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], + typs=['ints', 'uints'], axes=2, fails=KeyError) + + def test_loc_getitem_label_list_fails(self): + # fails + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints', 'uints'], axes=1, fails=KeyError) + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints', 'uints'], axes=2, fails=KeyError) + + def test_loc_getitem_label_array_like(self): + # array like + self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, + 'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0) + self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, + 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1) + self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, + 'ix', [4, 8, 12], typs=['ints', 
'uints'], axes=2) + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + self.check_result('bool', 'loc', b, 'ix', b, + typs=['ints', 'uints', 'labels', + 'mixed', 'ts', 'floats']) + self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], + fails=KeyError) + + def test_loc_getitem_int_slice(self): + + # ok + self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], + typs=['ints', 'uints'], axes=0) + self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], + typs=['ints', 'uints'], axes=1) + self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], + typs=['ints', 'uints'], axes=2) + + # GH 3053 + # loc should treat integer slices like label slices + + index = MultiIndex.from_tuples([t for t in itertools.product( + [6, 7, 8], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[6:8, :] + expected = df + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_tuples([t + for t in itertools.product( + [10, 20, 30], ['a', 'b'])]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[20:30, :] + expected = df.iloc[2:] + tm.assert_frame_equal(result, expected) + + # doc examples + result = df.loc[10, :] + expected = df.iloc[0:2] + expected.index = ['a', 'b'] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, 10] + # expected = df.ix[:,10] (this fails) + expected = df[10] + tm.assert_frame_equal(result, expected) + + def test_loc_to_fail(self): + + # GH3449 + df = DataFrame(np.random.random((3, 3)), + index=['a', 'b', 'c'], + columns=['e', 'f', 'g']) + + # raise a KeyError? + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([[1, 2], [1, 2]])) + + # GH 7496 + # loc should not fallback + + s = Series() + s.loc[1] = 1 + s.loc['a'] = 2 + + self.assertRaises(KeyError, lambda: s.loc[-1]) + self.assertRaises(KeyError, lambda: s.loc[[-1, -2]]) + + self.assertRaises(KeyError, lambda: s.loc[['4']]) + + s.loc[-1] = 3 + result = s.loc[[-1, -2]] + expected = Series([3, np.nan], index=[-1, -2]) + tm.assert_series_equal(result, expected) + + s['a'] = 2 + self.assertRaises(KeyError, lambda: s.loc[[-2]]) + + del s['a'] + + def f(): + s.loc[[-2]] = 0 + + self.assertRaises(KeyError, f) + + # inconsistency between .loc[values] and .loc[values,:] + # GH 7999 + df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value']) + + def f(): + df.loc[[3], :] + + self.assertRaises(KeyError, f) + + def f(): + df.loc[[3]] + + self.assertRaises(KeyError, f) + + def test_loc_getitem_label_slice(self): + + # label slices (with ints) + self.check_result('lab slice', 'loc', slice(1, 3), + 'ix', slice(1, 3), + typs=['labels', 'mixed', 'empty', 'ts', 'floats'], + fails=TypeError) + + # real label slices + self.check_result('lab slice', 'loc', slice('a', 'c'), + 'ix', slice('a', 'c'), typs=['labels'], axes=0) + self.check_result('lab slice', 'loc', slice('A', 'C'), + 'ix', slice('A', 'C'), typs=['labels'], axes=1) + self.check_result('lab slice', 'loc', slice('W', 'Z'), + 'ix', slice('W', 'Z'), typs=['labels'], axes=2) + + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=0) + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=1, fails=TypeError) + self.check_result('ts slice', 'loc', slice('20130102', '20130104'), + 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=2, fails=TypeError) + + # GH 14316 + self.check_result('ts 
slice rev', 'loc', slice('20130104', '20130102'), + 'indexer', [0, 1, 2], typs=['ts_rev'], axes=0) + + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=0, fails=TypeError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=1, fails=KeyError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=2, fails=KeyError) + + self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( + 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) + + def test_loc_general(self): + + df = DataFrame( + np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'], + index=['A', 'B', 'C', 'D']) + + # want this to work + result = df.loc[:, "A":"B"].iloc[0:2, :] + self.assertTrue((result.columns == ['A', 'B']).all()) + self.assertTrue((result.index == ['A', 'B']).all()) + + # mixed type + result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] + expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0) + tm.assert_series_equal(result, expected) + self.assertEqual(result.dtype, object) + + def test_loc_setitem_consistency(self): + # GH 6149 + # coerce similary for setitem and loc when rows have a null-slice + expected = DataFrame({'date': Series(0, index=range(5), + dtype=np.int64), + 'val': Series(range(5), dtype=np.int64)}) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 0 + tm.assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array(0, dtype=np.int64) + tm.assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64) + tm.assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series('foo', index=range(5)), + 'val': Series(range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 'foo' + tm.assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series(1.0, index=range(5)), + 'val': Series(range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series(range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 1.0 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_empty(self): + # empty (essentially noops) + expected = DataFrame(columns=['x', 'y']) + expected['x'] = expected['x'].astype(np.int64) + df = DataFrame(columns=['x', 'y']) + df.loc[:, 'x'] = 1 + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['x', 'y']) + df['x'] = 1 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_slice_column_len(self): + # .loc[:,column] setting with slice == len of the column + # GH10408 + data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat +Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse +Region,Site,RespondentID,,,,, +Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, +Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes +Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, +Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" + + df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) + df.loc[:, 
('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'StartDate')]) + df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'EndDate')]) + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')] + + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'Duration')].astype('timedelta64[s]') + expected = Series([1380, 720, 840, 2160.], index=df.index, + name=('Respondent', 'Duration')) + tm.assert_series_equal(df[('Respondent', 'Duration')], expected) + + def test_loc_setitem_frame(self): + df = self.frame_labels + + result = df.iloc[0, 0] + + df.loc['a', 'A'] = 1 + result = df.loc['a', 'A'] + self.assertEqual(result, 1) + + result = df.iloc[0, 0] + self.assertEqual(result, 1) + + df.loc[:, 'B':'D'] = 0 + expected = df.loc[:, 'B':'D'] + result = df.iloc[:, 1:] + tm.assert_frame_equal(result, expected) + + # GH 6254 + # setting issue + df = DataFrame(index=[3, 5, 4], columns=['A']) + df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64') + expected = DataFrame(dict(A=Series( + [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4]) + tm.assert_frame_equal(df, expected) + + # GH 6252 + # setting with an empty frame + keys1 = ['@' + str(i) for i in range(5)] + val1 = np.arange(5, dtype='int64') + + keys2 = ['@' + str(i) for i in range(4)] + val2 = np.arange(4, dtype='int64') + + index = list(set(keys1).union(keys2)) + df = DataFrame(index=index) + df['A'] = np.nan + df.loc[keys1, 'A'] = val1 + + df['B'] = np.nan + df.loc[keys2, 'B'] = val2 + + expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series( + val2, index=keys2))).reindex(index=index) + tm.assert_frame_equal(df, expected) + + # GH 8669 + # invalid coercion of nan -> int + df = DataFrame({'A': [1, 2, 3], 'B': np.nan}) + df.loc[df.B > df.A, 'B'] = df.A + expected = DataFrame({'A': [1, 2, 3], 'B': np.nan}) + tm.assert_frame_equal(df, expected) + + # GH 6546 + # setting with mixed labels + df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']}) + + result = df.loc[0, [1, 2]] + expected = Series([1, 3], index=[1, 2], dtype=object, name=0) + tm.assert_series_equal(result, expected) + + expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']}) + df.loc[0, [1, 2]] = [5, 6] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_multiples(self): + # multiple setting + df = DataFrame({'A': ['foo', 'bar', 'baz'], + 'B': Series( + range(3), dtype=np.int64)}) + rhs = df.loc[1:2] + rhs.index = df.index[0:2] + df.loc[0:1] = rhs + expected = DataFrame({'A': ['bar', 'baz', 'baz'], + 'B': Series( + [1, 2, 2], dtype=np.int64)}) + tm.assert_frame_equal(df, expected) + + # multiple setting with frame on rhs (with M8) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + expected = DataFrame({'date': [Timestamp('20000101'), Timestamp( + '20000102'), Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')], + 'val': Series( + [0, 1, 0, 1, 2], dtype=np.int64)}) + rhs = df.loc[0:2] + rhs.index = df.index[2:5] + df.loc[2:4] = rhs + tm.assert_frame_equal(df, expected) + + def test_loc_coerceion(self): + + # 12411 + df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), + pd.NaT]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 12045 + import datetime + df = DataFrame({'date': 
[datetime.datetime(2012, 1, 1), + datetime.datetime(1012, 1, 2)]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 11594 + df = DataFrame({'text': ['some words'] + [None] * 9}) + expected = df.dtypes + + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) + + def test_loc_non_unique(self): + # GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise becuase the we are non monotonic + df = DataFrame({'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([slice(1, None)])) + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([slice(0, None)])) + self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) + + # monotonic are ok + df = DataFrame({'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, + index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0) + result = df.loc[1:] + expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]}, + index=[1, 1, 2, 3]) + tm.assert_frame_equal(result, expected) + + result = df.loc[0:] + tm.assert_frame_equal(result, df) + + result = df.loc[1:2] + expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]}, + index=[1, 1, 2]) + tm.assert_frame_equal(result, expected) + + def test_loc_non_unique_memory_error(self): + + # GH 4280 + # non_unique index with a large selection triggers a memory error + + columns = list('ABCDEFG') + + def gen_test(l, l2): + return pd.concat([ + DataFrame(np.random.randn(l, len(columns)), + index=lrange(l), columns=columns), + DataFrame(np.ones((l2, len(columns))), + index=[0] * l2, columns=columns)]) + + def gen_expected(df, mask): + l = len(mask) + return pd.concat([df.take([0], convert=False), + DataFrame(np.ones((l, len(columns))), + index=[0] * l, + columns=columns), + df.take(mask[1:], convert=False)]) + + df = gen_test(900, 100) + self.assertFalse(df.index.is_unique) + + mask = np.arange(100) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + df = gen_test(900000, 100000) + self.assertFalse(df.index.is_unique) + + mask = np.arange(100000) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + def test_loc_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = 'index_name' + result = df.iloc[[0, 1]].index.name + self.assertEqual(result, 'index_name') + + with catch_warnings(record=True): + result = df.ix[[0, 1]].index.name + self.assertEqual(result, 'index_name') + + result = df.loc[[0, 1]].index.name + self.assertEqual(result, 'index_name') + + def test_loc_empty_list_indexer_is_ok(self): + from pandas.util.testing import makeCustomDataframe as mkdf + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :], + check_index_type=True, check_column_type=True) + # horizontal empty + tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], + check_index_type=True, + check_column_type=True) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index b40f0b8cd9976..ed943202872a7 100644 --- 
a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -46,101 +46,103 @@ def test_iloc_getitem_multiindex2(self): tm.assert_frame_equal(rs, xp) def test_setitem_multiindex(self): - for index_fn in ('ix', 'loc'): - - def check(target, indexers, value, compare_fn, expected=None): - fn = getattr(target, index_fn) - fn.__setitem__(indexers, value) - result = fn.__getitem__(indexers) - if expected is None: - expected = value - compare_fn(result, expected) - # GH7190 - index = pd.MultiIndex.from_product([np.arange(0, 100), - np.arange(0, 80)], - names=['time', 'firm']) - t, n = 0, 2 - df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=0, - compare_fn=self.assertEqual) - - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=1, - compare_fn=self.assertEqual) - - df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, indexers=((t, n), 'X'), value=2, - compare_fn=self.assertEqual) - - # GH 7218, assinging with 0-dim arrays - df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', - 'X', 'd', 'profit'], - index=index) - check(target=df, - indexers=((t, n), 'X'), - value=np.array(3), - compare_fn=self.assertEqual, - expected=3, ) - - # GH5206 - df = pd.DataFrame(np.arange(25).reshape(5, 5), - columns='A,B,C,D,E'.split(','), dtype=float) - df['F'] = 99 - row_selection = df['A'] % 2 == 0 - col_selection = ['B', 'C'] - with catch_warnings(record=True): - df.ix[row_selection, col_selection] = df['F'] - output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) - with catch_warnings(record=True): - tm.assert_frame_equal(df.ix[row_selection, col_selection], - output) - check(target=df, - indexers=(row_selection, col_selection), - value=df['F'], - compare_fn=tm.assert_frame_equal, - expected=output, ) - - # GH11372 - idx = pd.MultiIndex.from_product([ - ['A', 'B', 'C'], - pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) - cols = pd.MultiIndex.from_product([ - ['foo', 'bar'], - pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) - - df = pd.DataFrame(np.random.random((12, 4)), - index=idx, columns=cols) - - subidx = pd.MultiIndex.from_tuples( - [('A', pd.Timestamp('2015-01-01')), - ('A', pd.Timestamp('2015-02-01'))]) - subcols = pd.MultiIndex.from_tuples( - [('foo', pd.Timestamp('2016-01-01')), - ('foo', pd.Timestamp('2016-02-01'))]) - - vals = pd.DataFrame(np.random.random((2, 2)), - index=subidx, columns=subcols) - check(target=df, - indexers=(subidx, subcols), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # set all columns - vals = pd.DataFrame( - np.random.random((2, 4)), index=subidx, columns=cols) - check(target=df, - indexers=(subidx, slice(None, None, None)), - value=vals, - compare_fn=tm.assert_frame_equal, ) - # identity - copy = df.copy() - check(target=df, indexers=(df.index, df.columns), value=df, - compare_fn=tm.assert_frame_equal, expected=copy) + with catch_warnings(record=True): + + for index_fn in ('ix', 'loc'): + + def check(target, indexers, value, compare_fn, expected=None): + fn = getattr(target, index_fn) + fn.__setitem__(indexers, value) + result = fn.__getitem__(indexers) + if expected is None: + expected = value + compare_fn(result, expected) + # GH7190 + index = pd.MultiIndex.from_product([np.arange(0, 100), + np.arange(0, 80)], + names=['time', 'firm']) + t, n = 0, 2 + df = DataFrame(np.nan, 
columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=0, + compare_fn=self.assertEqual) + + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=1, + compare_fn=self.assertEqual) + + df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=2, + compare_fn=self.assertEqual) + + # GH 7218, assinging with 0-dim arrays + df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x', + 'X', 'd', 'profit'], + index=index) + check(target=df, + indexers=((t, n), 'X'), + value=np.array(3), + compare_fn=self.assertEqual, + expected=3, ) + + # GH5206 + df = pd.DataFrame(np.arange(25).reshape(5, 5), + columns='A,B,C,D,E'.split(','), dtype=float) + df['F'] = 99 + row_selection = df['A'] % 2 == 0 + col_selection = ['B', 'C'] + with catch_warnings(record=True): + df.ix[row_selection, col_selection] = df['F'] + output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) + with catch_warnings(record=True): + tm.assert_frame_equal(df.ix[row_selection, col_selection], + output) + check(target=df, + indexers=(row_selection, col_selection), + value=df['F'], + compare_fn=tm.assert_frame_equal, + expected=output, ) + + # GH11372 + idx = pd.MultiIndex.from_product([ + ['A', 'B', 'C'], + pd.date_range('2015-01-01', '2015-04-01', freq='MS')]) + cols = pd.MultiIndex.from_product([ + ['foo', 'bar'], + pd.date_range('2016-01-01', '2016-02-01', freq='MS')]) + + df = pd.DataFrame(np.random.random((12, 4)), + index=idx, columns=cols) + + subidx = pd.MultiIndex.from_tuples( + [('A', pd.Timestamp('2015-01-01')), + ('A', pd.Timestamp('2015-02-01'))]) + subcols = pd.MultiIndex.from_tuples( + [('foo', pd.Timestamp('2016-01-01')), + ('foo', pd.Timestamp('2016-02-01'))]) + + vals = pd.DataFrame(np.random.random((2, 2)), + index=subidx, columns=subcols) + check(target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # set all columns + vals = pd.DataFrame( + np.random.random((2, 4)), index=subidx, columns=cols) + check(target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, ) + # identity + copy = df.copy() + check(target=df, indexers=(df.index, df.columns), value=df, + compare_fn=tm.assert_frame_equal, expected=copy) def test_loc_getitem_series(self): # GH14730 @@ -559,32 +561,37 @@ def test_multiindex_assignment(self): df['d'] = np.nan arr = np.array([0., 1.]) - df.ix[4, 'd'] = arr - tm.assert_series_equal(df.ix[4, 'd'], - Series(arr, index=[8, 10], name='d')) + with catch_warnings(record=True): + df.ix[4, 'd'] = arr + tm.assert_series_equal(df.ix[4, 'd'], + Series(arr, index=[8, 10], name='d')) # single dtype df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), columns=list('abc'), index=[[4, 4, 8], [8, 10, 12]]) - df.ix[4, 'c'] = arr - exp = Series(arr, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) + with catch_warnings(record=True): + df.ix[4, 'c'] = arr + exp = Series(arr, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) # scalar ok - df.ix[4, 'c'] = 10 - exp = Series(10, index=[8, 10], name='c', dtype='float64') - tm.assert_series_equal(df.ix[4, 'c'], exp) + with catch_warnings(record=True): + df.ix[4, 'c'] = 10 + exp = Series(10, index=[8, 10], name='c', dtype='float64') + tm.assert_series_equal(df.ix[4, 'c'], exp) # 
invalid assignments def f(): - df.ix[4, 'c'] = [0, 1, 2, 3] + with catch_warnings(record=True): + df.ix[4, 'c'] = [0, 1, 2, 3] self.assertRaises(ValueError, f) def f(): - df.ix[4, 'c'] = [0] + with catch_warnings(record=True): + df.ix[4, 'c'] = [0] self.assertRaises(ValueError, f) @@ -614,7 +621,8 @@ def f(name, df2): # but in this case, that's ok for name, df2 in grp: new_vals = np.arange(df2.shape[0]) - df.ix[name, 'new_col'] = new_vals + with catch_warnings(record=True): + df.ix[name, 'new_col'] = new_vals def test_multiindex_label_slicing_with_negative_step(self): s = Series(np.arange(20), @@ -624,7 +632,8 @@ def test_multiindex_label_slicing_with_negative_step(self): def assert_slices_equivalent(l_slc, i_slc): tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) - tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) + with catch_warnings(record=True): + tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc]) assert_slices_equivalent(SLC[::-1], SLC[::-1]) diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py index 5ec3076af599a..0677ea498c282 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -1,3 +1,5 @@ +from warnings import catch_warnings + import numpy as np from pandas.util import testing as tm from pandas import Panel, date_range, DataFrame @@ -112,8 +114,8 @@ def test_panel_getitem(self): len(ind), 5), index=ind, columns=list('ABCDE')) panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) - test2 = panel.ix[:, "2002":"2002-12-31"] - test1 = panel.ix[:, "2002"] + test2 = panel.loc[:, "2002":"2002-12-31"] + test1 = panel.loc[:, "2002"] tm.assert_panel_equal(test1, test2) # GH8710 @@ -134,10 +136,8 @@ def test_panel_getitem(self): result = panel.loc['ItemA':'ItemB'] tm.assert_panel_equal(result, expected) - result = panel.ix['ItemA':'ItemB'] - tm.assert_panel_equal(result, expected) - - result = panel.ix[['ItemA', 'ItemB']] + with catch_warnings(record=True): + result = panel.ix[['ItemA', 'ItemB']] tm.assert_panel_equal(result, expected) # with an object-like diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py new file mode 100644 index 0000000000000..a00f880ff6591 --- /dev/null +++ b/pandas/tests/indexing/test_partial.py @@ -0,0 +1,587 @@ +""" +test setting *parts* of objects both positionally and label based + +TOD: these should be split among the indexer tests +""" +from warnings import catch_warnings +import numpy as np + +import pandas as pd +from pandas import Series, DataFrame, Panel, Index, date_range +from pandas.util import testing as tm + + +class TestPartialSetting(tm.TestCase): + + def test_partial_setting(self): + + # GH2578, allow ix and friends to partially set + + # series + s_orig = Series([1, 2, 3]) + + s = s_orig.copy() + s[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s[5] = 5. + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5. + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + # iloc/iat raise + s = s_orig.copy() + + def f(): + s.iloc[3] = 5. + + self.assertRaises(IndexError, f) + + def f(): + s.iat[3] = 5. 
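+        # Position 3 is past the end of the length-3 Series: unlike the
+        # label-based setters exercised above (which may enlarge the object,
+        # see GH2578), the positional setters .iloc/.iat never add new
+        # positions, so both helper functions defined here are expected to
+        # raise IndexError.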
+ + self.assertRaises(IndexError, f) + + # ## frame ## + + df_orig = DataFrame( + np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64') + + # iloc/iat raise + df = df_orig.copy() + + def f(): + df.iloc[4, 2] = 5. + + self.assertRaises(IndexError, f) + + def f(): + df.iat[4, 2] = 5. + + self.assertRaises(IndexError, f) + + # row setting where it exists + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) + df = df_orig.copy() + df.iloc[1] = df.iloc[2] + tm.assert_frame_equal(df, expected) + + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) + df = df_orig.copy() + df.loc[1] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # like 2578, partial setting with dtype preservation + expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]})) + df = df_orig.copy() + df.loc[3] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # single dtype frame, overwrite + expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]})) + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'B'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])})) + df = df_orig.copy() + df['B'] = df['B'].astype(np.float64) + with catch_warnings(record=True): + df.ix[:, 'B'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'C'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected['C'] = df['A'] + df = df_orig.copy() + with catch_warnings(record=True): + df.ix[:, 'C'] = df.ix[:, 'A'] + tm.assert_frame_equal(df, expected) + + # ## panel ## + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + + # panel setting via item + p_orig = Panel(np.arange(16).reshape(2, 4, 2), + items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') + expected = p_orig.copy() + expected['Item3'] = expected['Item1'] + p = p_orig.copy() + p.loc['Item3'] = p['Item1'] + tm.assert_panel_equal(p, expected) + + # panel with aligned series + expected = p_orig.copy() + expected = expected.transpose(2, 1, 0) + expected['C'] = DataFrame({'Item1': [30, 30, 30, 30], + 'Item2': [32, 32, 32, 32]}, + index=p_orig.major_axis) + expected = expected.transpose(2, 1, 0) + p = p_orig.copy() + p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) + tm.assert_panel_equal(p, expected) + + # GH 8473 + dates = date_range('1/1/2000', periods=8) + df_orig = DataFrame(np.random.randn(8, 4), index=dates, + columns=['A', 'B', 'C', 'D']) + + expected = pd.concat([df_orig, DataFrame( + {'A': 7}, index=[dates[-1] + 1])]) + df = df_orig.copy() + df.loc[dates[-1] + 1, 'A'] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + 1, 'A'] = 7 + tm.assert_frame_equal(df, expected) + + exp_other = DataFrame({0: 7}, index=[dates[-1] + 1]) + expected = pd.concat([df_orig, exp_other], axis=1) + + df = df_orig.copy() + df.loc[dates[-1] + 1, 0] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + 1, 0] = 7 + tm.assert_frame_equal(df, expected) + + def test_partial_setting_mixed_dtype(self): + + # in a mixed dtype environment, try to preserve dtypes + # by 
appending + df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) + + s = df.loc[1].copy() + s.name = 2 + expected = df.append(s) + + df.loc[2] = df.loc[1] + tm.assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=range(4)) + tm.assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0])) + + # columns will align + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=['B']) + + exp = DataFrame([[np.nan, 1]], columns=['A', 'B'], + index=[0], dtype='float64') + tm.assert_frame_equal(df, exp) + + # list-like must conform + df = DataFrame(columns=['A', 'B']) + + def f(): + df.loc[0] = [1, 2, 3] + + self.assertRaises(ValueError, f) + + # these are coerced to float unavoidably (as its a list-like to begin) + df = DataFrame(columns=['A', 'B']) + df.loc[3] = [6, 7] + + exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], + dtype='float64') + tm.assert_frame_equal(df, exp) + + def test_series_partial_set(self): + # partial set with new index + # Regression from GH4825 + ser = Series([0.1, 0.2], index=[1, 2]) + + # loc + expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) + result = ser.loc[[3, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing in in the index + self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) + + expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) + result = ser.loc[[2, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) + result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[4, 5, 6, 7]).loc[[7, 2, 2]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_series_partial_set_with_name(self): + # GH 11497 + + idx = Index([1, 2], dtype='int64', name='idx') + ser = Series([0.1, 0.2], index=idx, name='s') + + # loc + exp_idx = Index([3, 2, 3], dtype='int64', name='idx') + expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') + result = ser.loc[[3, 2, 3]] + 
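+        # As in test_series_partial_set above, .loc with labels missing from
+        # the index currently behaves like a reindex: the missing labels come
+        # back as NaN and the result keeps the index name 'idx', which the
+        # assertion below verifies.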
tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') + expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, + name='s') + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([2, 2, 1], dtype='int64', name='idx') + expected = Series([0.2, 0.2, 0.1], index=exp_idx, name='s') + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') + expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing in in the index + self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) + + exp_idx = Index([2, 2, 3], dtype='int64', name='idx') + expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') + result = ser.loc[[2, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([3, 4, 4], dtype='int64', name='idx') + expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([1, 2, 3], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([5, 3, 3], dtype='int64', name='idx') + expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([5, 4, 4], dtype='int64', name='idx') + expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([7, 2, 2], dtype='int64', name='idx') + expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([4, 5, 6, 7], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[7, 2, 2]] + tm.assert_series_equal(result, expected, check_index_type=True) + + exp_idx = Index([4, 5, 5], dtype='int64', name='idx') + expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') + idx = Index([1, 2, 3, 4], dtype='int64', name='idx') + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx') + expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s') + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_partial_set_invalid(self): + + # GH 4940 + # allow only setting of 'valid' values + + orig = tm.makeTimeDataFrame() + df = orig.copy() + + # don't allow not string inserts + def f(): + with catch_warnings(record=True): + df.loc[100.0, :] = df.ix[0] + + self.assertRaises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.loc[100, :] = df.ix[0] + + self.assertRaises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.ix[100.0, :] = df.ix[0] + + self.assertRaises(TypeError, f) + + def f(): + with catch_warnings(record=True): + df.ix[100, :] = df.ix[0] + + 
self.assertRaises(ValueError, f) + + # allow object conversion here + df = orig.copy() + with catch_warnings(record=True): + df.loc['a', :] = df.ix[0] + exp = orig.append(pd.Series(df.ix[0], name='a')) + tm.assert_frame_equal(df, exp) + tm.assert_index_equal(df.index, + pd.Index(orig.index.tolist() + ['a'])) + self.assertEqual(df.index.dtype, 'object') + + def test_partial_set_empty_series(self): + + # GH5226 + + # partially set with an empty object series + s = Series() + s.loc[1] = 1 + tm.assert_series_equal(s, Series([1], index=[1])) + s.loc[3] = 3 + tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) + + s = Series() + s.loc[1] = 1. + tm.assert_series_equal(s, Series([1.], index=[1])) + s.loc[3] = 3. + tm.assert_series_equal(s, Series([1., 3.], index=[1, 3])) + + s = Series() + s.loc['foo'] = 1 + tm.assert_series_equal(s, Series([1], index=['foo'])) + s.loc['bar'] = 3 + tm.assert_series_equal(s, Series([1, 3], index=['foo', 'bar'])) + s.loc[3] = 4 + tm.assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3])) + + def test_partial_set_empty_frame(self): + + # partially set with an empty object + # frame + df = DataFrame() + + def f(): + df.loc[1] = 1 + + self.assertRaises(ValueError, f) + + def f(): + df.loc[1] = Series([1], index=['foo']) + + self.assertRaises(ValueError, f) + + def f(): + df.loc[:, 1] = 1 + + self.assertRaises(ValueError, f) + + # these work as they don't really change + # anything but the index + # GH5632 + expected = DataFrame(columns=['foo'], index=pd.Index( + [], dtype='int64')) + + def f(): + df = DataFrame() + df['foo'] = Series([], dtype='object') + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = Series(df.index) + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = df.index + return df + + tm.assert_frame_equal(f(), expected) + + expected = DataFrame(columns=['foo'], + index=pd.Index([], dtype='int64')) + expected['foo'] = expected['foo'].astype('float64') + + def f(): + df = DataFrame() + df['foo'] = [] + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + df['foo'] = Series(range(len(df))) + return df + + tm.assert_frame_equal(f(), expected) + + def f(): + df = DataFrame() + tm.assert_index_equal(df.index, pd.Index([], dtype='object')) + df['foo'] = range(len(df)) + return df + + expected = DataFrame(columns=['foo'], + index=pd.Index([], dtype='int64')) + expected['foo'] = expected['foo'].astype('float64') + tm.assert_frame_equal(f(), expected) + + df = DataFrame() + tm.assert_index_equal(df.columns, pd.Index([], dtype=object)) + df2 = DataFrame() + df2[1] = Series([1], index=['foo']) + df.loc[:, 1] = Series([1], index=['foo']) + tm.assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1])) + tm.assert_frame_equal(df, df2) + + # no index to start + expected = DataFrame({0: Series(1, index=range(4))}, + columns=['A', 'B', 0]) + + df = DataFrame(columns=['A', 'B']) + df[0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['A', 'B']) + df.loc[:, 0] = Series(1, index=range(4)) + df.dtypes + str(df) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_row(self): + # GH5720, GH5744 + # don't create rows when empty + expected = DataFrame(columns=['A', 'B', 'New'], + index=pd.Index([], dtype='int64')) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['New'] = 
expected['New'].astype('float64') + + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + y['New'] = np.nan + tm.assert_frame_equal(y, expected) + # tm.assert_frame_equal(y,expected) + + expected = DataFrame(columns=['a', 'b', 'c c', 'd']) + expected['d'] = expected['d'].astype('int64') + df = DataFrame(columns=['a', 'b', 'c c']) + df['d'] = 3 + tm.assert_frame_equal(df, expected) + tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object)) + + # reindex columns is ok + df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) + y = df[df.A > 5] + result = y.reindex(columns=['A', 'B', 'C']) + expected = DataFrame(columns=['A', 'B', 'C'], + index=pd.Index([], dtype='int64')) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['C'] = expected['C'].astype('float64') + tm.assert_frame_equal(result, expected) + + def test_partial_set_empty_frame_set_series(self): + # GH 5756 + # setting with empty Series + df = DataFrame(Series()) + tm.assert_frame_equal(df, DataFrame({0: Series()})) + + df = DataFrame(Series(name='foo')) + tm.assert_frame_equal(df, DataFrame({'foo': Series()})) + + def test_partial_set_empty_frame_empty_copy_assignment(self): + # GH 5932 + # copy on empty with assignment fails + df = DataFrame(index=[0]) + df = df.copy() + df['a'] = 0 + expected = DataFrame(0, index=[0], columns=['a']) + tm.assert_frame_equal(df, expected) + + def test_partial_set_empty_frame_empty_consistencies(self): + # GH 6171 + # consistency on empty frames + df = DataFrame(columns=['x', 'y']) + df['x'] = [1, 2] + expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) + tm.assert_frame_equal(df, expected, check_dtype=False) + + df = DataFrame(columns=['x', 'y']) + df['x'] = ['1', '2'] + expected = DataFrame( + dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object) + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=['x', 'y']) + df.loc[0, 'x'] = 1 + expected = DataFrame(dict(x=[1], y=[np.nan])) + tm.assert_frame_equal(df, expected, check_dtype=False) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py new file mode 100644 index 0000000000000..4e81cd01cd5d2 --- /dev/null +++ b/pandas/tests/indexing/test_scalar.py @@ -0,0 +1,156 @@ +""" test scalar indexing, including at and iat """ + +import numpy as np + +from pandas import (Series, DataFrame, Timestamp, + Timedelta, date_range) +from pandas.util import testing as tm +from pandas.tests.indexing.common import Base + + +class TestScalar(Base, tm.TestCase): + + def test_at_and_iat_get(self): + def _check(f, func, values=False): + + if f is not None: + indicies = self.generate_indices(f, values) + for i in indicies: + result = getattr(f, func)[i] + expected = self.get_value(f, i, values) + tm.assert_almost_equal(result, expected) + + for o in self._objs: + + d = getattr(self, o) + + # iat + for f in [d['ints'], d['uints']]: + _check(f, 'iat', values=True) + + for f in [d['labels'], d['ts'], d['floats']]: + if f is not None: + self.assertRaises(ValueError, self.check_values, f, 'iat') + + # at + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], d['floats']]: + _check(f, 'at') + + def test_at_and_iat_set(self): + def _check(f, func, values=False): + + if f is not None: + indicies = self.generate_indices(f, values) + for i in indicies: + getattr(f, func)[i] = 1 + expected = self.get_value(f, i, values) + tm.assert_almost_equal(expected, 1) + + for t in self._objs: + + d = getattr(self, t) + + # iat + for f in 
[d['ints'], d['uints']]: + _check(f, 'iat', values=True) + + for f in [d['labels'], d['ts'], d['floats']]: + if f is not None: + self.assertRaises(ValueError, _check, f, 'iat') + + # at + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], d['floats']]: + _check(f, 'at') + + def test_at_iat_coercion(self): + + # as timestamp is not a tuple! + dates = date_range('1/1/2000', periods=8) + df = DataFrame(np.random.randn(8, 4), + index=dates, + columns=['A', 'B', 'C', 'D']) + s = df['A'] + + result = s.at[dates[5]] + xp = s.values[5] + self.assertEqual(result, xp) + + # GH 7729 + # make sure we are boxing the returns + s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]') + expected = Timestamp('2014-02-02') + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + self.assertEqual(result, expected) + + s = Series(['1 days', '2 days'], dtype='timedelta64[ns]') + expected = Timedelta('2 days') + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + self.assertEqual(result, expected) + + def test_iat_invalid_args(self): + pass + + def test_imethods_with_dups(self): + + # GH6493 + # iat/iloc with dups + + s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64') + result = s.iloc[2] + self.assertEqual(result, 2) + result = s.iat[2] + self.assertEqual(result, 2) + + self.assertRaises(IndexError, lambda: s.iat[10]) + self.assertRaises(IndexError, lambda: s.iat[-10]) + + result = s.iloc[[2, 3]] + expected = Series([2, 3], [2, 2], dtype='int64') + tm.assert_series_equal(result, expected) + + df = s.to_frame() + result = df.iloc[2] + expected = Series(2, index=[0], name=2) + tm.assert_series_equal(result, expected) + + result = df.iat[2, 0] + expected = 2 + self.assertEqual(result, 2) + + def test_at_to_fail(self): + # at should not fallback + # GH 7814 + s = Series([1, 2, 3], index=list('abc')) + result = s.at['a'] + self.assertEqual(result, 1) + self.assertRaises(ValueError, lambda: s.at[0]) + + df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) + result = df.at['a', 'A'] + self.assertEqual(result, 1) + self.assertRaises(ValueError, lambda: df.at['a', 0]) + + s = Series([1, 2, 3], index=[3, 2, 1]) + result = s.at[1] + self.assertEqual(result, 3) + self.assertRaises(ValueError, lambda: s.at['a']) + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + result = df.at[1, 0] + self.assertEqual(result, 3) + self.assertRaises(ValueError, lambda: df.at['a', 0]) + + # GH 13822, incorrect error string with non-unique columns when missing + # column is accessed + df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]}) + df.columns = ['x', 'x', 'z'] + + # Check that we get the correct value in the KeyError + self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", + lambda: df[['x', 'y', 'z']]) From 4ce9c0c9b9ef0c6665a0d9ead1afbfb05a864252 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 10 Mar 2017 06:25:22 -0500 Subject: [PATCH 184/353] BUG: Incorrect value updating for groupby.cummin/max (#15635) closes #15635 Author: Matt Roeschke Closes #15642 from mroeschke/fix_15635 and squashes the following commits: b92b81a [Matt Roeschke] BUG: Incorrect value updating for groupby.cummin/max (#15635) --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/_libs/algos_groupby_helper.pxi.in | 20 ++++++++++---------- pandas/tests/groupby/test_groupby.py | 11 +++++++++++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cf3dddc3a2933..47aa4450b897f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ 
b/doc/source/whatsnew/v0.20.0.txt @@ -716,7 +716,7 @@ Performance Improvements - Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). -- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`) +- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) - Improved performance of ``.rank()`` for categorical data (:issue:`15498`) diff --git a/pandas/_libs/algos_groupby_helper.pxi.in b/pandas/_libs/algos_groupby_helper.pxi.in index 9552b4299fe6a..e2c263f49b110 100644 --- a/pandas/_libs/algos_groupby_helper.pxi.in +++ b/pandas/_libs/algos_groupby_helper.pxi.in @@ -603,7 +603,7 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, min_val = 0 + {{dest_type2}} val, mval ndarray[{{dest_type2}}, ndim=2] accum int64_t lab @@ -628,10 +628,10 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{else}} if val == val: {{endif}} - if val < accum[lab, j]: - min_val = val - accum[lab, j] = min_val - out[i, j] = accum[lab, j] + mval = accum[lab, j] + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval @cython.boundscheck(False) @@ -645,7 +645,7 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, """ cdef: Py_ssize_t i, j, N, K, size - {{dest_type2}} val, max_val = 0 + {{dest_type2}} val, mval ndarray[{{dest_type2}}, ndim=2] accum int64_t lab @@ -669,10 +669,10 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, {{else}} if val == val: {{endif}} - if val > accum[lab, j]: - max_val = val - accum[lab, j] = max_val - out[i, j] = accum[lab, j] + mval = accum[lab, j] + if val > mval: + accum[lab, j] = mval = val + out[i, j] = mval {{endfor}} diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e846963732883..d7fa3beda0abf 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -4303,6 +4303,17 @@ def test_cummin_cummax(self): result = getattr(df.groupby('a')['b'], method)() tm.assert_series_equal(expected, result) + # GH 15635 + df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) + result = df.groupby('a').b.cummax() + expected = pd.Series([2, 1, 2], name='b') + tm.assert_series_equal(result, expected) + + df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + result = df.groupby('a').b.cummin() + expected = pd.Series([1, 2, 1], name='b') + tm.assert_series_equal(result, expected) + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From a37c610c3c0759a0b587e6776df8ab8a55f6a266 Mon Sep 17 00:00:00 2001 From: linebp Date: Fri, 10 Mar 2017 13:07:10 +0100 Subject: [PATCH 185/353] DOC GH15643 Removed pytest-xdist from requirements_dev.txt file (#15646) --- ci/requirements_dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index b0a8adc8df5cb..1e051802ec9f8 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ 
-4,5 +4,4 @@ numpy cython pytest pytest-cov -pytest-xdist flake8 From 94c6c0ce46ceae8ceb3a81f17014e6551902e653 Mon Sep 17 00:00:00 2001 From: JennaVergeynst Date: Fri, 10 Mar 2017 14:44:26 +0100 Subject: [PATCH 186/353] DOC: add examples to DataFrame.dropna (#15620) --- pandas/core/frame.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2062f301b9e0e..987eb10101f12 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3084,6 +3084,50 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, Returns ------- dropped : DataFrame + + Examples + -------- + >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, 5]], + ... columns=list('ABCD')) + >>> df + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 NaN NaN NaN 5 + + Drop the columns where all elements are nan: + + >>> df.dropna(axis=1, how='all') + A B D + 0 NaN 2.0 0 + 1 3.0 4.0 1 + 2 NaN NaN 5 + + Drop the columns where any of the elements is nan + + >>> df.dropna(axis=1, how='any') + D + 0 0 + 1 1 + 2 5 + + Drop the rows where all of the elements are nan + (there is no row to drop, so df stays the same): + + >>> df.dropna(axis=0, how='all') + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 NaN NaN NaN 5 + + Keep only the rows with at least 2 non-na values: + + >>> df.dropna(thresh=2) + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + """ inplace = validate_bool_kwarg(inplace, 'inplace') if isinstance(axis, (tuple, list)): From 67d529a00066c6f6278d9971048c69a22febe0cc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 09:24:46 -0500 Subject: [PATCH 187/353] DOC: doc warnings Author: Jeff Reback Closes #15647 from jreback/doc and squashes the following commits: 6afb394 [Jeff Reback] gbq install adjustment 0fd4499 [Jeff Reback] maybe d5ec228 [Jeff Reback] DOC: fixup some doc-links b7ea898 [Jeff Reback] DOC: some deprecation warnings removed --- doc/source/index.rst.template | 1 - doc/source/install.rst | 2 +- doc/source/whatsnew/v0.10.0.txt | 5 ++--- doc/source/whatsnew/v0.10.1.txt | 4 ++-- doc/source/whatsnew/v0.17.0.txt | 2 +- doc/source/whatsnew/v0.18.0.txt | 2 +- doc/source/whatsnew/v0.19.0.txt | 2 +- doc/source/whatsnew/v0.20.0.txt | 6 +++--- pandas/io/gbq.py | 5 +++-- 9 files changed, 14 insertions(+), 15 deletions(-) diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index 67072ff9fb224..0bfb2b635f53a 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -116,7 +116,6 @@ See the package overview for more detail about what's in the library. whatsnew install contributing - faq overview 10min tutorials diff --git a/doc/source/install.rst b/doc/source/install.rst index fe2a9fa4ba509..578caae605471 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -260,7 +260,7 @@ Optional Dependencies `__, or `xclip `__: necessary to use :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. -* For Google BigQuery I/O - see :ref:`here `. +* For Google BigQuery I/O - see `here `__ * `Backports.lzma `__: Only for Python 2, for writing to and/or reading from an xz compressed DataFrame in CSV; Python 3 support is built into the standard library. 
* One of the following combinations of libraries is needed to use the diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index fed3ba3ce3a84..cf5369466308c 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -303,11 +303,10 @@ Updated PyTables Support store.append('wp',wp) # selecting via A QUERY - store.select('wp', - [ Term('major_axis>20000102'), Term('minor_axis', '=', ['A','B']) ]) + store.select('wp', "major_axis>20000102 and minor_axis=['A','B']") # removing data from tables - store.remove('wp', Term('major_axis>20000103')) + store.remove('wp', "major_axis>20000103") store.select('wp') # deleting a store diff --git a/doc/source/whatsnew/v0.10.1.txt b/doc/source/whatsnew/v0.10.1.txt index edc628fe85027..d5880e44e46c6 100644 --- a/doc/source/whatsnew/v0.10.1.txt +++ b/doc/source/whatsnew/v0.10.1.txt @@ -58,7 +58,7 @@ perform queries on a table, by passing a list to ``data_columns`` # on-disk operations store.append('df', df, data_columns = ['B','C','string','string2']) - store.select('df',[ 'B > 0', 'string == foo' ]) + store.select('df', "B>0 and string=='foo'") # this is in-memory version of this type of selection df[(df.B > 0) & (df.string == 'foo')] @@ -110,7 +110,7 @@ columns, this is equivalent to passing a store.select('mi') # the levels are automatically included as data columns - store.select('mi', Term('foo=bar')) + store.select('mi', "foo='bar'") Multi-table creation via ``append_to_multiple`` and selection via ``select_as_multiple`` can create/select from multiple tables and return a diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 9cb299593076d..a3bbaf73c01ca 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -329,7 +329,7 @@ has been changed to make this keyword unnecessary - the change is shown below. Google BigQuery Enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Added ability to automatically create a table/dataset using the :func:`pandas.io.gbq.to_gbq` function if the destination table/dataset does not exist. (:issue:`8325`, :issue:`11121`). -- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the :ref:`docs ` for more details (:issue:`8325`). +- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the `docs `__ for more details (:issue:`8325`). - ``InvalidColumnOrder`` and ``InvalidPageToken`` in the gbq module will raise ``ValueError`` instead of ``IOError``. - The ``generate_bq_schema()`` function is now deprecated and will be removed in a future version (:issue:`11121`) - The gbq module will now support Python 3 (:issue:`11094`). diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 893922b719b34..4b27cf706f9b2 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -518,7 +518,7 @@ Other enhancements - Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`) - ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`) - ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`) -- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). 
For further details see :ref:`here ` +- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). For further details see `here `__ - ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`). - Add missing methods/fields to ``.dt`` for ``Period`` (:issue:`8848`) - The entire codebase has been ``PEP``-ified (:issue:`12096`) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 8e7e95c071ea4..9b003034aa94a 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -377,7 +377,7 @@ For ``MultiIndex``, values are dropped if any level is missing by default. Speci Google BigQuery Enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs ` for more details (:issue:`13615`). +- The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the `docs `__ for more details (:issue:`13615`). - The :func:`~DataFrame.to_gbq` method now allows the DataFrame column order to differ from the destination table schema (:issue:`11359`). .. _whatsnew_0190.errstate: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 47aa4450b897f..7b24264cd09db 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -203,7 +203,7 @@ New Behavior: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() -.. _whatsnew_0200.enhancements.table_schema +.. _whatsnew_0200.enhancements.table_schema: Table Schema Output ^^^^^^^^^^^^^^^^^^^ @@ -337,7 +337,7 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi df.iloc[[0, 2], df.columns.get_loc('A')] -.. _whatsnew.api_breaking.io_compat +.. _whatsnew.api_breaking.io_compat: Possible incompat for HDF5 formats for pandas < 0.13.0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -758,7 +758,7 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) -- Bug in ``.asfreq()``, where frequency was not set for empty ``Series` (:issue:`14320`) +- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 9cfb27a92bfef..b4dc9173f11ba 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -14,8 +14,9 @@ def _try_import(): "the pandas-gbq package is not installed\n" "see the docs: https://pandas-gbq.readthedocs.io\n" "\n" - "you can install via:\n" - "pip install pandas-gbq\n") + "you can install via pip or conda:\n" + "pip install pandas-gbq\n" + "conda install pandas-gbq -c conda-forge\n") return pandas_gbq From 1be66ac975d89be9c5b695ce34a4a18ffed355ec Mon Sep 17 00:00:00 2001 From: Kernc Date: Fri, 10 Mar 2017 09:27:45 -0500 Subject: [PATCH 188/353] ENH: Native conversion from/to scipy.sparse matrix to SparseDataFrame closes #4343 Author: Kernc Closes #15497 from kernc/scipy-sparse and squashes the following commits: a0f2208 [Kernc] DOC: Fix some whatsnew/v0.20.0.txt sphinx warnings e72e594 [Kernc] ENH: Native conversion from/to scipy.sparse matrix to SparseDataFrame --- doc/source/api.rst | 11 ++- doc/source/sparse.rst | 32 +++++++- doc/source/whatsnew/v0.20.0.txt | 29 +++++++- pandas/sparse/array.py | 9 ++- pandas/sparse/frame.py | 107 ++++++++++++++++++++++----- pandas/tests/sparse/common.py | 10 +++ pandas/tests/sparse/test_frame.py | 62 ++++++++++++++++ pandas/tests/types/test_inference.py | 9 +++ pandas/types/common.py | 14 ++++ pandas/util/testing.py | 5 ++ 10 files changed, 266 insertions(+), 22 deletions(-) create mode 100644 pandas/tests/sparse/common.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 7e297a15055a0..f6bf480bebcfc 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -711,8 +711,8 @@ Serialization / IO / Conversion Series.to_string Series.to_clipboard -Sparse methods -~~~~~~~~~~~~~~ +Sparse +~~~~~~ .. autosummary:: :toctree: generated/ @@ -1030,6 +1030,13 @@ Serialization / IO / Conversion DataFrame.to_string DataFrame.to_clipboard +Sparse +~~~~~~ +.. autosummary:: + :toctree: generated/ + + SparseDataFrame.to_coo + .. _api.panel: Panel diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 2bc5d3f6dd0f5..b4884cf1c4141 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -186,7 +186,37 @@ the correct dense result. Interaction with scipy.sparse ----------------------------- -Experimental api to transform between sparse pandas and scipy.sparse structures. +SparseDataFrame +~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +Pandas supports creating sparse dataframes directly from ``scipy.sparse`` matrices. + +.. ipython:: python + + from scipy.sparse import csr_matrix + + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + + sp_arr = csr_matrix(arr) + sp_arr + + sdf = pd.SparseDataFrame(sp_arr) + sdf + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. +To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you can use the :meth:`SparseDataFrame.to_coo` method: + +.. ipython:: python + + sdf.to_coo() + +SparseSeries +~~~~~~~~~~~~ + +.. versionadded:: 0.16.0 A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7b24264cd09db..3c82e533dd158 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -237,10 +237,37 @@ You must enable this by setting the ``display.html.table_schema`` option to True .. 
_Table Schema: http://specs.frictionlessdata.io/json-table-schema/ .. _nteract: http://nteract.io/ +.. _whatsnew_0200.enhancements.scipy_sparse: + +SciPy sparse matrix from/to SparseDataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas now supports creating sparse dataframes directly from ``scipy.sparse.spmatrix`` instances. +See the :ref:`documentation ` for more information. (:issue:`4343`) + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. + +.. ipython:: python + + from scipy.sparse import csr_matrix + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + sp_arr = csr_matrix(arr) + sp_arr + sdf = pd.SparseDataFrame(sp_arr) + sdf + +To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you can use: + +.. ipython:: python + + sdf.to_coo() + .. _whatsnew_0200.enhancements.other: Other enhancements ^^^^^^^^^^^^^^^^^^ + - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) @@ -752,7 +779,6 @@ Bug Fixes - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) -- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) @@ -783,6 +809,7 @@ Bug Fixes - Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). - Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) - Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) - Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 762b6d869eae0..5f4c07971d37e 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -20,6 +20,7 @@ is_integer_dtype, is_bool_dtype, is_list_like, + is_string_dtype, is_scalar, is_dtype_equal) from pandas.types.cast import (_possibly_convert_platform, _maybe_promote, _astype_nansafe, _find_common_type) @@ -769,6 +770,12 @@ def make_sparse(arr, kind='block', fill_value=None): if isnull(fill_value): mask = notnull(arr) else: + # For str arrays in NumPy 1.12.0, operator!= below isn't + # element-wise but just returns False if fill_value is not str, + # so cast to object comparison to be safe + if is_string_dtype(arr): + arr = arr.astype(object) + mask = arr != fill_value length = len(arr) @@ -776,7 +783,7 @@ def make_sparse(arr, kind='block', fill_value=None): # the arr is a SparseArray indices = mask.sp_index.indices else: - indices = np.arange(length, dtype=np.int32)[mask] + indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 61b8434b0ea09..a21f64f524a0a 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -11,8 +11,8 @@ import numpy as np from pandas.types.missing import isnull, notnull -from pandas.types.cast import _maybe_upcast -from pandas.types.common import _ensure_platform_int +from pandas.types.cast import _maybe_upcast, _find_common_type +from pandas.types.common import _ensure_platform_int, is_scipy_sparse from pandas.core.common import _try_sort from pandas.compat.numpy import function as nv @@ -25,6 +25,7 @@ create_block_manager_from_arrays) import pandas.core.generic as generic from pandas.sparse.series import SparseSeries, SparseArray +from pandas.sparse.libsparse import BlockIndex, get_blocks from pandas.util.decorators import Appender import pandas.core.ops as ops @@ -39,15 +40,15 @@ class SparseDataFrame(DataFrame): Parameters ---------- - data : same types as can be passed to DataFrame + data : same types as can be passed to DataFrame or scipy.sparse.spmatrix index : array-like, optional column : array-like, optional default_kind : {'block', 'integer'}, default 'block' Default sparse kind for converting Series to SparseSeries. Will not override SparseSeries passed into constructor default_fill_value : float - Default fill_value for converting Series to SparseSeries. Will not - override SparseSeries passed in + Default fill_value for converting Series to SparseSeries + (default: nan). Will not override SparseSeries passed in. 
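    A minimal usage sketch for the new ``scipy.sparse`` support described
    above (requires ``scipy`` to be installed; the names ``sp_arr`` and
    ``sdf`` are illustrative only, not part of the API):

        >>> import numpy as np
        >>> import pandas as pd
        >>> from scipy.sparse import csr_matrix
        >>> sp_arr = csr_matrix(np.eye(3))    # any scipy.sparse format is accepted
        >>> sdf = pd.SparseDataFrame(sp_arr)  # non-COO input is converted, copying as needed
        >>> coo = sdf.to_coo()                # round-trip back to a SciPy COO matrix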
""" _constructor_sliced = SparseSeries _subtyp = 'sparse_frame' @@ -84,22 +85,19 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None, self._default_kind = default_kind self._default_fill_value = default_fill_value - if isinstance(data, dict): - mgr = self._init_dict(data, index, columns) - if dtype is not None: - mgr = mgr.astype(dtype) + if is_scipy_sparse(data): + mgr = self._init_spmatrix(data, index, columns, dtype=dtype, + fill_value=default_fill_value) + elif isinstance(data, dict): + mgr = self._init_dict(data, index, columns, dtype=dtype) elif isinstance(data, (np.ndarray, list)): - mgr = self._init_matrix(data, index, columns) - if dtype is not None: - mgr = mgr.astype(dtype) + mgr = self._init_matrix(data, index, columns, dtype=dtype) elif isinstance(data, SparseDataFrame): mgr = self._init_mgr(data._data, dict(index=index, columns=columns), dtype=dtype, copy=copy) elif isinstance(data, DataFrame): - mgr = self._init_dict(data, data.index, data.columns) - if dtype is not None: - mgr = mgr.astype(dtype) + mgr = self._init_dict(data, data.index, data.columns, dtype=dtype) elif isinstance(data, BlockManager): mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) @@ -174,7 +172,43 @@ def _init_dict(self, data, index, columns, dtype=None): return to_manager(sdict, columns, index) def _init_matrix(self, data, index, columns, dtype=None): + """ Init self from ndarray or list of lists """ data = _prep_ndarray(data, copy=False) + index, columns = self._prep_index(data, index, columns) + data = dict([(idx, data[:, i]) for i, idx in enumerate(columns)]) + return self._init_dict(data, index, columns, dtype) + + def _init_spmatrix(self, data, index, columns, dtype=None, + fill_value=None): + """ Init self from scipy.sparse matrix """ + index, columns = self._prep_index(data, index, columns) + data = data.tocoo() + N = len(index) + + # Construct a dict of SparseSeries + sdict = {} + values = Series(data.data, index=data.row, copy=False) + for col, rowvals in values.groupby(data.col): + # get_blocks expects int32 row indices in sorted order + rows = rowvals.index.values.astype(np.int32) + rows.sort() + blocs, blens = get_blocks(rows) + + sdict[columns[col]] = SparseSeries( + rowvals.values, index=index, + fill_value=fill_value, + sparse_index=BlockIndex(N, blocs, blens)) + + # Add any columns that were empty and thus not grouped on above + sdict.update({column: SparseSeries(index=index, + fill_value=fill_value, + sparse_index=BlockIndex(N, [], [])) + for column in columns + if column not in sdict}) + + return self._init_dict(sdict, index, columns, dtype) + + def _prep_index(self, data, index, columns): N, K = data.shape if index is None: index = _default_index(N) @@ -187,9 +221,48 @@ def _init_matrix(self, data, index, columns, dtype=None): if len(index) != N: raise ValueError('Index length mismatch: %d vs. %d' % (len(index), N)) + return index, columns - data = dict([(idx, data[:, i]) for i, idx in enumerate(columns)]) - return self._init_dict(data, index, columns, dtype) + def to_coo(self): + """ + Return the contents of the frame as a sparse SciPy COO matrix. + + .. versionadded:: 0.20.0 + + Returns + ------- + coo_matrix : scipy.sparse.spmatrix + If the caller is heterogeneous and contains booleans or objects, + the result will be of dtype=object. See Notes. 
+ + Notes + ----- + The dtype will be the lowest-common-denominator type (implicit + upcasting); that is to say if the dtypes (even of numeric types) + are mixed, the one that accommodates all will be chosen. + + e.g. If the dtypes are float16 and float32, dtype will be upcast to + float32. By numpy.find_common_type convention, mixing int64 and + and uint64 will result in a float64 dtype. + """ + try: + from scipy.sparse import coo_matrix + except ImportError: + raise ImportError('Scipy is not installed') + + dtype = _find_common_type(self.dtypes) + cols, rows, datas = [], [], [] + for col, name in enumerate(self): + s = self[name] + row = s.sp_index.to_int_index().indices + cols.append(np.repeat(col, len(row))) + rows.append(row) + datas.append(s.sp_values.astype(dtype, copy=False)) + + cols = np.concatenate(cols) + rows = np.concatenate(rows) + datas = np.concatenate(datas) + return coo_matrix((datas, (rows, cols)), shape=self.shape) def __array_wrap__(self, result): return self._constructor( diff --git a/pandas/tests/sparse/common.py b/pandas/tests/sparse/common.py new file mode 100644 index 0000000000000..3aeef8d436e1a --- /dev/null +++ b/pandas/tests/sparse/common.py @@ -0,0 +1,10 @@ +import pytest + +import pandas.util.testing as tm + + +@pytest.fixture(params=['bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil']) +def spmatrix(request): + tm._skip_if_no_scipy() + from scipy import sparse + return getattr(sparse, request.param + '_matrix') diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index a7dd7f2e81033..4cd5a643ce4be 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -2,11 +2,17 @@ import operator +import pytest + from numpy import nan import numpy as np import pandas as pd from pandas import Series, DataFrame, bdate_range, Panel +from pandas.types.common import (is_bool_dtype, + is_float_dtype, + is_object_dtype, + is_float) from pandas.tseries.index import DatetimeIndex from pandas.tseries.offsets import BDay import pandas.util.testing as tm @@ -18,6 +24,8 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame, SparseArray from pandas.tests.frame.test_misc_api import SharedWithSparse +from pandas.tests.sparse.common import spmatrix # noqa: F401 + class TestSparseDataFrame(tm.TestCase, SharedWithSparse): @@ -1118,6 +1126,60 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) +@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 +@pytest.mark.parametrize('columns', [None, list('cd')]) +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) +@pytest.mark.parametrize('dtype', [object, bool, int, float, np.uint16]) +def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): + # GH 4343 + tm._skip_if_no_scipy() + + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(2, dtype=dtype) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected = 
pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) + + # Ensure dtype is preserved if possible + was_upcast = ((fill_value is None or is_float(fill_value)) and + not is_object_dtype(dtype) and + not is_float_dtype(dtype)) + res_dtype = (bool if is_bool_dtype(dtype) else + float if was_upcast else + dtype) + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + tm.assert_equal(sdf.to_coo().dtype, res_dtype) + + # However, adding a str column results in an upcast to object + sdf['strings'] = np.arange(len(sdf)).astype(str) + tm.assert_equal(sdf.to_coo().dtype, np.object_) + + class TestSparseDataFrameArithmetic(tm.TestCase): def test_numeric_op_scalar(self): diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index a36a77a70f9ad..b41df0da45234 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -30,11 +30,14 @@ is_float, is_bool, is_scalar, + is_scipy_sparse, _ensure_int32, _ensure_categorical) from pandas.types.missing import isnull from pandas.util import testing as tm +from pandas.tests.sparse.test_frame import spmatrix # noqa: F401 + def test_is_sequence(): is_seq = inference.is_sequence @@ -946,6 +949,12 @@ def test_nan_to_nat_conversions(): assert (s[8].value == np.datetime64('NaT').astype(np.int64)) +def test_is_scipy_sparse(spmatrix): # noqa: F811 + tm._skip_if_no_scipy() + assert is_scipy_sparse(spmatrix([[0, 1]])) + assert not is_scipy_sparse(np.array([1])) + + def test_ensure_int32(): values = np.arange(10, dtype=np.int32) result = _ensure_int32(values) diff --git a/pandas/types/common.py b/pandas/types/common.py index 1be5b5f6f1368..a1f03e59a5e6e 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -23,6 +23,9 @@ _TD_DTYPE = np.dtype('m8[ns]') _INT64_DTYPE = np.dtype(np.int64) +# oh the troubles to reduce import time +_is_scipy_sparse = None + _ensure_float64 = algos.ensure_float64 _ensure_float32 = algos.ensure_float32 @@ -59,6 +62,17 @@ def is_sparse(array): return isinstance(array, (ABCSparseArray, ABCSparseSeries)) +def is_scipy_sparse(array): + """ return if we are a scipy.sparse.spmatrix """ + global _is_scipy_sparse + if _is_scipy_sparse is None: + try: + from scipy.sparse import issparse as _is_scipy_sparse + except ImportError: + _is_scipy_sparse = lambda _: False + return _is_scipy_sparse(array) + + def is_categorical(array): """ return if we are a categorical possibility """ return isinstance(array, ABCCategorical) or is_categorical_dtype(array) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b68bf55a347b2..ec30a9376a9da 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -297,6 +297,11 @@ def _skip_if_no_scipy(): except ImportError: import pytest pytest.skip('scipy.interpolate missing') + try: + import scipy.sparse # noqa + except ImportError: + import pytest + pytest.skip('scipy.sparse missing') def _skip_if_scipy_0_17(): From 15e8e9a53d036f8e436ae5ad4eff66fc48f67d30 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 10 Mar 2017 09:38:25 -0500 Subject: [PATCH 189/353] BUG: Error when specifying int index containing NaN xref #15187. 
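Requesting an integer dtype for index data that contains ``NaN`` now raises a
``ValueError`` instead of quietly ignoring the requested dtype. A minimal
sketch of the intended behaviour with this patch applied (import alias and
values are illustrative):

    import numpy as np
    import pandas as pd

    pd.Index([1, 2, np.nan], dtype='float')   # ok: a Float64Index is returned
    pd.Index([1, 2, np.nan], dtype='int64')   # raises ValueError: cannot convert float NaN to integer
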
Author: gfyoung Closes #15616 from gfyoung/nan-int-index and squashes the following commits: 195b830 [gfyoung] BUG: Error when specifying int index containing NaN --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/indexes/base.py | 27 +++++++++++++++++++++++---- pandas/tests/indexes/test_base.py | 17 +++++++++++++++++ pandas/tests/indexes/test_numeric.py | 27 ++++++++++++++++++++++++++- pandas/tests/indexes/test_range.py | 28 +++++++++++++++++++++++++++- 5 files changed, 94 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3c82e533dd158..dd081ea605c01 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -792,6 +792,7 @@ Bug Fixes - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) +- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 607a463083fdd..7f46f437489a1 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -203,6 +203,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if inferred == 'integer': data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: + if isnull(data).any(): + raise ValueError('cannot convert float ' + 'NaN to integer') # If we are actually all equal to integers, # then coerce to integer. @@ -230,8 +233,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, else: data = np.array(data, dtype=dtype, copy=copy) - except (TypeError, ValueError): - pass + except (TypeError, ValueError) as e: + msg = str(e) + if 'cannot convert float' in msg: + raise # maybe coerce to a sub-class from pandas.tseries.period import (PeriodIndex, @@ -585,7 +590,14 @@ def where(self, cond, other=None): if other is None: other = self._na_value values = np.where(cond, self.values, other) - return self._shallow_copy_with_infer(values, dtype=self.dtype) + + dtype = self.dtype + if self._is_numeric_dtype and np.any(isnull(values)): + # We can't coerce to the numeric dtype of "self" (unless + # it's float) if there are NaN values in our output. + dtype = None + + return self._shallow_copy_with_infer(values, dtype=dtype) def ravel(self, order='C'): """ @@ -689,7 +701,14 @@ def _coerce_scalar_to_index(self, item): ---------- item : scalar item to coerce """ - return Index([item], dtype=self.dtype, **self._get_attributes_dict()) + dtype = self.dtype + + if self._is_numeric_dtype and isnull(item): + # We can't coerce to the numeric dtype of "self" (unless + # it's float) if there are NaN values in our output. + dtype = None + + return Index([item], dtype=dtype, **self._get_attributes_dict()) _index_shared_docs['copy'] = """ Make a copy of this object. 
Name and dtype sets those attributes on diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 8c0a399cb58b3..05d3478ab0705 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -199,6 +199,23 @@ def __array__(self, dtype=None): result = pd.Index(ArrayLike(array)) self.assert_index_equal(result, expected) + def test_constructor_int_dtype_nan(self): + # see gh-15187 + data = [np.nan] + msg = "cannot convert" + + with tm.assertRaisesRegexp(ValueError, msg): + Index(data, dtype='int64') + + with tm.assertRaisesRegexp(ValueError, msg): + Index(data, dtype='uint64') + + # This, however, should not break + # because NaN is float. + expected = Float64Index(data) + result = Index(data, dtype='float') + tm.assert_index_equal(result, expected) + def test_index_ctor_infer_nan_nat(self): # GH 13467 exp = pd.Float64Index([np.nan, np.nan]) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index e23e7c19ed799..d0ce34169f79e 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -5,7 +5,7 @@ import numpy as np -from pandas import (date_range, Series, Index, Float64Index, +from pandas import (date_range, notnull, Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex) import pandas.util.testing as tm @@ -686,6 +686,31 @@ def test_coerce_list(self): arr = Index([1, 2, 3, 4], dtype=object) tm.assertIsInstance(arr, Index) + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + _nan = i._na_value + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([_nan] + i[1:].tolist()) + + result = i.where(cond) + tm.assert_index_equal(result, expected) + + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist()) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + def test_get_indexer(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 38e715fce2720..53c88897d6764 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -8,7 +8,8 @@ import numpy as np -from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex) +from pandas import (notnull, Series, Index, Float64Index, + Int64Index, RangeIndex) from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm @@ -915,3 +916,28 @@ def test_len_specialised(self): i = RangeIndex(0, 5, step) self.assertEqual(len(i), 0) + + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + _nan = i._na_value + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([_nan] + i[1:].tolist()) + + result = i.where(cond) + tm.assert_index_equal(result, expected) + + def test_where_array_like(self): + i = self.create_index() + + _nan = i._na_value + cond = [False] + [True] * (len(i) - 1) + klasses = [list, tuple, np.array, pd.Series] + expected = pd.Index([_nan] + i[1:].tolist()) + + for klass in klasses: + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) From 5dee1f18ac2a06d38e4bb3800eee11424ec25ca1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: 
Fri, 10 Mar 2017 16:25:29 -0500 Subject: [PATCH 190/353] API: Drop DataFrame.iterkv() Deprecated since 0.17.0 xref #10711 Author: gfyoung Closes #15650 from gfyoung/df-iterkv-remove and squashes the following commits: e40fc9e [gfyoung] API: Drop DataFrame.iterkv() --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 10 ---------- pandas/tests/frame/test_misc_api.py | 6 +----- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index dd081ea605c01..f42dfb80924e0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -692,6 +692,7 @@ Other API Changes - Reorganization of timeseries development tests (:issue:`14854`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) +- ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) .. _whatsnew_0200.deprecations: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a0111cb9ef7ec..1db9677659ca3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -899,16 +899,6 @@ def iteritems(self): for h in self._info_axis: yield h, self[h] - # originally used to get around 2to3's changes to iteritems. - # Now unnecessary. Sidenote: don't want to deprecate this for a while, - # otherwise libraries that use 2to3 will have issues. - def iterkv(self, *args, **kwargs): - "iteritems alias used to get around 2to3. Deprecated" - warnings.warn("iterkv is deprecated and will be removed in a future " - "release, use ``iteritems`` instead.", FutureWarning, - stacklevel=2) - return self.iteritems(*args, **kwargs) - def __len__(self): """Returns length of info axis""" return len(self._info_axis) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 674202980807a..321d46739b24c 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -389,11 +389,7 @@ def test_repr_with_mi_nat(self): exp = ' X\nNaT a 1\n2013-01-01 b 2' self.assertEqual(res, exp) - def test_iterkv_deprecation(self): - with tm.assert_produces_warning(FutureWarning): - self.mixed_float.iterkv() - - def test_iterkv_names(self): + def test_iteritems_names(self): for k, v in compat.iteritems(self.mixed_frame): self.assertEqual(v.name, k) From 026e748e4ff558de80c92c04986a78754b430902 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 10 Mar 2017 18:04:41 -0500 Subject: [PATCH 191/353] BUG/API: .merge() and .join() on category dtype columns will now preserve category dtype closes #10409 Author: Jeff Reback Closes #15321 from jreback/merge_cat and squashes the following commits: 3671dad [Jeff Reback] DOC: merge docs a4b2ee6 [Jeff Reback] BUG/API: .merge() and .join() on category dtype columns will now preserve the category dtype when possible --- asv_bench/benchmarks/join_merge.py | 36 +++++- doc/source/categorical.rst | 3 + doc/source/merging.rst | 73 +++++++++++ doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/core/internals.py | 2 + pandas/tests/test_categorical.py | 3 + pandas/tests/tools/test_merge.py | 177 +++++++++++++++++++++----- pandas/tests/tools/test_merge_asof.py | 1 + pandas/tests/types/test_common.py | 50 ++++++-- pandas/tools/merge.py | 86 ++++++++++--- 10 files changed, 364 insertions(+), 71 deletions(-) diff --git 
a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index d9c631fa92efd..776316343e009 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -6,7 +6,7 @@ from pandas import ordered_merge as merge_ordered -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Append class Append(object): @@ -35,7 +35,7 @@ def time_append_mixed(self): self.mdf1.append(self.mdf2) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Concat class Concat(object): @@ -120,7 +120,7 @@ def time_f_ordered_axis1(self): concat(self.frames_f, axis=1, ignore_index=True) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Joins class Join(object): @@ -202,7 +202,7 @@ def time_join_non_unique_equal(self): (self.fracofday * self.temp[self.fracofday.index]) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Merges class Merge(object): @@ -257,7 +257,31 @@ def time_i8merge(self): merge(self.left, self.right, how='outer') -#---------------------------------------------------------------------- +class MergeCategoricals(object): + goal_time = 0.2 + + def setup(self): + self.left_object = pd.DataFrame( + {'X': np.random.choice(range(0, 10), size=(10000,)), + 'Y': np.random.choice(['one', 'two', 'three'], size=(10000,))}) + + self.right_object = pd.DataFrame( + {'X': np.random.choice(range(0, 10), size=(10000,)), + 'Z': np.random.choice(['jjj', 'kkk', 'sss'], size=(10000,))}) + + self.left_cat = self.left_object.assign( + Y=self.left_object['Y'].astype('category')) + self.right_cat = self.right_object.assign( + Z=self.right_object['Z'].astype('category')) + + def time_merge_object(self): + merge(self.left_object, self.right_object, on='X') + + def time_merge_cat(self): + merge(self.left_cat, self.right_cat, on='X') + + +# ---------------------------------------------------------------------- # Ordered merge class MergeOrdered(object): @@ -332,7 +356,7 @@ def time_multiby(self): merge_asof(self.df1e, self.df2e, on='time', by=['key', 'key2']) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # data alignment class Align(object): diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index db974922e1d76..6d85e1a6560b0 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -646,6 +646,9 @@ In this case the categories are not the same and so an error is raised: The same applies to ``df.append(df_different)``. +See also the section on :ref:`merge dtypes` for notes about preserving merge dtypes and performance. + + .. _categorical.union: Unioning diff --git a/doc/source/merging.rst b/doc/source/merging.rst index f732f0a4cc749..70d2ce5b1a664 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -746,6 +746,79 @@ The ``indicator`` argument will also accept string arguments, in which case the pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column') +.. _merging.dtypes: + +Merge Dtypes +~~~~~~~~~~~~ + +.. versionadded:: 0.19.0 + +Merging will preserve the dtype of the join keys. + +.. 
ipython:: python + + left = pd.DataFrame({'key': [1], 'v1': [10]}) + left + right = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]}) + right + +We are able to preserve the join keys + +.. ipython:: python + + pd.merge(left, right, how='outer') + pd.merge(left, right, how='outer').dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast. + +.. ipython:: python + + pd.merge(left, right, how='outer', on='key') + pd.merge(left, right, how='outer', on='key').dtypes + +.. versionadded:: 0.20.0 + +Merging will preserve ``category`` dtypes of the mergands. + +The left frame. + +.. ipython:: python + + X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) + X = X.astype('category', categories=['foo', 'bar']) + + left = DataFrame({'X': X, + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + left + left.dtypes + +The right frame. + +.. ipython:: python + + right = DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), + 'Z': [1, 2]}) + right + right.dtypes + +The merged result + +.. ipython:: python + + result = pd.merge(left, right, how='outer') + result + result.dtypes + +.. note:: + + The category dtypes must be *exactly* the same, meaning the same categories and the ordered attribute. + Otherwise the result will coerce to ``object`` dtype. + +.. note:: + + Merging on ``category`` dtypes that are the same can be quite performant compared to ``object`` dtype merging. + .. _merging.join.index: Joining on index diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f42dfb80924e0..e392023423eb0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -692,7 +692,7 @@ Other API Changes - Reorganization of timeseries development tests (:issue:`14854`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) -- ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) +- ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) .. _whatsnew_0200.deprecations: @@ -733,6 +733,7 @@ Removal of prior version deprecations/changes - ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:`15098`) - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). +- The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) .. _whatsnew_0200.performance: @@ -749,6 +750,7 @@ Performance Improvements - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) - Improved performance of ``.rank()`` for categorical data (:issue:`15498`) - Improved performance when using ``.unstack()`` (:issue:`15503`) +- Improved performance of merge/join on ``category`` columns (:issue:`10409`) .. 
_whatsnew_0200.bug_fixes: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4b43574f49820..aa954fbee9a60 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5227,6 +5227,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): # External code requested filling/upcasting, bool values must # be upcasted to object to avoid being upcasted to numeric. values = self.block.astype(np.object_).values + elif self.block.is_categorical: + values = self.block.values else: # No dtype upcasting is done here, it will be performed during # concatenation itself. diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index cc99cf0f830aa..2d5e98d49e152 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4097,9 +4097,12 @@ def test_merge(self): expected = df.copy() # object-cat + # note that we propogate the category + # because we don't have any matching rows cright = right.copy() cright['d'] = cright['d'].astype('category') result = pd.merge(left, cright, how='left', left_on='b', right_on='c') + expected['d'] = expected['d'].astype('category', categories=['null']) tm.assert_frame_equal(result, expected) # cat-object diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index b3b5e7e29319b..ff27500355998 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -1,5 +1,6 @@ # pylint: disable=E1103 +import pytest from datetime import datetime from numpy.random import randn from numpy import nan @@ -11,6 +12,8 @@ from pandas.tools.concat import concat from pandas.tools.merge import merge, MergeError from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.types.dtypes import CategoricalDtype +from pandas.types.common import is_categorical_dtype, is_object_dtype from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm @@ -1024,38 +1027,6 @@ def test_left_join_index_multi_match(self): expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) - def test_join_multi_dtypes(self): - - # test with multi dtypes in the join index - def _test(dtype1, dtype2): - left = DataFrame({'k1': np.array([0, 1, 2] * 8, dtype=dtype1), - 'k2': ['foo', 'bar'] * 12, - 'v': np.array(np.arange(24), dtype=np.int64)}) - - index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')]) - right = DataFrame( - {'v2': np.array([5, 7], dtype=dtype2)}, index=index) - - result = left.join(right, on=['k1', 'k2']) - - expected = left.copy() - - if dtype2.kind == 'i': - dtype2 = np.dtype('float64') - expected['v2'] = np.array(np.nan, dtype=dtype2) - expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5 - expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7 - - tm.assert_frame_equal(result, expected) - - result = left.join(right, on=['k1', 'k2'], sort=True) - expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True) - tm.assert_frame_equal(result, expected) - - for d1 in [np.int64, np.int32, np.int16, np.int8, np.uint8]: - for d2 in [np.int64, np.float64, np.float32, np.float16]: - _test(np.dtype(d1), np.dtype(d2)) - def test_left_merge_na_buglet(self): left = DataFrame({'id': list('abcde'), 'v1': randn(5), 'v2': randn(5), 'dummy': list('abcde'), @@ -1242,3 +1213,145 @@ def f(): def f(): household.join(log_return, how='outer') self.assertRaises(NotImplementedError, f) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar'], + 'B': 
Series(['foo', 'bar']).astype('category'), + 'C': [1, 2], + 'D': [1.0, 2.0], + 'E': Series([1, 2], dtype='uint64'), + 'F': Series([1, 2], dtype='int32')}) + + +class TestMergeDtypes(object): + + def test_different(self, df): + + # we expect differences by kind + # to be ok, while other differences should return object + + left = df + for col in df.columns: + right = DataFrame({'A': df[col]}) + result = pd.merge(left, right, on='A') + assert is_object_dtype(result.A.dtype) + + @pytest.mark.parametrize('d1', [np.int64, np.int32, + np.int16, np.int8, np.uint8]) + @pytest.mark.parametrize('d2', [np.int64, np.float64, + np.float32, np.float16]) + def test_join_multi_dtypes(self, d1, d2): + + dtype1 = np.dtype(d1) + dtype2 = np.dtype(d2) + + left = DataFrame({'k1': np.array([0, 1, 2] * 8, dtype=dtype1), + 'k2': ['foo', 'bar'] * 12, + 'v': np.array(np.arange(24), dtype=np.int64)}) + + index = MultiIndex.from_tuples([(2, 'bar'), (1, 'foo')]) + right = DataFrame({'v2': np.array([5, 7], dtype=dtype2)}, index=index) + + result = left.join(right, on=['k1', 'k2']) + + expected = left.copy() + + if dtype2.kind == 'i': + dtype2 = np.dtype('float64') + expected['v2'] = np.array(np.nan, dtype=dtype2) + expected.loc[(expected.k1 == 2) & (expected.k2 == 'bar'), 'v2'] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == 'foo'), 'v2'] = 7 + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=['k1', 'k2'], sort=True) + expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True) + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def left(): + np.random.seed(1234) + return DataFrame( + {'X': Series(np.random.choice( + ['foo', 'bar'], + size=(10,))).astype('category', categories=['foo', 'bar']), + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + + +@pytest.fixture +def right(): + np.random.seed(1234) + return DataFrame( + {'X': Series(['foo', 'bar']).astype('category', + categories=['foo', 'bar']), + 'Z': [1, 2]}) + + +class TestMergeCategorical(object): + + def test_identical(self, left): + # merging on the same, should preserve dtypes + merged = pd.merge(left, left, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + np.dtype('O')], + index=['X', 'Y_x', 'Y_y']) + assert_series_equal(result, expected) + + def test_basic(self, left, right): + # we have matching Categorical dtypes in X + # so should preserve the merged column + merged = pd.merge(left, right, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + np.dtype('int64')], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) + + def test_other_columns(self, left, right): + # non-merge columns should preserve if possible + right = right.assign(Z=right.Z.astype('category')) + + merged = pd.merge(left, right, on='X') + result = merged.dtypes.sort_index() + expected = Series([CategoricalDtype(), + np.dtype('O'), + CategoricalDtype()], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) + + # categories are preserved + assert left.X.values.is_dtype_equal(merged.X.values) + assert right.Z.values.is_dtype_equal(merged.Z.values) + + @pytest.mark.parametrize( + 'change', [lambda x: x, + lambda x: x.astype('category', + categories=['bar', 'foo']), + lambda x: x.astype('category', + categories=['foo', 'bar', 'bah']), + lambda x: x.astype('category', ordered=True)]) + @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) + def test_dtype_on_merged_different(self, 
change, how, left, right): + # our merging columns, X now has 2 different dtypes + # so we must be object as a result + + X = change(right.X.astype('object')) + right = right.assign(X=X) + assert is_categorical_dtype(left.X.values) + assert not left.X.values.is_dtype_equal(right.X.values) + + merged = pd.merge(left, right, on='X', how=how) + + result = merged.dtypes.sort_index() + expected = Series([np.dtype('O'), + np.dtype('O'), + np.dtype('int64')], + index=['X', 'Y', 'Z']) + assert_series_equal(result, expected) diff --git a/pandas/tests/tools/test_merge_asof.py b/pandas/tests/tools/test_merge_asof.py index 76798b3c895ea..cdff8f0349c15 100644 --- a/pandas/tests/tools/test_merge_asof.py +++ b/pandas/tests/tools/test_merge_asof.py @@ -147,6 +147,7 @@ def test_basic_categorical(self): trades.ticker = trades.ticker.astype('category') quotes = self.quotes.copy() quotes.ticker = quotes.ticker.astype('category') + expected.ticker = expected.ticker.astype('category') result = merge_asof(trades, quotes, on='time', diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py index 4667bbd47ad18..c15f219c8fad6 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/types/test_common.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import pytest import numpy as np from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype @@ -38,17 +39,44 @@ def test_period_dtype(self): self.assertEqual(pandas_dtype(dtype), dtype) -def test_dtype_equal(): - assert is_dtype_equal(np.int64, np.int64) - assert not is_dtype_equal(np.int64, np.float64) +dtypes = dict(datetime_tz=pandas_dtype('datetime64[ns, US/Eastern]'), + datetime=pandas_dtype('datetime64[ns]'), + timedelta=pandas_dtype('timedelta64[ns]'), + period=PeriodDtype('D'), + integer=np.dtype(np.int64), + float=np.dtype(np.float64), + object=np.dtype(np.object), + category=pandas_dtype('category')) - p1 = PeriodDtype('D') - p2 = PeriodDtype('D') - assert is_dtype_equal(p1, p2) - assert not is_dtype_equal(np.int64, p1) - p3 = PeriodDtype('2D') - assert not is_dtype_equal(p1, p3) +@pytest.mark.parametrize('name1,dtype1', + list(dtypes.items()), + ids=lambda x: str(x)) +@pytest.mark.parametrize('name2,dtype2', + list(dtypes.items()), + ids=lambda x: str(x)) +def test_dtype_equal(name1, dtype1, name2, dtype2): - assert not DatetimeTZDtype.is_dtype(np.int64) - assert not PeriodDtype.is_dtype(np.int64) + # match equal to self, but not equal to other + assert is_dtype_equal(dtype1, dtype1) + if name1 != name2: + assert not is_dtype_equal(dtype1, dtype2) + + +def test_dtype_equal_strict(): + + # we are strict on kind equality + for dtype in [np.int8, np.int16, np.int32]: + assert not is_dtype_equal(np.int64, dtype) + + for dtype in [np.float32]: + assert not is_dtype_equal(np.float64, dtype) + + # strict w.r.t. PeriodDtype + assert not is_dtype_equal(PeriodDtype('D'), + PeriodDtype('2D')) + + # strict w.r.t. 
datetime64 + assert not is_dtype_equal( + pandas_dtype('datetime64[ns, US/Eastern]'), + pandas_dtype('datetime64[ns, CET]')) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 3f1e7640ba538..d02f4c5b26c86 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -18,8 +18,10 @@ is_datetime64_dtype, needs_i8_conversion, is_int64_dtype, + is_categorical_dtype, is_integer_dtype, is_float_dtype, + is_numeric_dtype, is_integer, is_int_or_datetime_dtype, is_dtype_equal, @@ -37,7 +39,7 @@ from pandas.core.sorting import is_int64_overflow_possible import pandas.core.algorithms as algos import pandas.core.common as com -from pandas._libs import hashtable as libhashtable, join as libjoin +from pandas._libs import hashtable as libhashtable, join as libjoin, lib # back-compat of pseudo-public API @@ -570,6 +572,10 @@ def __init__(self, left, right, how='inner', on=None, self.right_join_keys, self.join_names) = self._get_merge_keys() + # validate the merge keys dtypes. We may need to coerce + # to avoid incompat dtypes + self._maybe_coerce_merge_keys() + def get_result(self): if self.indicator: self.left, self.right = self._indicator_pre_merge( @@ -760,26 +766,6 @@ def _get_join_info(self): join_index = join_index.astype(object) return join_index, left_indexer, right_indexer - def _get_merge_data(self): - """ - Handles overlapping column names etc. - """ - ldata, rdata = self.left._data, self.right._data - lsuf, rsuf = self.suffixes - - llabels, rlabels = items_overlap_with_suffix( - ldata.items, lsuf, rdata.items, rsuf) - - if not llabels.equals(ldata.items): - ldata = ldata.copy(deep=False) - ldata.set_axis(0, llabels) - - if not rlabels.equals(rdata.items): - rdata = rdata.copy(deep=False) - rdata.set_axis(0, rlabels) - - return ldata, rdata - def _get_merge_keys(self): """ Note: has side effects (copy/delete key columns) @@ -891,6 +877,51 @@ def _get_merge_keys(self): return left_keys, right_keys, join_names + def _maybe_coerce_merge_keys(self): + # we have valid mergee's but we may have to further + # coerce these if they are originally incompatible types + # + # for example if these are categorical, but are not dtype_equal + # or if we have object and integer dtypes + + for lk, rk, name in zip(self.left_join_keys, + self.right_join_keys, + self.join_names): + if (len(lk) and not len(rk)) or (not len(lk) and len(rk)): + continue + + # if either left or right is a categorical + # then the must match exactly in categories & ordered + if is_categorical_dtype(lk) and is_categorical_dtype(rk): + if lk.is_dtype_equal(rk): + continue + elif is_categorical_dtype(lk) or is_categorical_dtype(rk): + pass + + elif is_dtype_equal(lk.dtype, rk.dtype): + continue + + # if we are numeric, then allow differing + # kinds to proceed, eg. int64 and int8 + # further if we are object, but we infer to + # the same, then proceed + if (is_numeric_dtype(lk) and is_numeric_dtype(rk)): + if lk.dtype.kind == rk.dtype.kind: + continue + + # let's infer and see if we are ok + if lib.infer_dtype(lk) == lib.infer_dtype(rk): + continue + + # Houston, we have a problem! + # let's coerce to object + if name in self.left.columns: + self.left = self.left.assign( + **{name: self.left[name].astype(object)}) + if name in self.right.columns: + self.right = self.right.assign( + **{name: self.right[name].astype(object)}) + def _validate_specification(self): # Hm, any way to make this logic less complicated?? 
if self.on is None and self.left_on is None and self.right_on is None: @@ -942,9 +973,15 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner', Parameters ---------- + left_keys: ndarray, Index, Series + right_keys: ndarray, Index, Series + sort: boolean, default False + how: string {'inner', 'outer', 'left', 'right'}, default 'inner' Returns ------- + tuple of (left_indexer, right_indexer) + indexers into the left_keys, right_keys """ from functools import partial @@ -1349,6 +1386,13 @@ def _factorize_keys(lk, rk, sort=True): if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): lk = lk.values rk = rk.values + + # if we exactly match in categories, allow us to use codes + if (is_categorical_dtype(lk) and + is_categorical_dtype(rk) and + lk.is_dtype_equal(rk)): + return lk.codes, rk.codes, len(lk.categories) + if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk): klass = libhashtable.Int64Factorizer lk = _ensure_int64(com._values_from_object(lk)) From aa53e4fb2de3219173f4d304795f185d5bf934c5 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 11 Mar 2017 12:20:46 -0500 Subject: [PATCH 192/353] API: Drop the name parameter from Categorical Deprecated in 0.17.0 xref #10632 Author: gfyoung Closes #15654 from gfyoung/categorical-name-drop and squashes the following commits: 7e1e7d8 [gfyoung] API: Drop the name parameter from Categorical --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/categorical.py | 17 ++--------------- pandas/io/packers.py | 3 +-- pandas/tests/io/test_pickle.py | 16 ++++------------ pandas/tests/test_categorical.py | 11 +---------- 5 files changed, 9 insertions(+), 40 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e392023423eb0..f6d5e3df814fc 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -734,7 +734,7 @@ Removal of prior version deprecations/changes - The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - +- The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) .. _whatsnew_0200.performance: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 47db86ce1e73e..c1e5904693d1c 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -231,8 +231,7 @@ class Categorical(PandasObject): __array_priority__ = 1000 _typ = 'categorical' - def __init__(self, values, categories=None, ordered=False, - name=None, fastpath=False): + def __init__(self, values, categories=None, ordered=False, fastpath=False): self._validate_ordered(ordered) @@ -244,12 +243,6 @@ def __init__(self, values, categories=None, ordered=False, self._ordered = ordered return - if name is not None: - msg = ("the 'name' keyword is removed, use 'name' with consumers " - "of the categorical instead (e.g. 'Series(cat, " - "name=\"something\")'") - warn(msg, UserWarning, stacklevel=2) - # sanitize input if is_categorical_dtype(values): @@ -431,7 +424,7 @@ def from_array(cls, data, **kwargs): return cls(data, **kwargs) @classmethod - def from_codes(cls, codes, categories, ordered=False, name=None): + def from_codes(cls, codes, categories, ordered=False): """ Make a Categorical type from codes and categories arrays. @@ -454,12 +447,6 @@ def from_codes(cls, codes, categories, ordered=False, name=None): categorical. 
If not given, the resulting categorical will be unordered. """ - if name is not None: - msg = ("the 'name' keyword is removed, use 'name' with consumers " - "of the categorical instead (e.g. 'Series(cat, " - "name=\"something\")'") - warn(msg, UserWarning, stacklevel=2) - try: codes = np.asarray(codes, np.int64) except: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 404be758a7fbe..4662e8b635d3f 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -589,8 +589,7 @@ def decode(obj): from_codes = globals()[obj[u'klass']].from_codes return from_codes(codes=obj[u'codes'], categories=obj[u'categories'], - ordered=obj[u'ordered'], - name=obj[u'name']) + ordered=obj[u'ordered']) elif typ == u'series': dtype = dtype_for(obj[u'dtype']) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 91e70e942089c..fad6237d851fb 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -265,12 +265,8 @@ def python_unpickler(path): def test_pickle_v0_14_1(): - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) + cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, + categories=['a', 'b', 'c', 'd']) pickle_path = os.path.join(tm.get_data_path(), 'categorical_0_14_1.pickle') # This code was executed once on v0.14.1 to generate the pickle: @@ -286,12 +282,8 @@ def test_pickle_v0_15_2(): # ordered -> _ordered # GH 9347 - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) + cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, + categories=['a', 'b', 'c', 'd']) pickle_path = os.path.join(tm.get_data_path(), 'categorical_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 2d5e98d49e152..6c8aeba704c7b 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -682,7 +682,7 @@ def test_print(self): def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'], - name='cat', fastpath=True) + fastpath=True) expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600", "Categories (3, object): [a, b, c]"] expected = "\n".join(expected) @@ -1635,15 +1635,6 @@ def test_deprecated_from_array(self): with tm.assert_produces_warning(FutureWarning): Categorical.from_array([0, 1]) - def test_removed_names_produces_warning(self): - - # 10482 - with tm.assert_produces_warning(UserWarning): - Categorical([0, 1], name="a") - - with tm.assert_produces_warning(UserWarning): - Categorical.from_codes([1, 2], ["a", "b", "c"], name="a") - def test_datetime_categorical_comparison(self): dt_cat = pd.Categorical( pd.date_range('2014-01-01', periods=3), ordered=True) From 5eac08a4f7fb2416fd7d3470e111d203a9a23feb Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 11 Mar 2017 12:24:10 -0500 Subject: [PATCH 193/353] MAINT: Remove testing.assert_isinstance (#15652) Deprecated in 0.17.0 xref gh-10458 --- pandas/tests/test_testing.py | 3 --- pandas/util/testing.py | 5 ----- 2 files changed, 8 deletions(-) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 2fb58ef70e3cb..e5cb953cb35a5 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -765,9 
+765,6 @@ def test_warning(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): self.assertNotAlmostEquals(1, 2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_isinstance(Series([1, 2]), Series, msg='xxx') - class TestLocale(tm.TestCase): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ec30a9376a9da..74ff480a9c198 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -991,11 +991,6 @@ def assertIsInstance(obj, cls, msg=''): raise AssertionError(err_msg.format(msg, cls, type(obj))) -def assert_isinstance(obj, class_type_or_tuple, msg=''): - return deprecate('assert_isinstance', assertIsInstance)( - obj, class_type_or_tuple, msg=msg) - - def assertNotIsInstance(obj, cls, msg=''): """Test that obj is not an instance of cls (which can be a class or a tuple of classes, From e0b37f9bb40e2d27629c573bb985d75360282cd4 Mon Sep 17 00:00:00 2001 From: Rouz Azari Date: Sun, 12 Mar 2017 11:15:17 -0400 Subject: [PATCH 194/353] CLN: Cleanup tests for .rank() closes #15640 Author: Rouz Azari Closes #15658 from rouzazari/GH15640 and squashes the following commits: d0a2abc [Rouz Azari] Fixed linting error with datetime.datetime import 9580af0 [Rouz Azari] CLN: Cleanup tests for .rank() --- pandas/tests/frame/test_analytics.py | 169 +------------- pandas/tests/frame/test_rank.py | 268 +++++++++++++++++++++ pandas/tests/series/test_analytics.py | 201 ---------------- pandas/tests/series/test_rank.py | 323 ++++++++++++++++++++++++++ pandas/tests/test_stats.py | 185 --------------- 5 files changed, 592 insertions(+), 554 deletions(-) create mode 100644 pandas/tests/frame/test_rank.py create mode 100644 pandas/tests/series/test_rank.py delete mode 100644 pandas/tests/test_stats.py diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 4758ee1323ca0..6c917444f9f43 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2,7 +2,7 @@ from __future__ import print_function -from datetime import timedelta, datetime +from datetime import timedelta from distutils.version import LooseVersion import sys import pytest @@ -642,173 +642,6 @@ def test_cumprod(self): df.cumprod(0) df.cumprod(1) - def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.frame['A'][::2] = np.nan - self.frame['B'][::3] = np.nan - self.frame['C'][::4] = np.nan - self.frame['D'][::5] = np.nan - - ranks0 = self.frame.rank() - ranks1 = self.frame.rank(1) - mask = np.isnan(self.frame.values) - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fvals) - exp0[mask] = np.nan - - exp1 = np.apply_along_axis(rankdata, 1, fvals) - exp1[mask] = np.nan - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # integers - df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) - - result = df.rank() - exp = df.astype(float).rank() - tm.assert_frame_equal(result, exp) - - result = df.rank(1) - exp = df.astype(float).rank(1) - tm.assert_frame_equal(result, exp) - - def test_rank2(self): - df = DataFrame([[1, 3, 2], [1, 2, 3]]) - expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 - result = df.rank(1, pct=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame([[1, 3, 2], [1, 2, 3]]) - expected = df.rank(0) / 2.0 - result = df.rank(0, pct=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) - expected = 
DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) - result = df.rank(1, numeric_only=False) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) - result = df.rank(0, numeric_only=False) - tm.assert_frame_equal(result, expected) - - df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) - expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) - result = df.rank(1, numeric_only=False) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) - result = df.rank(0, numeric_only=False) - tm.assert_frame_equal(result, expected) - - # f7u12, this does not work without extensive workaround - data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], - [datetime(2000, 1, 2), datetime(2000, 1, 3), - datetime(2000, 1, 1)]] - df = DataFrame(data) - - # check the rank - expected = DataFrame([[2., nan, 1.], - [2., 3., 1.]]) - result = df.rank(1, numeric_only=False, ascending=True) - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[1., nan, 2.], - [2., 1., 3.]]) - result = df.rank(1, numeric_only=False, ascending=False) - tm.assert_frame_equal(result, expected) - - # mixed-type frames - self.mixed_frame['datetime'] = datetime.now() - self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) - - result = self.mixed_frame.rank(1) - expected = self.mixed_frame.rank(1, numeric_only=True) - tm.assert_frame_equal(result, expected) - - df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, - 1e60, 1e80, 1e-30]}) - exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) - tm.assert_frame_equal(df.rank(), exp) - - def test_rank_na_option(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.frame['A'][::2] = np.nan - self.frame['B'][::3] = np.nan - self.frame['C'][::4] = np.nan - self.frame['D'][::5] = np.nan - - # bottom - ranks0 = self.frame.rank(na_option='bottom') - ranks1 = self.frame.rank(1, na_option='bottom') - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fvals) - exp1 = np.apply_along_axis(rankdata, 1, fvals) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # top - ranks0 = self.frame.rank(na_option='top') - ranks1 = self.frame.rank(1, na_option='top') - - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values - fval1 = self.frame.T - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T - fval1 = fval1.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, fval0) - exp1 = np.apply_along_axis(rankdata, 1, fval1) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # descending - - # bottom - ranks0 = self.frame.rank(na_option='top', ascending=False) - ranks1 = self.frame.rank(1, na_option='top', ascending=False) - - fvals = self.frame.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, -fvals) - exp1 = np.apply_along_axis(rankdata, 1, -fvals) - - tm.assert_almost_equal(ranks0.values, exp0) - tm.assert_almost_equal(ranks1.values, exp1) - - # descending - - # top - ranks0 = self.frame.rank(na_option='bottom', ascending=False) - ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) - - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values - fval1 = self.frame.T - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T - fval1 = fval1.fillna(np.inf).values - - exp0 = np.apply_along_axis(rankdata, 0, -fval0) - exp1 = np.apply_along_axis(rankdata, 1, -fval1) - - 
tm.assert_numpy_array_equal(ranks0.values, exp0) - tm.assert_numpy_array_equal(ranks1.values, exp1) - - def test_rank_axis(self): - # check if using axes' names gives the same result - df = pd.DataFrame([[2, 1], [4, 3]]) - tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) - tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) - def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) self._check_stat_op('sem', alt) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py new file mode 100644 index 0000000000000..151a89888c329 --- /dev/null +++ b/pandas/tests/frame/test_rank.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +from datetime import timedelta, datetime +from distutils.version import LooseVersion +from numpy import nan +import numpy as np + +from pandas import Series, DataFrame + +from pandas.compat import product +from pandas.util.testing import assert_frame_equal +import pandas.util.testing as tm +from pandas.tests.frame.common import TestData + + +class TestRank(tm.TestCase, TestData): + s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) + df = DataFrame({'A': s, 'B': s}) + + results = { + 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, + 3.5, 1.5, 8.0, nan, 5.5]), + 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), + 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), + 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), + 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), + } + + def test_rank(self): + tm._skip_if_no_scipy() + from scipy.stats import rankdata + + self.frame['A'][::2] = np.nan + self.frame['B'][::3] = np.nan + self.frame['C'][::4] = np.nan + self.frame['D'][::5] = np.nan + + ranks0 = self.frame.rank() + ranks1 = self.frame.rank(1) + mask = np.isnan(self.frame.values) + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp0[mask] = np.nan + + exp1 = np.apply_along_axis(rankdata, 1, fvals) + exp1[mask] = np.nan + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # integers + df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) + + result = df.rank() + exp = df.astype(float).rank() + tm.assert_frame_equal(result, exp) + + result = df.rank(1) + exp = df.astype(float).rank(1) + tm.assert_frame_equal(result, exp) + + def test_rank2(self): + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 + result = df.rank(1, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = df.rank(0) / 2.0 + result = df.rank(0, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) + expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) + expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + # f7u12, this does not work without extensive workaround + data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), + datetime(2000, 
1, 1)]] + df = DataFrame(data) + + # check the rank + expected = DataFrame([[2., nan, 1.], + [2., 3., 1.]]) + result = df.rank(1, numeric_only=False, ascending=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1., nan, 2.], + [2., 1., 3.]]) + result = df.rank(1, numeric_only=False, ascending=False) + tm.assert_frame_equal(result, expected) + + # mixed-type frames + self.mixed_frame['datetime'] = datetime.now() + self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) + + result = self.mixed_frame.rank(1) + expected = self.mixed_frame.rank(1, numeric_only=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, + 1e60, 1e80, 1e-30]}) + exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) + tm.assert_frame_equal(df.rank(), exp) + + def test_rank_na_option(self): + tm._skip_if_no_scipy() + from scipy.stats import rankdata + + self.frame['A'][::2] = np.nan + self.frame['B'][::3] = np.nan + self.frame['C'][::4] = np.nan + self.frame['D'][::5] = np.nan + + # bottom + ranks0 = self.frame.rank(na_option='bottom') + ranks1 = self.frame.rank(1, na_option='bottom') + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp1 = np.apply_along_axis(rankdata, 1, fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # top + ranks0 = self.frame.rank(na_option='top') + ranks1 = self.frame.rank(1, na_option='top') + + fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values + fval1 = self.frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fval0) + exp1 = np.apply_along_axis(rankdata, 1, fval1) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # bottom + ranks0 = self.frame.rank(na_option='top', ascending=False) + ranks1 = self.frame.rank(1, na_option='top', ascending=False) + + fvals = self.frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fvals) + exp1 = np.apply_along_axis(rankdata, 1, -fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # top + ranks0 = self.frame.rank(na_option='bottom', ascending=False) + ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) + + fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values + fval1 = self.frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fval0) + exp1 = np.apply_along_axis(rankdata, 1, -fval1) + + tm.assert_numpy_array_equal(ranks0.values, exp0) + tm.assert_numpy_array_equal(ranks1.values, exp1) + + def test_rank_axis(self): + # check if using axes' names gives the same result + df = DataFrame([[2, 1], [4, 3]]) + tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) + tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) + + def test_rank_methods_frame(self): + tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + import scipy + from scipy.stats import rankdata + + xs = np.random.randint(0, 21, (100, 26)) + xs = (xs - 10.0) / 10.0 + cols = [chr(ord('z') - i) for i in range(xs.shape[1])] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + df = DataFrame(vals, columns=cols) + + for ax in [0, 1]: + for m in ['average', 'min', 'max', 'first', 'dense']: + result = df.rank(axis=ax, method=m) + sprank = 
np.apply_along_axis( + rankdata, ax, vals, + m if m != 'first' else 'ordinal') + sprank = sprank.astype(np.float64) + expected = DataFrame(sprank, columns=cols) + + if LooseVersion(scipy.__version__) >= '0.17.0': + expected = expected.astype('float64') + tm.assert_frame_equal(result, expected) + + def test_rank_descending(self): + dtypes = ['O', 'f8', 'i8'] + + for dtype, method in product(dtypes, self.results): + if 'i' in dtype: + df = self.df.dropna() + else: + df = self.df.astype(dtype) + + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + assert_frame_equal(res, expected) + + if method == 'first' and dtype == 'O': + continue + + expected = (df.max() - df).rank(method=method) + + if dtype != 'O': + res2 = df.rank(method=method, ascending=False, + numeric_only=True) + assert_frame_equal(res2, expected) + + res3 = df.rank(method=method, ascending=False, + numeric_only=False) + assert_frame_equal(res3, expected) + + def test_rank_2d_tie_methods(self): + df = self.df + + def _check2d(df, expected, method='average', axis=0): + exp_df = DataFrame({'A': expected, 'B': expected}) + + if axis == 1: + df = df.T + exp_df = exp_df.T + + result = df.rank(method=method, axis=axis) + assert_frame_equal(result, exp_df) + + dtypes = [None, object] + disabled = set([(object, 'first')]) + results = self.results + + for method, axis, dtype in product(results, [0, 1], dtypes): + if (dtype, method) in disabled: + continue + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, results[method], method=method, axis=axis) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b6985abb64e40..c2543581dca50 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -969,207 +969,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep=False, inplace=True) assert_series_equal(sc, s[~expected]) - def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - self.ts[::2] = np.nan - self.ts[:10][::3] = 4. 
- - ranks = self.ts.rank() - oranks = self.ts.astype('O').rank() - - assert_series_equal(ranks, oranks) - - mask = np.isnan(self.ts) - filled = self.ts.fillna(np.inf) - - # rankdata returns a ndarray - exp = Series(rankdata(filled), index=filled.index, name='ts') - exp[mask] = np.nan - - tm.assert_series_equal(ranks, exp) - - iseries = Series(np.arange(5).repeat(2)) - - iranks = iseries.rank() - exp = iseries.astype(float).rank() - assert_series_equal(iranks, exp) - iseries = Series(np.arange(5)) + 1.0 - exp = iseries / 5.0 - iranks = iseries.rank(pct=True) - - assert_series_equal(iranks, exp) - - iseries = Series(np.repeat(1, 100)) - exp = Series(np.repeat(0.505, 100)) - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries[1] = np.nan - exp = Series(np.repeat(50.0 / 99.0, 100)) - exp[1] = np.nan - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.arange(5)) + 1.0 - iseries[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.repeat(np.nan, 100)) - exp = iseries.copy() - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series(np.arange(5)) + 1 - iseries[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - rng = date_range('1/1/1990', periods=5) - iseries = Series(np.arange(5), rng) + 1 - iseries.iloc[4] = np.nan - exp = iseries / 4.0 - iranks = iseries.rank(pct=True) - assert_series_equal(iranks, exp) - - iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) - exp = Series([2, 1, 3, 5, 4, 6.0]) - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - # GH 5968 - iseries = Series(['3 day', '1 day 10m', '-2 day', pd.NaT], - dtype='m8[ns]') - exp = Series([3, 2, 1, np.nan]) - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - values = np.array( - [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40 - ], dtype='float64') - random_order = np.random.permutation(len(values)) - iseries = Series(values[random_order]) - exp = Series(random_order + 1.0, dtype='float64') - iranks = iseries.rank() - assert_series_equal(iranks, exp) - - def test_rank_categorical(self): - # GH issue #15420 rank incorrectly orders ordered categories - - # Test ascending/descending ranking for ordered categoricals - exp = pd.Series([1., 2., 3., 4., 5., 6.]) - exp_desc = pd.Series([6., 5., 4., 3., 2., 1.]) - ordered = pd.Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] - ).astype( - 'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=True - ) - assert_series_equal(ordered.rank(), exp) - assert_series_equal(ordered.rank(ascending=False), exp_desc) - - # Unordered categoricals should be ranked as objects - unordered = pd.Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], - ).astype( - 'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=False - ) - exp_unordered = pd.Series([2., 4., 6., 3., 1., 5.]) - res = unordered.rank() - assert_series_equal(res, exp_unordered) - - unordered1 = pd.Series( - [1, 2, 3, 4, 5, 6], - ).astype( - 'category', - categories=[1, 2, 3, 4, 5, 6], - ordered=False - ) - exp_unordered1 = pd.Series([1., 2., 3., 4., 5., 6.]) - res1 = unordered1.rank() - assert_series_equal(res1, exp_unordered1) - - # Test na_option for rank data - na_ser = pd.Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] - 
).astype( - 'category', - categories=[ - 'first', 'second', 'third', 'fourth', - 'fifth', 'sixth', 'seventh' - ], - ordered=True - ) - - exp_top = pd.Series([2., 3., 4., 5., 6., 7., 1.]) - exp_bot = pd.Series([1., 2., 3., 4., 5., 6., 7.]) - exp_keep = pd.Series([1., 2., 3., 4., 5., 6., np.NaN]) - - assert_series_equal(na_ser.rank(na_option='top'), exp_top) - assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot) - assert_series_equal(na_ser.rank(na_option='keep'), exp_keep) - - # Test na_option for rank data with ascending False - exp_top = pd.Series([7., 6., 5., 4., 3., 2., 1.]) - exp_bot = pd.Series([6., 5., 4., 3., 2., 1., 7.]) - exp_keep = pd.Series([6., 5., 4., 3., 2., 1., np.NaN]) - - assert_series_equal( - na_ser.rank(na_option='top', ascending=False), - exp_top - ) - assert_series_equal( - na_ser.rank(na_option='bottom', ascending=False), - exp_bot - ) - assert_series_equal( - na_ser.rank(na_option='keep', ascending=False), - exp_keep - ) - - # Test with pct=True - na_ser = pd.Series( - ['first', 'second', 'third', 'fourth', np.NaN], - ).astype( - 'category', - categories=['first', 'second', 'third', 'fourth'], - ordered=True - ) - exp_top = pd.Series([0.4, 0.6, 0.8, 1., 0.2]) - exp_bot = pd.Series([0.2, 0.4, 0.6, 0.8, 1.]) - exp_keep = pd.Series([0.25, 0.5, 0.75, 1., np.NaN]) - - assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top) - assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot) - assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep) - - def test_rank_signature(self): - s = Series([0, 1]) - s.rank(method='average') - self.assertRaises(ValueError, s.rank, 'average') - - def test_rank_inf(self): - pytest.skip('DataFrame.rank does not currently rank ' - 'np.inf and -np.inf properly') - - values = np.array( - [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, - 2, 40, np.inf], dtype='float64') - random_order = np.random.permutation(len(values)) - iseries = Series(values[random_order]) - exp = Series(random_order + 1.0, dtype='float64') - iranks = iseries.rank() - assert_series_equal(iranks, exp) - def test_clip(self): val = self.ts.median() diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py new file mode 100644 index 0000000000000..99257b343310f --- /dev/null +++ b/pandas/tests/series/test_rank.py @@ -0,0 +1,323 @@ +# -*- coding: utf-8 -*- +from pandas import compat + +import pytest + +from distutils.version import LooseVersion +from numpy import nan +import numpy as np + +from pandas import (Series, date_range, NaT) + +from pandas.compat import product +from pandas.util.testing import assert_series_equal +import pandas.util.testing as tm +from pandas.tests.series.common import TestData + + +class TestSeriesRank(tm.TestCase, TestData): + s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) + + results = { + 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, + 3.5, 1.5, 8.0, nan, 5.5]), + 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), + 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), + 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), + 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), + } + + def test_rank(self): + tm._skip_if_no_scipy() + from scipy.stats import rankdata + + self.ts[::2] = np.nan + self.ts[:10][::3] = 4. 
+ + ranks = self.ts.rank() + oranks = self.ts.astype('O').rank() + + assert_series_equal(ranks, oranks) + + mask = np.isnan(self.ts) + filled = self.ts.fillna(np.inf) + + # rankdata returns a ndarray + exp = Series(rankdata(filled), index=filled.index, name='ts') + exp[mask] = np.nan + + tm.assert_series_equal(ranks, exp) + + iseries = Series(np.arange(5).repeat(2)) + + iranks = iseries.rank() + exp = iseries.astype(float).rank() + assert_series_equal(iranks, exp) + iseries = Series(np.arange(5)) + 1.0 + exp = iseries / 5.0 + iranks = iseries.rank(pct=True) + + assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(1, 100)) + exp = Series(np.repeat(0.505, 100)) + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries[1] = np.nan + exp = Series(np.repeat(50.0 / 99.0, 100)) + exp[1] = np.nan + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1.0 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(np.nan, 100)) + exp = iseries.copy() + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + rng = date_range('1/1/1990', periods=5) + iseries = Series(np.arange(5), rng) + 1 + iseries.iloc[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + assert_series_equal(iranks, exp) + + iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) + exp = Series([2, 1, 3, 5, 4, 6.0]) + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + # GH 5968 + iseries = Series(['3 day', '1 day 10m', '-2 day', NaT], + dtype='m8[ns]') + exp = Series([3, 2, 1, np.nan]) + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + values = np.array( + [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40 + ], dtype='float64') + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(random_order + 1.0, dtype='float64') + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + def test_rank_categorical(self): + # GH issue #15420 rank incorrectly orders ordered categories + + # Test ascending/descending ranking for ordered categoricals + exp = Series([1., 2., 3., 4., 5., 6.]) + exp_desc = Series([6., 5., 4., 3., 2., 1.]) + ordered = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=True + ) + assert_series_equal(ordered.rank(), exp) + assert_series_equal(ordered.rank(ascending=False), exp_desc) + + # Unordered categoricals should be ranked as objects + unordered = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], + ).astype( + 'category', + categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=False + ) + exp_unordered = Series([2., 4., 6., 3., 1., 5.]) + res = unordered.rank() + assert_series_equal(res, exp_unordered) + + unordered1 = Series( + [1, 2, 3, 4, 5, 6], + ).astype( + 'category', + categories=[1, 2, 3, 4, 5, 6], + ordered=False + ) + exp_unordered1 = Series([1., 2., 3., 4., 5., 6.]) + res1 = unordered1.rank() + assert_series_equal(res1, exp_unordered1) + + # Test na_option for rank data + na_ser = Series( + ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] + ).astype( + 'category', + 
categories=[ + 'first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh' + ], + ordered=True + ) + + exp_top = Series([2., 3., 4., 5., 6., 7., 1.]) + exp_bot = Series([1., 2., 3., 4., 5., 6., 7.]) + exp_keep = Series([1., 2., 3., 4., 5., 6., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top'), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom'), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep'), exp_keep) + + # Test na_option for rank data with ascending False + exp_top = Series([7., 6., 5., 4., 3., 2., 1.]) + exp_bot = Series([6., 5., 4., 3., 2., 1., 7.]) + exp_keep = Series([6., 5., 4., 3., 2., 1., np.NaN]) + + assert_series_equal( + na_ser.rank(na_option='top', ascending=False), + exp_top + ) + assert_series_equal( + na_ser.rank(na_option='bottom', ascending=False), + exp_bot + ) + assert_series_equal( + na_ser.rank(na_option='keep', ascending=False), + exp_keep + ) + + # Test with pct=True + na_ser = Series( + ['first', 'second', 'third', 'fourth', np.NaN], + ).astype( + 'category', + categories=['first', 'second', 'third', 'fourth'], + ordered=True + ) + exp_top = Series([0.4, 0.6, 0.8, 1., 0.2]) + exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.]) + exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN]) + + assert_series_equal(na_ser.rank(na_option='top', pct=True), exp_top) + assert_series_equal(na_ser.rank(na_option='bottom', pct=True), exp_bot) + assert_series_equal(na_ser.rank(na_option='keep', pct=True), exp_keep) + + def test_rank_signature(self): + s = Series([0, 1]) + s.rank(method='average') + self.assertRaises(ValueError, s.rank, 'average') + + def test_rank_inf(self): + pytest.skip('DataFrame.rank does not currently rank ' + 'np.inf and -np.inf properly') + + values = np.array( + [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, + 2, 40, np.inf], dtype='float64') + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(random_order + 1.0, dtype='float64') + iranks = iseries.rank() + assert_series_equal(iranks, exp) + + def test_rank_tie_methods(self): + s = self.s + + def _check(s, expected, method='average'): + result = s.rank(method=method) + tm.assert_series_equal(result, Series(expected)) + + dtypes = [None, object] + disabled = set([(object, 'first')]) + results = self.results + + for method, dtype in product(results, dtypes): + if (dtype, method) in disabled: + continue + series = s if dtype is None else s.astype(dtype) + _check(series, results[method], method=method) + + def test_rank_methods_series(self): + tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + import scipy + from scipy.stats import rankdata + + xs = np.random.randn(9) + xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates + np.random.shuffle(xs) + + index = [chr(ord('a') + i) for i in range(len(xs))] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + ts = Series(vals, index=index) + + for m in ['average', 'min', 'max', 'first', 'dense']: + result = ts.rank(method=m) + sprank = rankdata(vals, m if m != 'first' else 'ordinal') + expected = Series(sprank, index=index) + + if LooseVersion(scipy.__version__) >= '0.17.0': + expected = expected.astype('float64') + tm.assert_series_equal(result, expected) + + def test_rank_dense_method(self): + dtypes = ['O', 'f8', 'i8'] + in_out = [([1], [1]), + ([2], [1]), + ([0], [1]), + ([2, 2], [1, 1]), + ([1, 2, 3], [1, 2, 3]), + ([4, 2, 1], [3, 2, 1],), + ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), + ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])] + + for ser, exp 
in in_out: + for dtype in dtypes: + s = Series(ser).astype(dtype) + result = s.rank(method='dense') + expected = Series(exp).astype(result.dtype) + assert_series_equal(result, expected) + + def test_rank_descending(self): + dtypes = ['O', 'f8', 'i8'] + + for dtype, method in product(dtypes, self.results): + if 'i' in dtype: + s = self.s.dropna() + else: + s = self.s.astype(dtype) + + res = s.rank(ascending=False) + expected = (s.max() - s).rank() + assert_series_equal(res, expected) + + if method == 'first' and dtype == 'O': + continue + + expected = (s.max() - s).rank(method=method) + res2 = s.rank(method=method, ascending=False) + assert_series_equal(res2, expected) + + def test_rank_int(self): + s = self.s.dropna().astype('i8') + + for method, res in compat.iteritems(self.results): + result = s.rank(method=method) + expected = Series(res).dropna() + expected.index = result.index + assert_series_equal(result, expected) + + def test_rank_object_bug(self): + # GH 13445 + + # smoke tests + Series([np.nan] * 32).astype(object).rank(ascending=True) + Series([np.nan] * 32).astype(object).rank(ascending=False) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py deleted file mode 100644 index 118c4147a2019..0000000000000 --- a/pandas/tests/test_stats.py +++ /dev/null @@ -1,185 +0,0 @@ -# -*- coding: utf-8 -*- -from pandas import compat - -from distutils.version import LooseVersion -from numpy import nan -import numpy as np - -from pandas import Series, DataFrame - -from pandas.compat import product -from pandas.util.testing import (assert_frame_equal, assert_series_equal) -import pandas.util.testing as tm - - -class TestRank(tm.TestCase): - s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) - df = DataFrame({'A': s, 'B': s}) - - results = { - 'average': np.array([1.5, 5.5, 7.0, 3.5, nan, - 3.5, 1.5, 8.0, nan, 5.5]), - 'min': np.array([1, 5, 7, 3, nan, 3, 1, 8, nan, 5]), - 'max': np.array([2, 6, 7, 4, nan, 4, 2, 8, nan, 6]), - 'first': np.array([1, 5, 7, 3, nan, 4, 2, 8, nan, 6]), - 'dense': np.array([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]), - } - - def test_rank_tie_methods(self): - s = self.s - - def _check(s, expected, method='average'): - result = s.rank(method=method) - tm.assert_series_equal(result, Series(expected)) - - dtypes = [None, object] - disabled = set([(object, 'first')]) - results = self.results - - for method, dtype in product(results, dtypes): - if (dtype, method) in disabled: - continue - series = s if dtype is None else s.astype(dtype) - _check(series, results[method], method=method) - - def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') - import scipy - from scipy.stats import rankdata - - xs = np.random.randn(9) - xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates - np.random.shuffle(xs) - - index = [chr(ord('a') + i) for i in range(len(xs))] - - for vals in [xs, xs + 1e6, xs * 1e-6]: - ts = Series(vals, index=index) - - for m in ['average', 'min', 'max', 'first', 'dense']: - result = ts.rank(method=m) - sprank = rankdata(vals, m if m != 'first' else 'ordinal') - expected = Series(sprank, index=index) - - if LooseVersion(scipy.__version__) >= '0.17.0': - expected = expected.astype('float64') - tm.assert_series_equal(result, expected) - - def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') - import scipy - from scipy.stats import rankdata - - xs = np.random.randint(0, 21, (100, 26)) - xs = (xs - 10.0) / 10.0 - cols = [chr(ord('z') - i) for i in 
range(xs.shape[1])] - - for vals in [xs, xs + 1e6, xs * 1e-6]: - df = DataFrame(vals, columns=cols) - - for ax in [0, 1]: - for m in ['average', 'min', 'max', 'first', 'dense']: - result = df.rank(axis=ax, method=m) - sprank = np.apply_along_axis( - rankdata, ax, vals, - m if m != 'first' else 'ordinal') - sprank = sprank.astype(np.float64) - expected = DataFrame(sprank, columns=cols) - - if LooseVersion(scipy.__version__) >= '0.17.0': - expected = expected.astype('float64') - tm.assert_frame_equal(result, expected) - - def test_rank_dense_method(self): - dtypes = ['O', 'f8', 'i8'] - in_out = [([1], [1]), - ([2], [1]), - ([0], [1]), - ([2, 2], [1, 1]), - ([1, 2, 3], [1, 2, 3]), - ([4, 2, 1], [3, 2, 1],), - ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), - ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])] - - for ser, exp in in_out: - for dtype in dtypes: - s = Series(ser).astype(dtype) - result = s.rank(method='dense') - expected = Series(exp).astype(result.dtype) - assert_series_equal(result, expected) - - def test_rank_descending(self): - dtypes = ['O', 'f8', 'i8'] - - for dtype, method in product(dtypes, self.results): - if 'i' in dtype: - s = self.s.dropna() - df = self.df.dropna() - else: - s = self.s.astype(dtype) - df = self.df.astype(dtype) - - res = s.rank(ascending=False) - expected = (s.max() - s).rank() - assert_series_equal(res, expected) - - res = df.rank(ascending=False) - expected = (df.max() - df).rank() - assert_frame_equal(res, expected) - - if method == 'first' and dtype == 'O': - continue - - expected = (s.max() - s).rank(method=method) - res2 = s.rank(method=method, ascending=False) - assert_series_equal(res2, expected) - - expected = (df.max() - df).rank(method=method) - - if dtype != 'O': - res2 = df.rank(method=method, ascending=False, - numeric_only=True) - assert_frame_equal(res2, expected) - - res3 = df.rank(method=method, ascending=False, - numeric_only=False) - assert_frame_equal(res3, expected) - - def test_rank_2d_tie_methods(self): - df = self.df - - def _check2d(df, expected, method='average', axis=0): - exp_df = DataFrame({'A': expected, 'B': expected}) - - if axis == 1: - df = df.T - exp_df = exp_df.T - - result = df.rank(method=method, axis=axis) - assert_frame_equal(result, exp_df) - - dtypes = [None, object] - disabled = set([(object, 'first')]) - results = self.results - - for method, axis, dtype in product(results, [0, 1], dtypes): - if (dtype, method) in disabled: - continue - frame = df if dtype is None else df.astype(dtype) - _check2d(frame, results[method], method=method, axis=axis) - - def test_rank_int(self): - s = self.s.dropna().astype('i8') - - for method, res in compat.iteritems(self.results): - result = s.rank(method=method) - expected = Series(res).dropna() - expected.index = result.index - assert_series_equal(result, expected) - - def test_rank_object_bug(self): - # GH 13445 - - # smoke tests - Series([np.nan] * 32).astype(object).rank(ascending=True) - Series([np.nan] * 32).astype(object).rank(ascending=False) From a212738f07e89a3a6f5905399c3531090d471021 Mon Sep 17 00:00:00 2001 From: "Adam J. 
Stewart" Date: Sun, 12 Mar 2017 15:55:32 -0500 Subject: [PATCH 195/353] DOC: fix typo in timeseries documentation (#15666) --- doc/source/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index e09d240ed91b7..c0c178ad2fb49 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -610,7 +610,7 @@ There are several time/date properties that one can access from ``Timestamp`` or dayofweek,"The numer of the day of the week with Monday=0, Sunday=6" weekday,"The number of the day of the week with Monday=0, Sunday=6" weekday_name,"The name of the day in a week (ex: Friday)" - quarter,"Quarter of the date: Jan=Mar = 1, Apr-Jun = 2, etc." + quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." days_in_month,"The number of days in the month of the datetime" is_month_start,"Logical indicating if first day of month (defined by frequency)" is_month_end,"Logical indicating if last day of month (defined by frequency)" From 7c5ebd50bbf5b659c6b40205bea5b42dbc892699 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Sun, 12 Mar 2017 17:40:05 -0500 Subject: [PATCH 196/353] Fix another typo in the timeseries documentation (#15667) --- doc/source/timeseries.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c0c178ad2fb49..7136b15a7633a 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -607,7 +607,7 @@ There are several time/date properties that one can access from ``Timestamp`` or dayofyear,"The ordinal day of year" weekofyear,"The week ordinal of the year" week,"The week ordinal of the year" - dayofweek,"The numer of the day of the week with Monday=0, Sunday=6" + dayofweek,"The number of the day of the week with Monday=0, Sunday=6" weekday,"The number of the day of the week with Monday=0, Sunday=6" weekday_name,"The name of the day in a week (ex: Friday)" quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." 
From 35109568489401dd2172fb76fd38c1c212355227 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Mar 2017 14:01:40 -0400 Subject: [PATCH 197/353] BLD: make 3.6 use *only* conda-forge channels --- .travis.yml | 1 + ci/install_travis.sh | 7 +++++-- ci/requirements-3.6.run | 1 + ci/requirements-3.6.sh | 7 ------- 4 files changed, 7 insertions(+), 9 deletions(-) delete mode 100644 ci/requirements-3.6.sh diff --git a/.travis.yml b/.travis.yml index 97bf881f3b6fc..b0331941e2a1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -86,6 +86,7 @@ matrix: - JOB_NAME: "36" - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" + - CONDA_FORGE=true addons: apt: packages: diff --git a/ci/install_travis.sh b/ci/install_travis.sh index b337f6e443be2..12202b4ceee70 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -53,14 +53,17 @@ conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 conda update -q conda +echo "[add channels]" # add the pandas channel to take priority # to add extra packages -echo "[add channels]" conda config --add channels pandas || exit 1 conda config --remove channels defaults || exit 1 conda config --add channels defaults || exit 1 -conda install anaconda-client +if [ "$CONDA_FORGE" ]; then + # add conda-forge channel as priority + conda config --add channels conda-forge || exit 1 +fi # Useful for debugging any issues with conda conda info -a || exit 1 diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 9a6c1c7edbc5e..41c9680ce1b7e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -14,6 +14,7 @@ html5lib jinja2 sqlalchemy pymysql +feather-format # psycopg2 (not avail on defaults ATM) beautifulsoup4 s3fs diff --git a/ci/requirements-3.6.sh b/ci/requirements-3.6.sh deleted file mode 100644 index 7d88ede751ec8..0000000000000 --- a/ci/requirements-3.6.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -source activate pandas - -echo "install 36" - -conda install -n pandas -c conda-forge feather-format From 56b5a30937e79335029b1727a80f109b7eb0840a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 12 Mar 2017 19:27:30 -0400 Subject: [PATCH 198/353] TST: skip scipy tests for >= 0.19.0 as needed in interpolation / window / sparse closes #15668 --- pandas/tests/frame/test_missing.py | 3 +- pandas/tests/frame/test_rank.py | 3 +- pandas/tests/series/test_missing.py | 7 +++-- pandas/tests/series/test_rank.py | 3 +- pandas/tests/sparse/test_frame.py | 49 +++++++++++++++++++++++++++-- pandas/tests/test_nanops.py | 14 +++------ pandas/tests/test_window.py | 10 +++--- pandas/util/testing.py | 28 ++++++++--------- 8 files changed, 80 insertions(+), 37 deletions(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 80ea01d3a05aa..923ed2e7c3444 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -548,7 +548,8 @@ def test_interp_nan_idx(self): df.interpolate(method='values') def test_interp_various(self): - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') + df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) df = df.set_index('C') diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index 151a89888c329..b115218d76958 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -193,7 +193,8 @@ def test_rank_axis(self): tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) 
def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + tm.skip_if_no_package('scipy', min_version='0.13', + app='scipy.stats.rankdata') import scipy from scipy.stats import rankdata diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 87cfcf32229b4..9e997da517bf6 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -827,7 +827,8 @@ def test_interp_quad(self): assert_series_equal(result, expected) def test_interp_scipy_basic(self): - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') + s = Series([1, 3, np.nan, 12, np.nan, 25]) # slinear expected = Series([1., 3., 7.5, 12., 18.5, 25.]) @@ -1027,8 +1028,8 @@ def test_spline(self): def test_spline_extrapolate(self): tm.skip_if_no_package( - 'scipy', '0.15', - 'setting ext on scipy.interpolate.UnivariateSpline') + 'scipy', min_version='0.15', + app='setting ext on scipy.interpolate.UnivariateSpline') s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) result3 = s.interpolate(method='spline', order=1, ext=3) expected3 = Series([1., 2., 3., 4., 5., 6., 6.]) diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 99257b343310f..f47eae3adc3ae 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -246,7 +246,8 @@ def _check(s, expected, method='average'): _check(series, results[method], method=method) def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata') + tm.skip_if_no_package('scipy', min_version='0.13', + app='scipy.stats.rankdata') import scipy from scipy.stats import rankdata diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 4cd5a643ce4be..c0c678c184ee8 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1129,10 +1129,10 @@ def test_isnotnull(self): @pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 @pytest.mark.parametrize('columns', [None, list('cd')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) -@pytest.mark.parametrize('dtype', [object, bool, int, float, np.uint16]) +@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # GH 4343 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy') # Make one ndarray and from it one sparse matrix, both to be used for # constructing frames and comparing results @@ -1180,6 +1180,51 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): tm.assert_equal(sdf.to_coo().dtype, np.object_) +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +def test_from_to_scipy_object(spmatrix, fill_value): + # GH 4343 + dtype = object + columns = list('cd') + index = list('ab') + tm.skip_if_no_package('scipy', max_version='0.19.0') + + # Make one ndarray and from it one sparse matrix, both to be used for + # constructing frames and comparing results + arr = np.eye(2, dtype=dtype) + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm, index=index, columns=columns, + default_fill_value=fill_value) + + # Expected result construction is kind of tricky for all + # dtype-fill_value 
combinations; easiest to cast to something generic + # and except later on + rarr = arr.astype(object) + rarr[arr == 0] = np.nan + expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna( + fill_value if fill_value is not None else np.nan) + + # Assert frame is as expected + sdf_obj = sdf.astype(object) + tm.assert_sp_frame_equal(sdf_obj, expected) + tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) + + # Assert spmatrices equal + tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok())) + + # Ensure dtype is preserved if possible + res_dtype = object + tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)}) + tm.assert_equal(sdf.to_coo().dtype, res_dtype) + + class TestSparseDataFrameArithmetic(tm.TestCase): def test_numeric_op_scalar(self): diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 937c20d009b6b..75a7555d58ca5 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -5,7 +5,7 @@ import warnings import numpy as np -from pandas import Series, isnull +from pandas import Series, isnull, _np_version_under1p9 from pandas.types.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm @@ -338,8 +338,7 @@ def test_nanmean_overflow(self): # is now consistent with numpy # numpy < 1.9.0 is not computing this correctly - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= '1.9.0': + if not _np_version_under1p9: for a in [2 ** 55, -2 ** 55, 20150515061816532]: s = Series(a, index=range(500), dtype=np.int64) result = s.mean() @@ -388,8 +387,7 @@ def test_nanstd(self): allow_tdelta=True, allow_obj='convert') def test_nansem(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() + tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import sem self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, allow_str=False, allow_date=False, @@ -448,16 +446,14 @@ def _skew_kurt_wrap(self, values, axis=None, func=None): return result def test_nanskew(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() + tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import skew func = partial(self._skew_kurt_wrap, func=skew) self.check_funs(nanops.nanskew, func, allow_complex=False, allow_str=False, allow_date=False, allow_tdelta=False) def test_nankurt(self): - tm.skip_if_no_package('scipy.stats') - tm._skip_if_scipy_0_17() + tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import kurtosis func1 = partial(kurtosis, fisher=True) func = partial(self._skew_kurt_wrap, func=func1) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3f2973a9834ca..b7164d31b2a5e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -905,7 +905,7 @@ def test_cmov_window_na_min_periods(self): def test_cmov_window_regular(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -938,7 +938,7 @@ def test_cmov_window_regular(self): def test_cmov_window_regular_linear_range(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -955,7 +955,7 @@ def test_cmov_window_regular_linear_range(self): def test_cmov_window_regular_missing_data(self): # GH 
8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -988,7 +988,7 @@ def test_cmov_window_regular_missing_data(self): def test_cmov_window_special(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., @@ -1015,7 +1015,7 @@ def test_cmov_window_special(self): def test_cmov_window_special_linear_range(self): # GH 8238 - tm._skip_if_no_scipy() + tm.skip_if_no_package('scipy', max_version='0.19.0') win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 74ff480a9c198..529ecef3e2d6a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -304,14 +304,6 @@ def _skip_if_no_scipy(): pytest.skip('scipy.sparse missing') -def _skip_if_scipy_0_17(): - import scipy - v = scipy.__version__ - if v >= LooseVersion("0.17.0"): - import pytest - pytest.skip("scipy 0.17") - - def _check_if_lzma(): try: return compat.import_lzma() @@ -2020,15 +2012,18 @@ def __init__(self, *args, **kwargs): # Dependency checks. Copied this from Nipy/Nipype (Copyright of # respective developers, license: BSD-3) -def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): - """Check that the minimal version of the required package is installed. +def package_check(pkg_name, min_version=None, max_version=None, app='pandas', + checker=LooseVersion): + """Check that the min/max version of the required package is installed. Parameters ---------- pkg_name : string Name of the required package. - version : string, optional + min_version : string, optional Minimal version number for required package. + max_version : string, optional + Max version number for required package. app : string, optional Application that is performing the check. 
For instance, the name of the tutorial being executed that depends on specific @@ -2040,7 +2035,6 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): Examples -------- package_check('numpy', '1.3') - package_check('networkx', '1.0', 'tutorial1') """ @@ -2049,8 +2043,10 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): msg = '%s requires %s' % (app, pkg_name) else: msg = 'module requires %s' % pkg_name - if version: - msg += ' with version >= %s' % (version,) + if min_version: + msg += ' with version >= %s' % (min_version,) + if max_version: + msg += ' with version < %s' % (max_version,) try: mod = __import__(pkg_name) except ImportError: @@ -2059,7 +2055,9 @@ def package_check(pkg_name, version=None, app='pandas', checker=LooseVersion): have_version = mod.__version__ except AttributeError: pytest.skip('Cannot find version for %s' % pkg_name) - if version and checker(have_version) < checker(version): + if min_version and checker(have_version) < checker(min_version): + pytest.skip(msg) + if max_version and checker(have_version) >= checker(max_version): pytest.skip(msg) From 7d04391dd3240c2d7cc80d638a39ad06b1ab679a Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 12 Mar 2017 20:54:52 -0400 Subject: [PATCH 199/353] COMPAT: free parser memory at close() for non-refcnt gc relying on __dealloc__ to clean up malloc() ed memory can lead to a perceived "leak" on PyPy since the garbage collector will not necessarily collect the object as soon as its refcnt reaches 0. Instead, pre-emptively release memory when close() is called The code still maintains backward compatibility for the case where close() is never called Author: mattip Closes #15665 from mattip/pypy-compat and squashes the following commits: eaf50fe [mattip] COMPAT: free parser memory at close() for non-refcnt gc --- pandas/_libs/src/parser/tokenizer.c | 4 ++++ pandas/_libs/src/parser/tokenizer.h | 2 ++ pandas/io/parsers.pyx | 18 ++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 916f06d357473..6b0775e54da0c 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -162,6 +162,7 @@ int parser_cleanup(parser_t *self) { if (self->cb_cleanup(self->source) < 0) { status = -1; } + self->cb_cleanup = NULL; } return status; @@ -239,6 +240,9 @@ int parser_init(parser_t *self) { void parser_free(parser_t *self) { // opposite of parser_init parser_cleanup(self); +} + +void parser_del(parser_t *self) { free(self); } diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 9853b5149bee3..b4344e8a6c070 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -243,6 +243,8 @@ int parser_set_skipfirstnrows(parser_t *self, int64_t nrows); void parser_free(parser_t *self); +void parser_del(parser_t *self); + void parser_set_default_options(parser_t *self); void debug_print_parser(parser_t *self); diff --git a/pandas/io/parsers.pyx b/pandas/io/parsers.pyx index a5858accbb6f5..3728cda559050 100644 --- a/pandas/io/parsers.pyx +++ b/pandas/io/parsers.pyx @@ -214,6 +214,7 @@ cdef extern from "parser/tokenizer.h": int parser_init(parser_t *self) nogil void parser_free(parser_t *self) nogil + void parser_del(parser_t *self) nogil int parser_add_skiprow(parser_t *self, int64_t row) int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) @@ -573,8 +574,13 @@ cdef class TextReader: 
def __dealloc__(self): parser_free(self.parser) - kh_destroy_str(self.true_set) - kh_destroy_str(self.false_set) + if self.true_set: + kh_destroy_str(self.true_set) + self.true_set = NULL + if self.false_set: + kh_destroy_str(self.false_set) + self.false_set = NULL + parser_del(self.parser) def close(self): # we need to properly close an open derived @@ -584,6 +590,14 @@ cdef class TextReader: self.handle.close() except: pass + # also preemptively free all allocated memory + parser_free(self.parser) + if self.true_set: + kh_destroy_str(self.true_set) + self.true_set = NULL + if self.false_set: + kh_destroy_str(self.false_set) + self.false_set = NULL def set_error_bad_lines(self, int status): self.parser.error_bad_lines = status From 97c065ebbba0760685343b16e1759bf77f0f9ce0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 13 Mar 2017 10:08:01 -0400 Subject: [PATCH 200/353] DOC: typo in merge.rst --- doc/source/categorical.rst | 1 + doc/source/merging.rst | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 6d85e1a6560b0..2203737ecd7b5 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -617,6 +617,7 @@ Assigning a `Categorical` to parts of a column of other types will use the value df df.dtypes +.. _categorical.merge: Merging ~~~~~~~ diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 70d2ce5b1a664..0b7f9f18190a4 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -779,7 +779,7 @@ resulting dtype will be upcast. .. versionadded:: 0.20.0 -Merging will preserve ``category`` dtypes of the mergands. +Merging will preserve ``category`` dtypes of the mergands. See also the section on :ref:`categoricals ` The left frame. @@ -788,8 +788,8 @@ The left frame. X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) X = X.astype('category', categories=['foo', 'bar']) - left = DataFrame({'X': X, - 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) + left = pd.DataFrame({'X': X, + 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) left left.dtypes @@ -797,8 +797,8 @@ The right frame. .. 
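The commit message of PATCH 199 is worth pausing on: on PyPy the garbage collector does not run as soon as a refcount hits zero, so native memory that is only released in __dealloc__ can outlive the last reference and look like a leak. The Cython change therefore frees eagerly in close() and nulls the pointers so that a later __dealloc__ is a harmless no-op. A rough pure-Python sketch of that pattern (illustrative only: NativeBuffer is a made-up name, and a bytearray stands in for the malloc()-ed parser buffers):

class NativeBuffer(object):
    """Toy wrapper around a manually managed resource."""

    def __init__(self, size):
        # stand-in for the C-level allocation done at parser init time
        self._buf = bytearray(size)

    def close(self):
        # eager, idempotent cleanup: callers get deterministic release,
        # and calling close() twice (or close() then __del__) is safe
        if self._buf is not None:
            self._buf = None

    def __del__(self):
        # backstop only; on PyPy this may run long after the last
        # reference disappears, which is exactly the problem described
        self.close()


buf = NativeBuffer(1024)
buf.close()  # free as soon as the reader is done, not whenever the GC runs

The NULL checks added around kh_destroy_str and the split of parser_free / parser_del in the patch play the same role as the "is not None" guard here: cleanup can happen at close() or at __dealloc__, whichever comes first, without a double free.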
ipython:: python - right = DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), - 'Z': [1, 2]}) + right = pd.DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), + 'Z': [1, 2]}) right right.dtypes From 03dca9610b6ad91538c1cd1da71fb5196d7bb3f7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Mar 2017 15:47:49 +0100 Subject: [PATCH 201/353] DOC: correct whatsnew note of #15515 --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f6d5e3df814fc..8a4f2f47b9853 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -880,7 +880,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) -- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 0.2.0``) (:issue:`9351`) +- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) From 32df1e6ae452f7ddd31dc41fa613992493eb51c4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 13 Mar 2017 11:10:38 -0400 Subject: [PATCH 202/353] DOC: typo in merge.rst --- doc/source/merging.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 0b7f9f18190a4..fb020727d077e 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -797,7 +797,7 @@ The right frame. .. 
ipython:: python - right = pd.DataFrame({'X': Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), + right = pd.DataFrame({'X': pd.Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), 'Z': [1, 2]}) right right.dtypes From 998c801f76256990b98d3f0d2ad885ae27c955a1 Mon Sep 17 00:00:00 2001 From: Aleksey Bilogur Date: Mon, 13 Mar 2017 19:04:39 -0400 Subject: [PATCH 203/353] TST: fix errant tight_layout test (#15671) closes #9351 --- pandas/tests/plotting/common.py | 1 + pandas/tests/plotting/test_hist_method.py | 4 ++-- pandas/tools/plotting.py | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 92e2dc7b5d934..c31d8b539ae6f 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -53,6 +53,7 @@ def setUp(self): self.mpl_ge_1_4_0 = plotting._mpl_ge_1_4_0() self.mpl_ge_1_5_0 = plotting._mpl_ge_1_5_0() self.mpl_ge_2_0_0 = plotting._mpl_ge_2_0_0() + self.mpl_ge_2_0_1 = plotting._mpl_ge_2_0_1() if self.mpl_ge_1_4_0: self.bp_n_objects = 7 diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 22de7055e3cea..380bdc12abce4 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -241,8 +241,8 @@ def test_hist_layout(self): @slow # GH 9351 def test_tight_layout(self): - if self.mpl_ge_2_0_0: - df = DataFrame(randn(100, 2)) + if self.mpl_ge_2_0_1: + df = DataFrame(randn(100, 3)) _check_plot_works(df.hist) self.plt.tight_layout() diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index d46c38c117445..d311b0e6d83eb 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -150,6 +150,14 @@ def _mpl_ge_2_0_0(): return False +def _mpl_ge_2_0_1(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0.1') + except ImportError: + return False + + if _mpl_ge_1_5_0(): # Compat with mp 1.5, which uses cycler. 
import cycler From 05d70f4e617a274813bdb02db69143b5554aa106 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 13 Mar 2017 19:49:42 -0400 Subject: [PATCH 204/353] DOC: use shared docs on Index._convert_list_indexer (#15678) CLN: push key coercion to the indexes themselves to simplify a bit --- pandas/core/indexing.py | 86 ++++++++++---------------------------- pandas/indexes/base.py | 37 ++++++++++++++++ pandas/indexes/category.py | 19 ++++++--- pandas/indexes/multi.py | 33 +++++++++++++++ pandas/indexes/numeric.py | 1 + 5 files changed, 106 insertions(+), 70 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 546cbd8337e7e..19b7771251da3 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -7,7 +7,6 @@ from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries from pandas.types.common import (is_integer_dtype, is_integer, is_float, - is_categorical_dtype, is_list_like, is_sequence, is_iterator, @@ -1087,51 +1086,24 @@ def _getitem_iterable(self, key, axis=0): inds, = key.nonzero() return self.obj.take(inds, axis=axis, convert=False) else: - if isinstance(key, Index): - keyarr = labels._convert_index_indexer(key) - else: - keyarr = _asarray_tuplesafe(key) - keyarr = labels._convert_arr_indexer(keyarr) - - if is_categorical_dtype(labels): - keyarr = labels._shallow_copy(keyarr) - - # have the index handle the indexer and possibly return - # an indexer or raising - indexer = labels._convert_list_indexer(keyarr, kind=self.name) + # Have the index compute an indexer or return None + # if it cannot handle + indexer, keyarr = labels._convert_listlike_indexer( + key, kind=self.name) if indexer is not None: return self.obj.take(indexer, axis=axis) - # this is not the most robust, but... - if (isinstance(labels, MultiIndex) and len(keyarr) and - not isinstance(keyarr[0], tuple)): - level = 0 - else: - level = None - # existing labels are unique and indexer are unique if labels.is_unique and Index(keyarr).is_unique: try: - result = self.obj.reindex_axis(keyarr, axis=axis, - level=level) - - # this is an error as we are trying to find - # keys in a multi-index that don't exist - if isinstance(labels, MultiIndex) and level is not None: - if (hasattr(result, 'ndim') and - not np.prod(result.shape) and len(keyarr)): - raise KeyError("cannot index a multi-index axis " - "with these keys") - - return result - + return self.obj.reindex_axis(keyarr, axis=axis) except AttributeError: # Series if axis != 0: raise AssertionError('axis must be 0') - return self.obj.reindex(keyarr, level=level) + return self.obj.reindex(keyarr) # existing labels are non-unique else: @@ -1225,49 +1197,33 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): if is_nested_tuple(obj, labels): return labels.get_locs(obj) + elif is_list_like_indexer(obj): + if is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) inds, = obj.nonzero() return inds else: - if isinstance(obj, Index): - # want Index objects to pass through untouched - objarr = obj - else: - objarr = _asarray_tuplesafe(obj) - # The index may want to handle a list indexer differently - # by returning an indexer or raising - indexer = labels._convert_list_indexer(objarr, kind=self.name) + # Have the index compute an indexer or return None + # if it cannot handle + indexer, objarr = labels._convert_listlike_indexer( + obj, kind=self.name) if indexer is not None: return indexer - # this is not the most robust, but... 
- if (isinstance(labels, MultiIndex) and - not isinstance(objarr[0], tuple)): - level = 0 - _, indexer = labels.reindex(objarr, level=level) + # unique index + if labels.is_unique: + indexer = check = labels.get_indexer(objarr) - # take all - if indexer is None: - indexer = np.arange(len(labels)) - - check = labels.levels[0].get_indexer(objarr) + # non-unique (dups) else: - level = None - - # unique index - if labels.is_unique: - indexer = check = labels.get_indexer(objarr) - - # non-unique (dups) - else: - (indexer, - missing) = labels.get_indexer_non_unique(objarr) - # 'indexer' has dupes, create 'check' using 'missing' - check = np.zeros_like(objarr) - check[missing] = -1 + (indexer, + missing) = labels.get_indexer_non_unique(objarr) + # 'indexer' has dupes, create 'check' using 'missing' + check = np.zeros_like(objarr) + check[missing] = -1 mask = check == -1 if mask.any(): diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 7f46f437489a1..5b942e2565c29 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1339,6 +1339,27 @@ def is_int(v): return indexer + def _convert_listlike_indexer(self, keyarr, kind=None): + """ + Parameters + ---------- + keyarr : list-like + Indexer to convert. + + Returns + ------- + tuple (indexer, keyarr) + indexer is an ndarray or None if cannot convert + keyarr are tuple-safe keys + """ + if isinstance(keyarr, Index): + keyarr = self._convert_index_indexer(keyarr) + else: + keyarr = self._convert_arr_indexer(keyarr) + + indexer = self._convert_list_indexer(keyarr, kind=kind) + return indexer, keyarr + _index_shared_docs['_convert_arr_indexer'] = """ Convert an array-like indexer to the appropriate dtype. @@ -1354,6 +1375,7 @@ def is_int(v): @Appender(_index_shared_docs['_convert_arr_indexer']) def _convert_arr_indexer(self, keyarr): + keyarr = _asarray_tuplesafe(keyarr) return keyarr _index_shared_docs['_convert_index_indexer'] = """ @@ -1373,6 +1395,21 @@ def _convert_arr_indexer(self, keyarr): def _convert_index_indexer(self, keyarr): return keyarr + _index_shared_docs['_convert_list_indexer'] = """ + Convert a list-like indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : Index (or sub-class) + Indexer to convert. + kind : iloc, ix, loc, optional + + Returns + ------- + positional indexer or None + """ + + @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): """ passed a key that is tuplesafe that is integer based diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 3d8f76fc56b01..923dd4ec785c5 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -18,6 +18,8 @@ import pandas.core.base as base import pandas.core.missing as missing import pandas.indexes.base as ibase +from pandas.core.common import _asarray_tuplesafe + _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) @@ -458,12 +460,10 @@ def get_indexer_non_unique(self, target): codes = self.categories.get_indexer(target) return self._engine.get_indexer_non_unique(codes) + @Appender(_index_shared_docs['_convert_list_indexer']) def _convert_list_indexer(self, keyarr, kind=None): - """ - we are passed a list indexer. 
- Return our indexer or raise if all of the values are not included in - the categories - """ + # Return our indexer or raise if all of the values are not included in + # the categories codes = self.categories.get_indexer(keyarr) if (codes == -1).any(): raise KeyError("a list-indexer must only include values that are " @@ -471,6 +471,15 @@ def _convert_list_indexer(self, keyarr, kind=None): return None + @Appender(_index_shared_docs['_convert_arr_indexer']) + def _convert_arr_indexer(self, keyarr): + keyarr = _asarray_tuplesafe(keyarr) + return self._shallow_copy(keyarr) + + @Appender(_index_shared_docs['_convert_index_indexer']) + def _convert_index_indexer(self, keyarr): + return self._shallow_copy(keyarr) + @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index bca1db83b6645..1c1609fed1dd1 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1568,6 +1568,39 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): return new_index, indexer + def _convert_listlike_indexer(self, keyarr, kind=None): + """ + Parameters + ---------- + keyarr : list-like + Indexer to convert. + + Returns + ------- + tuple (indexer, keyarr) + indexer is an ndarray or None if cannot convert + keyarr are tuple-safe keys + """ + indexer, keyarr = super(MultiIndex, self)._convert_listlike_indexer( + keyarr, kind=kind) + + # are we indexing a specific level + if indexer is None and len(keyarr) and not isinstance(keyarr[0], + tuple): + level = 0 + _, indexer = self.reindex(keyarr, level=level) + + # take all + if indexer is None: + indexer = np.arange(len(self)) + + check = self.levels[0].get_indexer(keyarr) + mask = check == -1 + if mask.any(): + raise KeyError('%s not in index' % keyarr[mask]) + + return indexer, keyarr + @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 9bb70feb2501f..2f897c81975c2 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -203,6 +203,7 @@ def _convert_arr_indexer(self, keyarr): # Cast the indexer to uint64 if possible so # that the values returned from indexing are # also uint64. 
+ keyarr = _asarray_tuplesafe(keyarr) if is_integer_dtype(keyarr): return _asarray_tuplesafe(keyarr, dtype=np.uint64) return keyarr From 7d34d4d5c2d2c6c68b4124076571cfab9c3b4aee Mon Sep 17 00:00:00 2001 From: Jaehoon Hwang Date: Tue, 14 Mar 2017 08:28:05 -0400 Subject: [PATCH 205/353] BUG: upcasting on reshaping ops #13247 Original work done by @jennolsen84, in #13337 closes #13247 Author: Jaehoon Hwang Author: Jae Closes #15594 from jaehoonhwang/Bug13247 and squashes the following commits: 3cd1734 [Jaehoon Hwang] Pass the non-related tests in test_partial and test_reshape 1fa578b [Jaehoon Hwang] Applying request changes removing unnecessary test and renameing 6744636 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 5bb72c7 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 a1d5d40 [Jaehoon Hwang] Completed pytest 8122359 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 0e52b74 [Jaehoon Hwang] Working: Except for pytest 8fec07c [Jaehoon Hwang] Fix: test_concat.py and internals.py 4f6c03e [Jaehoon Hwang] Fix: is_float_dtypes and is_numeric_dtype wrong place d3476c0 [Jaehoon Hwang] Merge branch 'master' into Bug13247 b977615 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' 4b1e5c6 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 45f7ae9 [Jaehoon Hwang] Added pytest function 468baee [Jae] BUG: upcasting on reshaping ops #13247 --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/core/internals.py | 20 ++++++++++++++++---- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_reshape.py | 1 + pandas/tests/tools/test_concat.py | 14 ++++++++++++++ 6 files changed, 35 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 8a4f2f47b9853..097efdd097eec 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -886,3 +886,5 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) + +- Concating multiple objects will no longer result in automatically upcast to `float64`, and instead try to find the smallest `dtype` that would suffice (:issue:`13247`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index aa954fbee9a60..1c070b3ed34a9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -21,6 +21,7 @@ is_datetime64tz_dtype, is_object_dtype, is_datetimelike_v_numeric, + is_float_dtype, is_numeric_dtype, is_numeric_v_string_like, is_extension_type, is_list_like, is_re, @@ -4522,6 +4523,8 @@ def _interleaved_dtype(blocks): return np.dtype('int%s' % (lcd.itemsize * 8 * 2)) return lcd + elif have_int and have_float and not have_complex: + return np.dtype('float64') elif have_complex: return np.dtype('c16') else: @@ -4891,6 +4894,8 @@ def get_empty_dtype_and_na(join_units): upcast_cls = 'datetime' elif is_timedelta64_dtype(dtype): upcast_cls = 'timedelta' + elif is_float_dtype(dtype) or is_numeric_dtype(dtype): + upcast_cls = dtype.name else: upcast_cls = 'float' @@ -4915,8 +4920,6 @@ def get_empty_dtype_and_na(join_units): return np.dtype(np.bool_), None elif 'category' in upcast_classes: return 
np.dtype(np.object_), np.nan - elif 'float' in upcast_classes: - return np.dtype(np.float64), np.nan elif 'datetimetz' in upcast_classes: dtype = upcast_classes['datetimetz'] return dtype[0], tslib.iNaT @@ -4925,7 +4928,17 @@ def get_empty_dtype_and_na(join_units): elif 'timedelta' in upcast_classes: return np.dtype('m8[ns]'), tslib.iNaT else: # pragma - raise AssertionError("invalid dtype determination in get_concat_dtype") + g = np.find_common_type(upcast_classes, []) + if is_float_dtype(g): + return g, g.type(np.nan) + elif is_numeric_dtype(g): + if has_none_blocks: + return np.float64, np.nan + else: + return g, None + else: + msg = "invalid dtype determination in get_concat_dtype" + raise AssertionError(msg) def concatenate_join_units(join_units, concat_axis, copy): @@ -5190,7 +5203,6 @@ def is_null(self): return True def get_reindexed_values(self, empty_dtype, upcasted_na): - if upcasted_na is None: # No upcasting is necessary fill_value = self.block.fill_value diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index a00f880ff6591..b92ffbfb6fe59 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -210,7 +210,7 @@ def f(): df.loc[3] = [6, 7] exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'], - dtype='float64') + dtype='object') tm.assert_frame_equal(df, exp) def test_series_partial_set(self): diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 5ab2bbc4ac6ba..df5e843097514 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -651,7 +651,7 @@ def test_interleave(self): mgr = create_mgr('a: f8; b: i8') self.assertEqual(mgr.as_matrix().dtype, 'f8') mgr = create_mgr('a: f4; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f4') + self.assertEqual(mgr.as_matrix().dtype, 'f8') mgr = create_mgr('a: f4; b: i8; d: object') self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: bool; b: i8') diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index d587e4ea6a1fa..24e26be15a44b 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -250,6 +250,7 @@ def test_basic_types(self): self.assertEqual(type(r), exp_df_type) r = get_dummies(s_df, sparse=self.sparse, columns=['a']) + exp_blk_type = pd.core.internals.IntBlock self.assertEqual(type(r[['a_0']]._data.blocks[0]), exp_blk_type) self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type) self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type) diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index a2b5773f551c9..a0b22892e74c5 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -13,6 +13,8 @@ makeCustomDataframe as mkdf, assert_almost_equal) +import pytest + class ConcatenateBase(tm.TestCase): @@ -1899,3 +1901,15 @@ def test_concat_multiindex_dfs_with_deepcopy(self): tm.assert_frame_equal(result_copy, expected) result_no_copy = pd.concat(example_dict, names=['testname']) tm.assert_frame_equal(result_no_copy, expected) + + +@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) +@pytest.mark.parametrize('dt', np.sctypes['float']) +def test_concat_no_unnecessary_upcast(dt, pdt): + # GH 13247 + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], dtype=dt, ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == dt From 
c7c74ad7b2fc33f68e59a7a4f677ce48c2829b18 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Mar 2017 08:35:38 -0400 Subject: [PATCH 206/353] DOC/TST: clean up docs & tests, xref #15594 BUG: default_fill_value for get_dummies will be 0 --- doc/source/whatsnew/v0.20.0.txt | 37 +++++++++++++++++++++-- pandas/core/internals.py | 6 ++-- pandas/core/reshape.py | 3 +- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/test_reshape.py | 42 +++++++++++++++------------ pandas/tests/tools/test_concat.py | 11 +++++++ 6 files changed, 74 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 097efdd097eec..a509e45b13d9a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -516,6 +516,39 @@ New Behavior: In [5]: df['a']['2011-12-31 23:59:59'] Out[5]: 1 +.. _whatsnew_0200.api_breaking.concat_dtypes: + +Concat of different float dtypes will not automatically upcast +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, ``concat`` of multiple objects with different ``float`` dtypes would automatically upcast results to a dtype of ``float64``. +Now the smallest acceptable dtype will be used (:issue:`13247`) + +.. ipython:: python + + df1 = pd.DataFrame(np.array([1.0], dtype=np.float32, ndmin=2)) + df1.dtypes + +.. ipython:: python + + df2 = pd.DataFrame(np.array([np.nan], dtype=np.float32, ndmin=2)) + df2.dtypes + +Previous Behavior: + +.. code-block:: ipython + + In [7]: pd.concat([df1,df2]).dtypes + Out[7]: + 0 float64 + dtype: object + +New Behavior: + +.. ipython:: python + + pd.concat([df1,df2]).dtypes + .. _whatsnew_0200.api_breaking.gbq: Pandas Google BigQuery support has moved @@ -693,6 +726,7 @@ Other API Changes - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) +- ``SparseDataFrame.default_fill_value`` will be 0, previously was ``nan`` in the return from ``pd.get_dummies(..., sparse=True)`` (:issue:`15594`) .. 
_whatsnew_0200.deprecations: @@ -784,7 +818,6 @@ Bug Fixes - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) @@ -886,5 +919,3 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) - -- Concating multiple objects will no longer result in automatically upcast to `float64`, and instead try to find the smallest `dtype` that would suffice (:issue:`13247`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1c070b3ed34a9..0e6c176d950a1 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4936,9 +4936,9 @@ def get_empty_dtype_and_na(join_units): return np.float64, np.nan else: return g, None - else: - msg = "invalid dtype determination in get_concat_dtype" - raise AssertionError(msg) + + msg = "invalid dtype determination in get_concat_dtype" + raise AssertionError(msg) def concatenate_join_units(join_units, concat_axis, copy): diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 3279a8f2be39d..1e685ae6895ad 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1308,7 +1308,7 @@ def get_empty_Frame(data, sparse): if not sparse: return DataFrame(index=index) else: - return SparseDataFrame(index=index) + return SparseDataFrame(index=index, default_fill_value=0) # if all NaN if not dummy_na and len(levels) == 0: @@ -1357,6 +1357,7 @@ def get_empty_Frame(data, sparse): sparse_series[col] = SparseSeries(data=sarr, index=index) out = SparseDataFrame(sparse_series, index=index, columns=dummy_cols, + default_fill_value=0, dtype=np.uint8) return out diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index b92ffbfb6fe59..31fadcc88583c 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -205,7 +205,7 @@ def f(): self.assertRaises(ValueError, f) - # these are coerced to float unavoidably (as its a list-like to begin) + # TODO: #15657, these are left as object and not coerced df = DataFrame(columns=['A', 'B']) df.loc[3] = [6, 7] diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 24e26be15a44b..7ba743a6c425c 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -2,7 +2,6 @@ # pylint: disable-msg=W0612,E1101 from pandas import DataFrame, Series -from pandas.core.sparse import SparseDataFrame import pandas as pd from numpy import nan @@ -234,26 +233,31 @@ def test_basic_types(self): 'b': ['A', 'A', 'B', 'C', 'C'], 'c': [2, 3, 3, 3, 2]}) + expected = DataFrame({'a': [1, 0, 0], + 'b': [0, 1, 0], + 'c': [0, 0, 1]}, + dtype='uint8', + columns=list('abc')) if not self.sparse: - exp_df_type = DataFrame - exp_blk_type = pd.core.internals.IntBlock + compare = tm.assert_frame_equal else: - exp_df_type = SparseDataFrame - exp_blk_type = pd.core.internals.SparseBlock - - self.assertEqual( - type(get_dummies(s_list, sparse=self.sparse)), exp_df_type) - self.assertEqual( - type(get_dummies(s_series, sparse=self.sparse)), 
exp_df_type) - - r = get_dummies(s_df, sparse=self.sparse, columns=s_df.columns) - self.assertEqual(type(r), exp_df_type) - - r = get_dummies(s_df, sparse=self.sparse, columns=['a']) - exp_blk_type = pd.core.internals.IntBlock - self.assertEqual(type(r[['a_0']]._data.blocks[0]), exp_blk_type) - self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type) - self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type) + expected = expected.to_sparse(fill_value=0, kind='integer') + compare = tm.assert_sp_frame_equal + + result = get_dummies(s_list, sparse=self.sparse) + compare(result, expected) + + result = get_dummies(s_series, sparse=self.sparse) + compare(result, expected) + + result = get_dummies(s_df, sparse=self.sparse, columns=s_df.columns) + tm.assert_series_equal(result.get_dtype_counts(), + Series({'uint8': 8})) + + result = get_dummies(s_df, sparse=self.sparse, columns=['a']) + expected = Series({'uint8': 3, 'int64': 1, 'object': 1}).sort_values() + tm.assert_series_equal(result.get_dtype_counts().sort_values(), + expected) def test_just_na(self): just_na_list = [np.nan] diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index a0b22892e74c5..392036a99a297 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1913,3 +1913,14 @@ def test_concat_no_unnecessary_upcast(dt, pdt): pdt(np.array([5], dtype=dt, ndmin=dims))] x = pd.concat(dfs) assert x.values.dtype == dt + + +@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) +@pytest.mark.parametrize('dt', np.sctypes['int']) +def test_concat_will_upcast(dt, pdt): + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == 'float64' From 2621b31c7dbd68126867266d2b2e32d3e5e222d5 Mon Sep 17 00:00:00 2001 From: "Christopher C. Aycock" Date: Tue, 14 Mar 2017 10:05:38 -0400 Subject: [PATCH 207/353] BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (#15676) closes #15676 Author: Christopher C. Aycock Closes #15679 from chrisaycock/GH15676 and squashes the following commits: 965caf2 [Christopher C. Aycock] Verify that 'by' parameters are the same length 4a2cc09 [Christopher C. 
Aycock] BUG: Allow multiple 'by' parameters in merge_asof() when DataFrames are indexed (#15676) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/tools/test_merge_asof.py | 35 +++++++++++++++++++++++++++ pandas/tools/merge.py | 25 +++++++++++++------ 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a509e45b13d9a..3548cbf6eb4a7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -884,6 +884,7 @@ Bug Fixes - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` was specified (:issue:`15676`) - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) - Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) diff --git a/pandas/tests/tools/test_merge_asof.py b/pandas/tests/tools/test_merge_asof.py index cdff8f0349c15..c9460cc74c94a 100644 --- a/pandas/tests/tools/test_merge_asof.py +++ b/pandas/tests/tools/test_merge_asof.py @@ -368,6 +368,41 @@ def test_multiby_heterogeneous_types(self): by=['ticker', 'exch']) assert_frame_equal(result, expected) + def test_multiby_indexed(self): + # GH15676 + left = pd.DataFrame([ + [pd.to_datetime('20160602'), 1, 'a'], + [pd.to_datetime('20160602'), 2, 'a'], + [pd.to_datetime('20160603'), 1, 'b'], + [pd.to_datetime('20160603'), 2, 'b']], + columns=['time', 'k1', 'k2']).set_index('time') + + right = pd.DataFrame([ + [pd.to_datetime('20160502'), 1, 'a', 1.0], + [pd.to_datetime('20160502'), 2, 'a', 2.0], + [pd.to_datetime('20160503'), 1, 'b', 3.0], + [pd.to_datetime('20160503'), 2, 'b', 4.0]], + columns=['time', 'k1', 'k2', 'value']).set_index('time') + + expected = pd.DataFrame([ + [pd.to_datetime('20160602'), 1, 'a', 1.0], + [pd.to_datetime('20160602'), 2, 'a', 2.0], + [pd.to_datetime('20160603'), 1, 'b', 3.0], + [pd.to_datetime('20160603'), 2, 'b', 4.0]], + columns=['time', 'k1', 'k2', 'value']).set_index('time') + + result = pd.merge_asof(left, + right, + left_index=True, + right_index=True, + by=['k1', 'k2']) + + assert_frame_equal(expected, result) + + with self.assertRaises(MergeError): + pd.merge_asof(left, right, left_index=True, right_index=True, + left_by=['k1', 'k2'], right_by=['k1']) + def test_basic2(self): expected = self.read_data('asof2.csv') diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d02f4c5b26c86..261884bba54bd 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1165,7 +1165,7 @@ def _validate_specification(self): if self.left_by is not None and self.right_by is None: raise MergeError('missing right_by') - # add by to our key-list so we can have it in the + # add 'by' to our key-list so we can have it in the # output as a key if self.left_by is not None: if not is_list_like(self.left_by): @@ -1173,6 +1173,9 @@ def _validate_specification(self): if not is_list_like(self.right_by): self.right_by = [self.right_by] + if len(self.left_by) != len(self.right_by): + raise MergeError('left_by and right_by must be same length') + self.left_on = self.left_by + list(self.left_on) self.right_on = self.right_by + list(self.right_on) @@ -1264,13 +1267,21 @@ def flip(xs): # a "by" parameter requires special handling if self.left_by is not None: - if len(self.left_join_keys) > 2: - # get tuple 
representation of values if more than one - left_by_values = flip(self.left_join_keys[0:-1]) - right_by_values = flip(self.right_join_keys[0:-1]) + # remove 'on' parameter from values if one existed + if self.left_index and self.right_index: + left_by_values = self.left_join_keys + right_by_values = self.right_join_keys + else: + left_by_values = self.left_join_keys[0:-1] + right_by_values = self.right_join_keys[0:-1] + + # get tuple representation of values if more than one + if len(left_by_values) == 1: + left_by_values = left_by_values[0] + right_by_values = right_by_values[0] else: - left_by_values = self.left_join_keys[0] - right_by_values = self.right_join_keys[0] + left_by_values = flip(left_by_values) + right_by_values = flip(right_by_values) # upcast 'by' parameter because HashTable is limited by_type = _get_cython_type_upcast(left_by_values.dtype) From 2cad4dd0b48946add99d3d90e3dba958f2885349 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 14 Mar 2017 11:06:20 -0400 Subject: [PATCH 208/353] DOC: elevate deprecations / removals to top-level of whatsnew doc to promote visibility --- doc/source/whatsnew/v0.20.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3548cbf6eb4a7..9c6f5d3e0596d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -731,7 +731,7 @@ Other API Changes .. _whatsnew_0200.deprecations: Deprecations -^^^^^^^^^^^^ +~~~~~~~~~~~~ - ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`) - ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`) @@ -753,7 +753,7 @@ Deprecations .. _whatsnew_0200.prior_deprecations: Removal of prior version deprecations/changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The ``pandas.rpy`` module is removed. Similar functionality can be accessed through the `rpy2 `__ project. 
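For reference, a minimal sketch of the call pattern enabled by the ``merge_asof`` fix above (GH 15676). The frame contents and column names are illustrative only, and the snippet assumes a pandas build that already includes the fix:

    import pandas as pd

    # Both frames are indexed by time and share two grouping columns.
    left = pd.DataFrame({'k1': [1, 2], 'k2': ['a', 'a']},
                        index=pd.to_datetime(['20160602', '20160602']))
    right = pd.DataFrame({'k1': [1, 2], 'k2': ['a', 'a'],
                          'value': [1.0, 2.0]},
                         index=pd.to_datetime(['20160502', '20160502']))

    # Passing a list of 'by' keys together with left_index/right_index
    # previously raised; each (k1, k2) group is now matched on the index.
    result = pd.merge_asof(left, right,
                           left_index=True, right_index=True,
                           by=['k1', 'k2'])

As with any ``merge_asof``, each left row is paired with the most recent right row (backward search by default) within the same ``('k1', 'k2')`` group.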
From 76e5185a5ad07672688b096acc94ad5a8a2ec18d Mon Sep 17 00:00:00 2001 From: Yimeng Zhang Date: Wed, 15 Mar 2017 09:26:37 -0400 Subject: [PATCH 209/353] compatibility with scipy 0.19 fix #15662 Author: Yimeng Zhang Closes #15689 from zym1010/fix_scipy019 and squashes the following commits: 3cc6528 [Yimeng Zhang] doc and PEP8 9ed7524 [Yimeng Zhang] fix interpolation related issue with scipy 0.19 ca09705 [Yimeng Zhang] get symmetric window --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/window.py | 3 ++- pandas/tests/frame/test_missing.py | 33 ++++++++++++++++++++--------- pandas/tests/series/test_missing.py | 17 +++++++++++++-- pandas/tests/test_window.py | 10 ++++----- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9c6f5d3e0596d..2a6c8a1e26955 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -816,6 +816,7 @@ Bug Fixes - Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) +- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) diff --git a/pandas/core/window.py b/pandas/core/window.py index 6fda60c449f42..9c9f861451309 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -544,7 +544,8 @@ def _pop_args(win_type, arg_names, kwargs): return all_args win_type = _validate_win_type(self.win_type, kwargs) - return sig.get_window(win_type, window).astype(float) + # GH #15662. `False` makes symmetric window, rather than periodic. + return sig.get_window(win_type, window, False).astype(float) def _apply_window(self, mean=True, how=None, **kwargs): """ diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 923ed2e7c3444..93c3ba78a0abf 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -19,6 +19,13 @@ from pandas.tests.frame.common import TestData, _check_mixed_float +try: + import scipy + _is_scipy_ge_0190 = scipy.__version__ >= LooseVersion('0.19.0') +except: + _is_scipy_ge_0190 = False + + def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa @@ -548,7 +555,7 @@ def test_interp_nan_idx(self): df.interpolate(method='values') def test_interp_various(self): - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) @@ -561,8 +568,15 @@ def test_interp_various(self): assert_frame_equal(result, expected) result = df.interpolate(method='cubic') - expected.A.loc[3] = 2.81621174 - expected.A.loc[13] = 5.64146581 + # GH #15662. + # new cubic and quadratic interpolation algorithms from scipy 0.19.0. + # previously `splmake` was used. 
See scipy/scipy#6710 + if _is_scipy_ge_0190: + expected.A.loc[3] = 2.81547781 + expected.A.loc[13] = 5.52964175 + else: + expected.A.loc[3] = 2.81621174 + expected.A.loc[13] = 5.64146581 assert_frame_equal(result, expected) result = df.interpolate(method='nearest') @@ -571,8 +585,12 @@ def test_interp_various(self): assert_frame_equal(result, expected, check_dtype=False) result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 + if _is_scipy_ge_0190: + expected.A.loc[3] = 2.82150771 + expected.A.loc[13] = 6.12648668 + else: + expected.A.loc[3] = 2.82533638 + expected.A.loc[13] = 6.02817974 assert_frame_equal(result, expected) result = df.interpolate(method='slinear') @@ -585,11 +603,6 @@ def test_interp_various(self): expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) - result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 - assert_frame_equal(result, expected) - def test_interp_alt_scipy(self): tm._skip_if_no_scipy() df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 9e997da517bf6..7174283494fe7 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -4,6 +4,7 @@ import pytz from datetime import timedelta, datetime +from distutils.version import LooseVersion from numpy import nan import numpy as np import pandas as pd @@ -17,6 +18,12 @@ from .common import TestData +try: + import scipy + _is_scipy_ge_0190 = scipy.__version__ >= LooseVersion('0.19.0') +except: + _is_scipy_ge_0190 = False + def _skip_if_no_pchip(): try: @@ -827,7 +834,7 @@ def test_interp_quad(self): assert_series_equal(result, expected) def test_interp_scipy_basic(self): - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() s = Series([1, 3, np.nan, 12, np.nan, 25]) # slinear @@ -852,7 +859,13 @@ def test_interp_scipy_basic(self): result = s.interpolate(method='zero', downcast='infer') assert_series_equal(result, expected) # quadratic - expected = Series([1, 3., 6.769231, 12., 18.230769, 25.]) + # GH #15662. + # new cubic and quadratic interpolation algorithms from scipy 0.19.0. + # previously `splmake` was used. 
See scipy/scipy#6710 + if _is_scipy_ge_0190: + expected = Series([1, 3., 6.823529, 12., 18.058824, 25.]) + else: + expected = Series([1, 3., 6.769231, 12., 18.230769, 25.]) result = s.interpolate(method='quadratic') assert_series_equal(result, expected) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index b7164d31b2a5e..3f2973a9834ca 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -905,7 +905,7 @@ def test_cmov_window_na_min_periods(self): def test_cmov_window_regular(self): # GH 8238 - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -938,7 +938,7 @@ def test_cmov_window_regular(self): def test_cmov_window_regular_linear_range(self): # GH 8238 - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -955,7 +955,7 @@ def test_cmov_window_regular_linear_range(self): def test_cmov_window_regular_missing_data(self): # GH 8238 - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] @@ -988,7 +988,7 @@ def test_cmov_window_regular_missing_data(self): def test_cmov_window_special(self): # GH 8238 - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., @@ -1015,7 +1015,7 @@ def test_cmov_window_special(self): def test_cmov_window_special_linear_range(self): # GH 8238 - tm.skip_if_no_package('scipy', max_version='0.19.0') + tm._skip_if_no_scipy() win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., From 68212918a65accffb33e0db6d986ad8f080e67ed Mon Sep 17 00:00:00 2001 From: John Zwinck Date: Wed, 15 Mar 2017 12:04:12 -0400 Subject: [PATCH 210/353] ENH: use constant f32 eps, not np.finfo() during import NumPy docs for np.finfo() say not to call it during import (at module scope). It's a relatively expensive call, and it modifies the GIL state. Now we just hard-code it, because it is always the value anyway. This avoids touching the GIL at import, which helps avoid deadlocks in practice. 
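A quick sketch of why the hard-coded literal is safe, assuming only NumPy is available: the float32 machine epsilon is a fixed property of the IEEE 754 single-precision format, so the constant agrees with ``np.finfo`` at float32 precision:

    import numpy as np

    # 2**-23 is the float32 machine epsilon; the hard-coded literal
    # rounds to exactly this value when cast to float32.
    assert np.finfo(np.float32).eps == 2 ** -23
    assert np.float32(1.1920929e-07) == np.finfo(np.float32).eps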
closes #14641 Author: John Zwinck Closes #15691 from jzwinck/patch-1 and squashes the following commits: dadb97c [John Zwinck] DOC: mention #14641 in 0.20.0 whatsnew e565230 [John Zwinck] ENH: use constant f32 eps, not np.finfo() during import --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/indexing.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2a6c8a1e26955..41b6519eb740f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -905,6 +905,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 19b7771251da3..c80e8c34aa88f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1852,7 +1852,7 @@ def _convert_key(self, key, is_setter=False): # 32-bit floating point machine epsilon -_eps = np.finfo('f4').eps +_eps = 1.1920929e-07 def length_of_indexer(indexer, target=None): From e7956c45e11244cb1346f088697f3c494612bae4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 15 Mar 2017 19:15:36 -0400 Subject: [PATCH 211/353] TST: reorg tests_multilevel.py tests --- pandas/tests/test_multilevel.py | 813 ++++++++++++++++---------------- 1 file changed, 411 insertions(+), 402 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d1b7fdadce6ae..d7b115d808312 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -12,18 +12,15 @@ from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp from pandas.types.common import is_float_dtype, is_integer_dtype -from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal, assertRaisesRegexp) import pandas.core.common as com import pandas.util.testing as tm from pandas.compat import (range, lrange, StringIO, lzip, u, product as cart_product, zip) import pandas as pd - import pandas._libs.index as _index -class TestMultiLevel(tm.TestCase): +class Base(object): def setUp(self): @@ -58,6 +55,9 @@ def setUp(self): inplace=True) self.ymd.index.set_names(['year', 'month', 'day'], inplace=True) + +class TestMultiLevel(Base, tm.TestCase): + def test_append(self): a, b = self.frame[:5], self.frame[5:] @@ -87,19 +87,19 @@ def test_append_index(self): (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz)), (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz))] expected = Index([1.1, 1.2, 1.3] + expected_tuples) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(idx1) expected = Index(expected_tuples + [1.1, 1.2, 1.3]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(midx_lv2) expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) result = midx_lv2.append(midx_lv3) - self.assert_index_equal(result, expected) + 
tm.assert_index_equal(result, expected) result = midx_lv3.append(midx_lv2) expected = Index._simple_new( @@ -107,7 +107,7 @@ def test_append_index(self): (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz), 'B'), (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz), 'C')] + expected_tuples), None) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_dataframe_constructor(self): multi = DataFrame(np.random.randn(4, 4), @@ -139,18 +139,18 @@ def test_reindex_level(self): result = month_sums.reindex(self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Series result = month_sums['A'].reindex(self.ymd.index, level=1) expected = self.ymd['A'].groupby(level='month').transform(np.sum) - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) # axis=1 month_sums = self.ymd.T.sum(axis=1, level='month') result = month_sums.reindex(columns=self.ymd.index, level=1) expected = self.ymd.groupby(level='month').transform(np.sum).T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_binops_level(self): def _check_op(opname): @@ -160,7 +160,7 @@ def _check_op(opname): broadcasted = self.ymd.groupby(level='month').transform(np.sum) expected = op(self.ymd, broadcasted) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Series op = getattr(Series, opname) @@ -169,7 +169,7 @@ def _check_op(opname): np.sum) expected = op(self.ymd['A'], broadcasted) expected.name = 'A' - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) _check_op('sub') _check_op('add') @@ -179,7 +179,7 @@ def _check_op(opname): def test_pickle(self): def _test_roundtrip(frame): unpickled = self.round_trip_pickle(frame) - assert_frame_equal(frame, unpickled) + tm.assert_frame_equal(frame, unpickled) _test_roundtrip(self.frame) _test_roundtrip(self.frame.T) @@ -189,11 +189,11 @@ def _test_roundtrip(frame): def test_reindex(self): expected = self.frame.iloc[[0, 3]] reindexed = self.frame.loc[[('foo', 'one'), ('bar', 'one')]] - assert_frame_equal(reindexed, expected) + tm.assert_frame_equal(reindexed, expected) with catch_warnings(record=True): reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] - assert_frame_equal(reindexed, expected) + tm.assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels(self): new_index = self.ymd.index[::10] @@ -214,50 +214,6 @@ def test_reindex_preserve_levels(self): chunk = ymdT.loc[:, new_index] self.assertIs(chunk.columns, new_index) - def test_sort_index_preserve_levels(self): - result = self.frame.sort_index() - self.assertEqual(result.index.names, self.frame.index.names) - - def test_sorting_repr_8017(self): - - np.random.seed(0) - data = np.random.randn(3, 4) - - for gen, extra in [([1., 3., 2., 5.], 4.), ([1, 3, 2, 5], 4), - ([Timestamp('20130101'), Timestamp('20130103'), - Timestamp('20130102'), Timestamp('20130105')], - Timestamp('20130104')), - (['1one', '3one', '2one', '5one'], '4one')]: - columns = MultiIndex.from_tuples([('red', i) for i in gen]) - df = DataFrame(data, index=list('def'), columns=columns) - df2 = pd.concat([df, - DataFrame('world', index=list('def'), - columns=MultiIndex.from_tuples( - [('red', extra)]))], axis=1) - - # check that the repr is good - # make sure that we have a correct sparsified repr - # e.g. 
only 1 header of read - self.assertEqual(str(df2).splitlines()[0].split(), ['red']) - - # GH 8017 - # sorting fails after columns added - - # construct single-dtype then sort - result = df.copy().sort_index(axis=1) - expected = df.iloc[:, [0, 2, 1, 3]] - assert_frame_equal(result, expected) - - result = df2.sort_index(axis=1) - expected = df2.iloc[:, [0, 2, 1, 4, 3]] - assert_frame_equal(result, expected) - - # setitem then sort - result = df.copy() - result[('red', extra)] = 'world' - result = result.sort_index(axis=1) - assert_frame_equal(result, expected) - def test_repr_to_string(self): repr(self.frame) repr(self.ymd) @@ -283,9 +239,11 @@ def test_getitem_simple(self): df = self.frame.T col = df['foo', 'one'] - assert_almost_equal(col.values, df.values[:, 0]) - self.assertRaises(KeyError, df.__getitem__, ('foo', 'four')) - self.assertRaises(KeyError, df.__getitem__, 'foobar') + tm.assert_almost_equal(col.values, df.values[:, 0]) + with pytest.raises(KeyError): + df[('foo', 'four')] + with pytest.raises(KeyError): + df['foobar'] def test_series_getitem(self): s = self.ymd['A'] @@ -297,7 +255,7 @@ def test_series_getitem(self): expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s[2000, 3, 10] expected = s[49] @@ -306,11 +264,11 @@ def test_series_getitem(self): # fancy expected = s.reindex(s.index[49:51]) result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) with catch_warnings(record=True): result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # key error self.assertRaises(KeyError, s.__getitem__, (2000, 3, 4)) @@ -325,7 +283,7 @@ def test_series_getitem_corner(self): # generator result = s[(x > 0 for x in s)] expected = s[s > 0] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_series_setitem(self): s = self.ymd['A'] @@ -347,29 +305,29 @@ def test_frame_getitem_setitem_boolean(self): result = df[df > 0] expected = df.where(df > 0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df[df > 0] = 5 values[values > 0] = 5 - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) df[df == 5] = 0 values[values == 5] = 0 - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) # a df that needs alignment first df[df[:-1] < 0] = 2 np.putmask(values[:-1], values[:-1] < 0, 2) - assert_almost_equal(df.values, values) + tm.assert_almost_equal(df.values, values) - with assertRaisesRegexp(TypeError, 'boolean values only'): + with tm.assertRaisesRegexp(TypeError, 'boolean values only'): df[df * 0] = 2 def test_frame_getitem_setitem_slice(self): # getitem result = self.frame.iloc[:4] expected = self.frame[:4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # setitem cp = self.frame.copy() @@ -385,25 +343,25 @@ def test_frame_getitem_setitem_multislice(self): df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx) result = df.loc[:, 'value'] - assert_series_equal(df['value'], result) + tm.assert_series_equal(df['value'], result) with catch_warnings(record=True): result = df.ix[:, 'value'] - assert_series_equal(df['value'], result) + tm.assert_series_equal(df['value'], result) result = df.loc[df.index[1:3], 'value'] - assert_series_equal(df['value'][1:3], result) + 
tm.assert_series_equal(df['value'][1:3], result) result = df.loc[:, :] - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) result = df df.loc[:, 'value'] = 10 result['value'] = 10 - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) df.loc[:, :] = 10 - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) def test_frame_getitem_multicolumn_empty_level(self): f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) @@ -413,7 +371,7 @@ def test_frame_getitem_multicolumn_empty_level(self): result = f['level1 item1'] expected = DataFrame([['1'], ['2'], ['3']], index=f.index, columns=['level3 item1']) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_setitem_multi_column(self): df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'], @@ -421,12 +379,12 @@ def test_frame_setitem_multi_column(self): cp = df.copy() cp['a'] = cp['b'] - assert_frame_equal(cp['a'], cp['b']) + tm.assert_frame_equal(cp['a'], cp['b']) # set with ndarray cp = df.copy() cp['a'] = cp['b'].values - assert_frame_equal(cp['a'], cp['b']) + tm.assert_frame_equal(cp['a'], cp['b']) # --------------------------------------- # #1803 @@ -444,8 +402,8 @@ def test_frame_setitem_multi_column(self): sliced_a1 = df['A', '1'] sliced_a2 = df['A', '2'] sliced_b1 = df['B', '1'] - assert_series_equal(sliced_a1, sliced_b1, check_names=False) - assert_series_equal(sliced_a2, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) self.assertEqual(sliced_a1.name, ('A', '1')) self.assertEqual(sliced_a2.name, ('A', '2')) self.assertEqual(sliced_b1.name, ('B', '1')) @@ -465,9 +423,9 @@ def test_getitem_tuple_plus_slice(self): with catch_warnings(record=True): expected3 = idf.ix[0, 0] - assert_series_equal(result, expected) - assert_series_equal(result, expected2) - assert_series_equal(result, expected3) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected2) + tm.assert_series_equal(result, expected3) def test_getitem_setitem_tuple_plus_columns(self): # GH #1013 @@ -476,26 +434,14 @@ def test_getitem_setitem_tuple_plus_columns(self): result = df.loc[(2000, 1, 6), ['A', 'B', 'C']] expected = df.loc[2000, 1, 6][['A', 'B', 'C']] - assert_series_equal(result, expected) - - def test_getitem_multilevel_index_tuple_unsorted(self): - index_columns = list("abc") - df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]], - columns=index_columns + ["data"]) - df = df.set_index(index_columns) - query_index = df.index[:1] - rs = df.loc[query_index, "data"] - - xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=['a', 'b', 'c']) - xp = Series(['x'], index=xp_idx, name='data') - assert_series_equal(rs, xp) + tm.assert_series_equal(result, expected) def test_xs(self): xs = self.frame.xs(('bar', 'two')) xs2 = self.frame.loc[('bar', 'two')] - assert_series_equal(xs, xs2) - assert_almost_equal(xs.values, self.frame.values[4]) + tm.assert_series_equal(xs, xs2) + tm.assert_almost_equal(xs.values, self.frame.values[4]) # GH 6574 # missing values in returned index should be preserrved @@ -514,18 +460,18 @@ def test_xs(self): ['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2')) result = df.xs('z', level='a1') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_partial(self): result = self.frame.xs('foo') result2 = self.frame.loc['foo'] expected = self.frame.T['foo'].T - assert_frame_equal(result, expected) - 
assert_frame_equal(result, result2) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) result = self.ymd.xs((2000, 4)) expected = self.ymd.loc[2000, 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # ex from #1796 index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]], @@ -537,14 +483,14 @@ def test_xs_partial(self): result = df.xs(['foo', 'one']) expected = df.loc['foo', 'one'] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_level(self): result = self.frame.xs('two', level='second') expected = self.frame[self.frame.index.get_level_values(1) == 'two'] expected.index = expected.index.droplevel(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'), ( 'p', 'q', 'r')]) @@ -552,7 +498,7 @@ def test_xs_level(self): result = df.xs('c', level=2) expected = df[1:2] expected.index = expected.index.droplevel(2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # this is a copy in 0.14 result = self.frame.xs('two', level='second') @@ -576,7 +522,7 @@ def test_xs_level_multiple(self): result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # this is a copy in 0.14 result = df.xs(('a', 4), level=['one', 'four']) @@ -597,7 +543,7 @@ def f(x): rs = df.xs(20111201, level='date') xp = df.loc[20111201, :] - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_xs_level0(self): from pandas import read_table @@ -612,18 +558,18 @@ def test_xs_level0(self): result = df.xs('a', level=0) expected = df.xs('a') self.assertEqual(len(result), 2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_xs_level_series(self): s = self.frame['A'] result = s[:, 'two'] expected = self.frame.xs('two', level=1)['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = self.ymd['A'] result = s[2000, 5] expected = self.ymd.loc[2000, 5]['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # not implementing this for now @@ -633,7 +579,7 @@ def test_xs_level_series(self): # lv =s.index.get_level_values(1) # expected = s[(lv == 3) | (lv == 4)] # expected.index = expected.index.droplevel(0) - # assert_series_equal(result, expected) + # tm.assert_series_equal(result, expected) # can do this though @@ -649,15 +595,15 @@ def test_getitem_toplevel(self): result = df['foo'] expected = df.reindex(columns=df.columns[:3]) expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df['bar'] result2 = df.loc[:, 'bar'] expected = df.reindex(columns=df.columns[3:5]) expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result, result2) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], @@ -667,7 +613,7 @@ def test_getitem_setitem_slice_integers(self): columns=['a', 'b', 'c', 'd']) res = frame.loc[1:2] exp = frame.reindex(frame.index[2:]) - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) frame.loc[1:2] = 7 self.assertTrue((frame.loc[1:2] == 7).values.all()) @@ -676,7 +622,7 @@ def 
test_getitem_setitem_slice_integers(self): res = series.loc[1:2] exp = series.reindex(series.index[2:]) - assert_series_equal(res, exp) + tm.assert_series_equal(res, exp) series.loc[1:2] = 7 self.assertTrue((series.loc[1:2] == 7).values.all()) @@ -691,7 +637,7 @@ def test_getitem_int(self): result = frame.loc[1] expected = frame[-3:] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # raises exception self.assertRaises(KeyError, frame.loc.__getitem__, 3) @@ -699,7 +645,7 @@ def test_getitem_int(self): # however this will work result = self.frame.iloc[2] expected = self.frame.xs(self.frame.index[2]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_getitem_partial(self): ymd = self.ymd.T @@ -707,25 +653,17 @@ def test_getitem_partial(self): expected = ymd.reindex(columns=ymd.columns[ymd.columns.labels[1] == 1]) expected.columns = expected.columns.droplevel(0).droplevel(0) - assert_frame_equal(result, expected) - - def test_getitem_slice_not_sorted(self): - df = self.frame.sort_index(level=1).T - - # buglet with int typechecking - result = df.iloc[:, :np.int32(3)] - expected = df.reindex(columns=df.columns[:3]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_setitem_change_dtype(self): dft = self.frame.T s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() - assert_series_equal(dft['foo', 'two'], s > s.median()) + tm.assert_series_equal(dft['foo', 'two'], s > s.median()) # tm.assertIsInstance(dft._data.blocks[1].items, MultiIndex) reindexed = dft.reindex(columns=[('foo', 'two')]) - assert_series_equal(reindexed['foo', 'two'], s > s.median()) + tm.assert_series_equal(reindexed['foo', 'two'], s > s.median()) def test_frame_setitem_ix(self): self.frame.loc[('bar', 'two'), 'B'] = 5 @@ -746,12 +684,12 @@ def test_frame_setitem_ix(self): def test_fancy_slice_partial(self): result = self.frame.loc['bar':'baz'] expected = self.frame[3:7] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd.loc[(2000, 2):(2000, 4)] lev = self.ymd.index.labels[1] expected = self.ymd[(lev >= 1) & (lev <= 3)] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_getitem_partial_column_select(self): idx = MultiIndex(labels=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], @@ -760,55 +698,19 @@ def test_getitem_partial_column_select(self): result = df.loc[('a', 'y'), :] expected = df.loc[('a', 'y')] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.loc[('a', 'y'), [1, 0]] expected = df.loc[('a', 'y')][[1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) with catch_warnings(record=True): result = df.ix[('a', 'y'), [1, 0]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) self.assertRaises(KeyError, df.loc.__getitem__, (('a', 'foo'), slice(None, None))) - def test_sort_index_level(self): - df = self.frame.copy() - df.index = np.arange(len(df)) - - # axis=1 - - # series - a_sorted = self.frame['A'].sort_index(level=0) - - # preserve names - self.assertEqual(a_sorted.index.names, self.frame.index.names) - - # inplace - rs = self.frame.copy() - rs.sort_index(level=0, inplace=True) - assert_frame_equal(rs, self.frame.sort_index(level=0)) - - def test_sort_index_level_large_cardinality(self): - - # #2684 (int64) - index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = 
DataFrame(np.random.randn(4000), index=index, dtype=np.int64) - - # it works! - result = df.sort_index(level=0) - self.assertTrue(result.index.lexsort_depth == 3) - - # #2684 (int32) - index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) - - # it works! - result = df.sort_index(level=0) - self.assertTrue((result.dtypes.values == df.dtypes.values).all()) - self.assertTrue(result.index.lexsort_depth == 3) - def test_delevel_infer_dtype(self): tuples = [tuple for tuple in cart_product( @@ -832,28 +734,6 @@ def test_reset_index_with_drop(self): deleveled = self.series.reset_index(drop=True) tm.assertIsInstance(deleveled, Series) - def test_sort_index_level_by_name(self): - self.frame.index.names = ['first', 'second'] - result = self.frame.sort_index(level='second') - expected = self.frame.sort_index(level=1) - assert_frame_equal(result, expected) - - def test_sort_index_level_mixed(self): - sorted_before = self.frame.sort_index(level=1) - - df = self.frame.copy() - df['foo'] = 'bar' - sorted_after = df.sort_index(level=1) - assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1)) - - dft = self.frame.T - sorted_before = dft.sort_index(level=1, axis=1) - dft['foo', 'three'] = 'bar' - - sorted_after = dft.sort_index(level=1, axis=1) - assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), - sorted_after.drop([('foo', 'three')], axis=1)) - def test_count_level(self): def _check_counts(frame, axis=0): index = frame._get_axis(axis) @@ -861,7 +741,7 @@ def _check_counts(frame, axis=0): result = frame.count(axis=axis, level=i) expected = frame.groupby(axis=axis, level=i).count() expected = expected.reindex_like(result).astype('i8') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) self.frame.iloc[1, [1, 2]] = np.nan self.frame.iloc[7, [0, 1]] = np.nan @@ -875,7 +755,7 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) + tm.assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) @@ -891,30 +771,30 @@ def test_count_level_series(self): result = s.count(level=0) expected = s.groupby(level=0).count() - assert_series_equal(result.astype('f8'), - expected.reindex(result.index).fillna(0)) + tm.assert_series_equal( + result.astype('f8'), expected.reindex(result.index).fillna(0)) result = s.count(level=1) expected = s.groupby(level=1).count() - assert_series_equal(result.astype('f8'), - expected.reindex(result.index).fillna(0)) + tm.assert_series_equal( + result.astype('f8'), expected.reindex(result.index).fillna(0)) def test_count_level_corner(self): s = self.frame['A'][:0] result = s.count(level=0) expected = Series(0, index=s.index.levels[0], name='A') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) df = self.frame[:0] result = df.count(level=0) expected = DataFrame({}, index=s.index.levels[0], columns=df.columns).fillna(0).astype(np.int64) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_get_level_number_out_of_bounds(self): - with assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assertRaisesRegexp(IndexError, "Too many levels"): self.frame.index._get_level_number(2) - with assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assertRaisesRegexp(IndexError, "not a valid 
level number"): self.frame.index._get_level_number(-3) def test_unstack(self): @@ -936,56 +816,56 @@ def test_unstack_multiple_no_empty_columns(self): unstacked = s.unstack([1, 2]) expected = unstacked.dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) def test_stack(self): # regular roundtrip unstacked = self.ymd.unstack() restacked = unstacked.stack() - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) unlexsorted = self.ymd.sort_index(level=2) unstacked = unlexsorted.unstack(2) restacked = unstacked.stack() - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) unlexsorted = unlexsorted[::-1] unstacked = unlexsorted.unstack(1) restacked = unstacked.stack().swaplevel(1, 2) - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) unlexsorted = unlexsorted.swaplevel(0, 1) unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) restacked = unstacked.stack(0).swaplevel(1, 2) - assert_frame_equal(restacked.sort_index(level=0), self.ymd) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) # columns unsorted unstacked = self.ymd.unstack() unstacked = unstacked.sort_index(axis=1, ascending=False) restacked = unstacked.stack() - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) # more than 2 levels in the columns unstacked = self.ymd.unstack(1).unstack(1) result = unstacked.stack(1) expected = self.ymd.unstack() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = unstacked.stack(2) expected = self.ymd.unstack(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = unstacked.stack(0) expected = self.ymd.stack().unstack(1).unstack(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # not all levels present in each echelon unstacked = self.ymd.unstack(2).loc[:, ::3] stacked = unstacked.stack().stack() ymd_stacked = self.ymd.stack() - assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) + tm.assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) # stack with negative number result = self.ymd.unstack(0).stack(-2) @@ -993,7 +873,7 @@ def test_stack(self): # GH10417 def check(left, right): - assert_series_equal(left, right) + tm.assert_series_equal(left, right) self.assertFalse(left.index.is_unique) li, ri = left.index, right.index tm.assert_index_equal(li, ri) @@ -1049,7 +929,7 @@ def test_unstack_odd_failure(self): result = df.unstack(2) recons = result.stack() - assert_frame_equal(recons, df) + tm.assert_frame_equal(recons, df) def test_stack_mixed_dtype(self): df = self.frame.T @@ -1058,7 +938,7 @@ def test_stack_mixed_dtype(self): stacked = df.stack() result = df['foo'].stack() - assert_series_equal(stacked['foo'], result, check_names=False) + tm.assert_series_equal(stacked['foo'], result, check_names=False) self.assertIs(result.name, None) self.assertEqual(stacked['bar'].dtype, np.float_) @@ -1074,8 +954,8 @@ def test_unstack_bug(self): unstacked = result.unstack() restacked = unstacked.stack() - assert_series_equal(restacked, - result.reindex(restacked.index).astype(float)) + tm.assert_series_equal( + restacked, result.reindex(restacked.index).astype(float)) def test_stack_unstack_preserve_names(self): unstacked = self.frame.unstack() @@ -1088,59 +968,59 @@ def 
test_stack_unstack_preserve_names(self): def test_unstack_level_name(self): result = self.frame.unstack('second') expected = self.frame.unstack(level=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_stack_level_name(self): unstacked = self.frame.unstack('second') result = unstacked.stack('exp') expected = self.frame.unstack().stack(0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.stack('exp') expected = self.frame.stack() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) self.assertEqual(unstacked.columns.names, expected.columns.names) # series s = self.ymd['A'] s_unstacked = s.unstack(['year', 'month']) - assert_frame_equal(s_unstacked, expected['A']) + tm.assert_frame_equal(s_unstacked, expected['A']) restacked = unstacked.stack(['year', 'month']) restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) restacked = restacked.sort_index(level=0) - assert_frame_equal(restacked, self.ymd) + tm.assert_frame_equal(restacked, self.ymd) self.assertEqual(restacked.index.names, self.ymd.index.names) # GH #451 unstacked = self.ymd.unstack([1, 2]) expected = self.ymd.unstack(1).unstack(1).dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected) + tm.assert_frame_equal(unstacked, expected) unstacked = self.ymd.unstack([2, 1]) expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all') - assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) + tm.assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) def test_stack_names_and_numbers(self): unstacked = self.ymd.unstack(['year', 'month']) # Can't use mixture of names and numbers to stack - with assertRaisesRegexp(ValueError, "level should contain"): + with tm.assertRaisesRegexp(ValueError, "level should contain"): unstacked.stack([0, 'month']) def test_stack_multiple_out_of_bounds(self): # nlevels == 3 unstacked = self.ymd.unstack(['year', 'month']) - with assertRaisesRegexp(IndexError, "Too many levels"): + with tm.assertRaisesRegexp(IndexError, "Too many levels"): unstacked.stack([2, 3]) - with assertRaisesRegexp(IndexError, "not a valid level number"): + with tm.assertRaisesRegexp(IndexError, "not a valid level number"): unstacked.stack([-4, -3]) def test_unstack_period_series(self): @@ -1163,9 +1043,9 @@ def test_unstack_period_series(self): columns=['A', 'B']) expected.columns.name = 'str' - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected.T) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02', '2013-03', '2013-03'], freq='M', name='period1') @@ -1189,9 +1069,9 @@ def test_unstack_period_series(self): [6, 5, np.nan, np.nan, np.nan, np.nan]], index=e_idx, columns=e_cols) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected.T) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) def test_unstack_period_frame(self): # GH 4342 @@ -1216,8 +1096,8 @@ def test_unstack_period_frame(self): expected = DataFrame([[5, 
1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) e_1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-01', '2014-02'], freq='M', name='period1') @@ -1227,7 +1107,7 @@ def test_unstack_period_frame(self): expected = DataFrame([[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols) - assert_frame_equal(result3, expected) + tm.assert_frame_equal(result3, expected) def test_stack_multiple_bug(self): """ bug when some uniques are not present in the data #3170""" @@ -1245,7 +1125,7 @@ def test_stack_multiple_bug(self): rs = down.stack('ID') xp = unst.loc[:, ['VAR1']].resample('W-THU').mean().stack('ID') xp.columns.name = 'Params' - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_stack_dropna(self): # GH #3997 @@ -1256,7 +1136,7 @@ def test_stack_dropna(self): self.assertTrue(len(stacked) > len(stacked.dropna())) stacked = df.unstack().stack(dropna=True) - assert_frame_equal(stacked, stacked.dropna()) + tm.assert_frame_equal(stacked, stacked.dropna()) def test_unstack_multiple_hierarchical(self): df = DataFrame(index=[[0, 0, 0, 0, 1, 1, 1, 1], @@ -1279,7 +1159,7 @@ def test_groupby_transform(self): applied = grouped.apply(lambda x: x * 2) expected = grouped.transform(lambda x: x * 2) result = applied.reindex(expected.index) - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) def test_unstack_sparse_keyspace(self): # memory problems with naive impl #2278 @@ -1311,7 +1191,7 @@ def test_unstack_unobserved_keys(self): self.assertEqual(len(result.columns), 4) recons = result.stack() - assert_frame_equal(recons, df) + tm.assert_frame_equal(recons, df) def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], @@ -1344,8 +1224,8 @@ def test_join(self): self.assertFalse(np.isnan(joined.values).all()) - assert_frame_equal(joined, expected, check_names=False - ) # TODO what should join do with names ? + # TODO what should join do with names ? 
+ tm.assert_frame_equal(joined, expected, check_names=False) def test_swaplevel(self): swapped = self.frame['A'].swaplevel() @@ -1353,23 +1233,23 @@ def test_swaplevel(self): swapped3 = self.frame['A'].swaplevel(0, 1) swapped4 = self.frame['A'].swaplevel('first', 'second') self.assertFalse(swapped.index.equals(self.frame.index)) - assert_series_equal(swapped, swapped2) - assert_series_equal(swapped, swapped3) - assert_series_equal(swapped, swapped4) + tm.assert_series_equal(swapped, swapped2) + tm.assert_series_equal(swapped, swapped3) + tm.assert_series_equal(swapped, swapped4) back = swapped.swaplevel() back2 = swapped.swaplevel(0) back3 = swapped.swaplevel(0, 1) back4 = swapped.swaplevel('second', 'first') self.assertTrue(back.index.equals(self.frame.index)) - assert_series_equal(back, back2) - assert_series_equal(back, back3) - assert_series_equal(back, back4) + tm.assert_series_equal(back, back2) + tm.assert_series_equal(back, back3) + tm.assert_series_equal(back, back4) ft = self.frame.T swapped = ft.swaplevel('first', 'second', axis=1) exp = self.frame.swaplevel('first', 'second').T - assert_frame_equal(swapped, exp) + tm.assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) @@ -1384,20 +1264,20 @@ def test_swaplevel_panel(self): def test_reorder_levels(self): result = self.ymd.reorder_levels(['month', 'day', 'year']) expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd['A'].reorder_levels(['month', 'day', 'year']) expected = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = self.ymd.T.reorder_levels(['month', 'day', 'year'], axis=1) expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) - with assertRaisesRegexp(TypeError, 'hierarchical axis'): + with tm.assertRaisesRegexp(TypeError, 'hierarchical axis'): self.ymd.reorder_levels([1, 2], axis=1) - with assertRaisesRegexp(IndexError, 'Too many levels'): + with tm.assertRaisesRegexp(IndexError, 'Too many levels'): self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): @@ -1416,29 +1296,13 @@ def test_alignment(self): res = x - y exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) - assert_series_equal(res, exp) + tm.assert_series_equal(res, exp) # hit non-monotonic code path res = x[::-1] - y[::-1] exp_index = x.index.union(y.index) exp = x.reindex(exp_index) - y.reindex(exp_index) - assert_series_equal(res, exp) - - def test_is_lexsorted(self): - levels = [[0, 1], [0, 1, 2]] - - index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) - self.assertTrue(index.is_lexsorted()) - - index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) - self.assertFalse(index.is_lexsorted()) - - index = MultiIndex(levels=levels, - labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) - self.assertFalse(index.is_lexsorted()) - self.assertEqual(index.lexsort_depth, 0) + tm.assert_series_equal(res, exp) def test_frame_getitem_view(self): df = self.frame.T.copy() @@ -1465,66 +1329,29 @@ def f(): pass self.assertTrue((df['foo', 'one'] == 0).all()) - def test_frame_getitem_not_sorted(self): - df = self.frame.T - df['foo', 'four'] = 'foo' - - arrays = [np.array(x) for x in zip(*df.columns.values)] - - result = 
df['foo'] - result2 = df.loc[:, 'foo'] - expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) - expected.columns = expected.columns.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - df = df.T - result = df.xs('foo') - result2 = df.loc['foo'] - expected = df.reindex(df.index[arrays[0] == 'foo']) - expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - def test_series_getitem_not_sorted(self): - arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = lzip(*arrays) - index = MultiIndex.from_tuples(tuples) - s = Series(randn(8), index=index) - - arrays = [np.array(x) for x in zip(*index.values)] - - result = s['qux'] - result2 = s.loc['qux'] - expected = s[arrays[0] == 'qux'] - expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - def test_count(self): frame = self.frame.copy() frame.index.names = ['a', 'b'] result = frame.count(level='b') expect = self.frame.count(level=1) - assert_frame_equal(result, expect, check_names=False) + tm.assert_frame_equal(result, expect, check_names=False) result = frame.count(level='a') expect = self.frame.count(level=0) - assert_frame_equal(result, expect, check_names=False) + tm.assert_frame_equal(result, expect, check_names=False) series = self.series.copy() series.index.names = ['a', 'b'] result = series.count(level='b') expect = self.series.count(level=1) - assert_series_equal(result, expect, check_names=False) + tm.assert_series_equal(result, expect, check_names=False) self.assertEqual(result.index.name, 'b') result = series.count(level='a') expect = self.series.count(level=0) - assert_series_equal(result, expect, check_names=False) + tm.assert_series_equal(result, expect, check_names=False) self.assertEqual(result.index.name, 'a') self.assertRaises(KeyError, series.count, 'x') @@ -1541,7 +1368,7 @@ def test_series_group_min_max(self): # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) - assert_series_equal(leftside, rightside) + tm.assert_series_equal(leftside, rightside) def test_frame_group_ops(self): self.frame.iloc[1, [1, 2]] = np.nan @@ -1550,6 +1377,7 @@ def test_frame_group_ops(self): for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), lrange(2), [False, True]): + if axis == 0: frame = self.frame else: @@ -1570,17 +1398,17 @@ def aggf(x): # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] - self.assert_index_equal(leftside._get_axis(axis), level_index) - self.assert_index_equal(rightside._get_axis(axis), level_index) + tm.assert_index_equal(leftside._get_axis(axis), level_index) + tm.assert_index_equal(rightside._get_axis(axis), level_index) - assert_frame_equal(leftside, rightside) + tm.assert_frame_equal(leftside, rightside) def test_stat_op_corner(self): obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) result = obj.sum(level=0) expected = Series([10.0], index=[2]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_frame_any_all_group(self): df = DataFrame( @@ -1591,11 +1419,11 @@ def test_frame_any_all_group(self): result = df.any(level=0) ex = DataFrame({'data': [False, True]}, index=['one', 'two']) - assert_frame_equal(result, ex) + tm.assert_frame_equal(result, ex) result = df.all(level=0) ex = 
DataFrame({'data': [False, False]}, index=['one', 'two']) - assert_frame_equal(result, ex) + tm.assert_frame_equal(result, ex) def test_std_var_pass_ddof(self): index = MultiIndex.from_arrays([np.arange(5).repeat(10), np.tile( @@ -1608,20 +1436,20 @@ def test_std_var_pass_ddof(self): result = getattr(df[0], meth)(level=0, ddof=ddof) expected = df[0].groupby(level=0).agg(alt) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = getattr(df, meth)(level=0, ddof=ddof) expected = df.groupby(level=0).agg(alt) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_series_agg_multiple_levels(self): result = self.ymd.sum(level=['year', 'month']) expected = self.ymd.groupby(level=['year', 'month']).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.ymd['A'].sum(level=['year', 'month']) expected = self.ymd['A'].groupby(level=['year', 'month']).sum() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_multilevel(self): result = self.ymd.groupby(level=[0, 1]).mean() @@ -1631,12 +1459,12 @@ def test_groupby_multilevel(self): expected = self.ymd.groupby([k1, k2]).mean() - assert_frame_equal(result, expected, check_names=False - ) # TODO groupby with level_values drops names + # TODO groupby with level_values drops names + tm.assert_frame_equal(result, expected, check_names=False) self.assertEqual(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() - assert_frame_equal(result, result2) + tm.assert_frame_equal(result, result2) def test_groupby_multilevel_with_transform(self): pass @@ -1665,15 +1493,15 @@ def test_partial_set(self): exp = self.ymd.copy() df.loc[2000, 4] = 0 exp.loc[2000, 4].values[:] = 0 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) df['A'].loc[2000, 4] = 1 exp['A'].loc[2000, 4].values[:] = 1 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) df.loc[2000] = 5 exp.loc[2000].values[:] = 5 - assert_frame_equal(df, exp) + tm.assert_frame_equal(df, exp) # this works...for now df['A'].iloc[14] = 5 @@ -1702,7 +1530,7 @@ def test_unstack_group_index_overflow(self): # test roundtrip stacked = result.stack() - assert_series_equal(s, stacked.reindex(s.index)) + tm.assert_series_equal(s, stacked.reindex(s.index)) # put it at beginning index = MultiIndex(levels=[[0, 1]] + [level] * 8, @@ -1737,7 +1565,7 @@ def test_partial_ix_missing(self): result = self.ymd.loc[2000, 0] expected = self.ymd.loc[2000]['A'] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # need to put in some work here @@ -1767,8 +1595,8 @@ def test_level_with_tuples(self): result2 = series.loc[('foo', 'bar', 0)] expected = series[:2] expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) self.assertRaises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2)) @@ -1776,8 +1604,8 @@ def test_level_with_tuples(self): result2 = frame.xs(('foo', 'bar', 0)) expected = frame[:2] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), ( 'foo', 'qux')], [0, 1]], @@ -1790,30 +1618,30 @@ 
def test_level_with_tuples(self): result2 = series.loc[('foo', 'bar')] expected = series[:2] expected.index = expected.index.droplevel(0) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) result = frame.loc[('foo', 'bar')] result2 = frame.xs(('foo', 'bar')) expected = frame[:2] expected.index = expected.index.droplevel(0) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) def test_int_series_slicing(self): s = self.ymd['A'] result = s[5:] expected = s.reindex(s.index[5:]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) exp = self.ymd['A'].copy() s[5:] = 0 exp.values[5:] = 0 - self.assert_numpy_array_equal(s.values, exp.values) + tm.assert_numpy_array_equal(s.values, exp.values) result = self.ymd[5:] expected = self.ymd.reindex(s.index[5:]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_get(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1826,12 +1654,12 @@ def test_mixed_depth_get(self): result = df['a'] expected = df['a', '', ''] - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) self.assertEqual(result.name, 'a') result = df['routine1', 'result1'] expected = df['routine1', 'result1', ''] - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) self.assertEqual(result.name, ('routine1', 'result1')) def test_mixed_depth_insert(self): @@ -1847,7 +1675,7 @@ def test_mixed_depth_insert(self): expected = df.copy() result['b'] = [1, 2, 3, 4] expected['b', '', ''] = [1, 2, 3, 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_drop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1860,16 +1688,16 @@ def test_mixed_depth_drop(self): result = df.drop('a', axis=1) expected = df.drop([('a', '', '')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) result = df.drop(['top'], axis=1) expected = df.drop([('top', 'OD', 'wx')], axis=1) expected = expected.drop([('top', 'OD', 'wy')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) result = df.drop(('top', 'OD', 'wx'), axis=1) expected = df.drop([('top', 'OD', 'wx')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) expected = df.drop([('top', 'OD', 'wy')], axis=1) expected = df.drop('top', axis=1) @@ -1877,7 +1705,7 @@ def test_mixed_depth_drop(self): result = df.drop('result1', level=1, axis=1) expected = df.drop([('routine1', 'result1', ''), ('routine2', 'result1', '')], axis=1) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) def test_drop_nonunique(self): df = DataFrame([["x-a", "x", "a", 1.5], ["x-a", "x", "a", 1.2], @@ -1898,7 +1726,7 @@ def test_drop_nonunique(self): result.index = expected.index - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_mixed_depth_pop(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], @@ -1913,32 +1741,32 @@ def test_mixed_depth_pop(self): df2 = df.copy() result = df1.pop('a') expected = df2.pop(('a', '', '')) - assert_series_equal(expected, result, 
check_names=False) - assert_frame_equal(df1, df2) + tm.assert_series_equal(expected, result, check_names=False) + tm.assert_frame_equal(df1, df2) self.assertEqual(result.name, 'a') expected = df1['top'] df1 = df1.drop(['top'], axis=1) result = df2.pop('top') - assert_frame_equal(expected, result) - assert_frame_equal(df1, df2) + tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(df1, df2) def test_reindex_level_partial_selection(self): result = self.frame.reindex(['foo', 'qux'], level=0) expected = self.frame.iloc[[0, 1, 2, 7, 8, 9]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.reindex_axis(['foo', 'qux'], axis=1, level=0) - assert_frame_equal(result, expected.T) + tm.assert_frame_equal(result, expected.T) result = self.frame.loc[['foo', 'qux']] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame['A'].loc[['foo', 'qux']] - assert_series_equal(result, expected['A']) + tm.assert_series_equal(result, expected['A']) result = self.frame.T.loc[:, ['foo', 'qux']] - assert_frame_equal(result, expected.T) + tm.assert_frame_equal(result, expected.T) def test_setitem_multiple_partial(self): expected = self.frame.copy() @@ -1946,45 +1774,45 @@ def test_setitem_multiple_partial(self): result.loc[['foo', 'bar']] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = self.frame.copy() result = self.frame.copy() result.loc['foo':'bar'] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.loc[['foo', 'bar']] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) expected = self.frame['A'].copy() result = self.frame['A'].copy() result.loc['foo':'bar'] = 0 expected.loc['foo'] = 0 expected.loc['bar'] = 0 - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_drop_level(self): result = self.frame.drop(['bar', 'qux'], level='first') expected = self.frame.iloc[[0, 1, 2, 5, 6]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.drop(['two'], level='second') expected = self.frame.iloc[[0, 2, 3, 6, 7, 9]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.drop(['bar', 'qux'], axis=1, level='first') expected = self.frame.iloc[[0, 1, 2, 5, 6]].T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = self.frame.T.drop(['two'], axis=1, level='second') expected = self.frame.iloc[[0, 2, 3, 6, 7, 9]].T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_drop_level_nonunique_datetime(self): # GH 12701 @@ -2003,7 +1831,7 @@ def test_drop_level_nonunique_datetime(self): result = df.drop(ts, level='tstamp') expected = df.loc[idx != 4] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_drop_preserve_names(self): index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], @@ -2089,7 +1917,7 @@ def test_indexing_ambiguity_bug_1678(self): result = frame.iloc[:, 1] exp = frame.loc[:, ('Ohio', 'Red')] tm.assertIsInstance(result, Series) - assert_series_equal(result, exp) + tm.assert_series_equal(result, exp) def test_nonunique_assignment_1750(self): df = 
DataFrame([[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], @@ -2181,7 +2009,7 @@ def test_duplicate_mi(self): ['foo', 'bar', 5.0, 5]], columns=list('ABCD')).set_index(['A', 'B']) result = df.loc[('foo', 'bar')] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_duplicated_drop_duplicates(self): # GH 4060 @@ -2242,8 +2070,8 @@ def test_datetimeindex(self): expected1 = pd.DatetimeIndex(['2013-04-01 9:00', '2013-04-02 9:00', '2013-04-03 9:00'], tz='Asia/Tokyo') - self.assert_index_equal(idx.levels[0], expected1) - self.assert_index_equal(idx.levels[1], idx2) + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) # from datetime combos # GH 7888 @@ -2289,14 +2117,14 @@ def test_set_index_datetime(self): expected = expected.tz_localize('UTC').tz_convert('US/Pacific') df = df.set_index('label', append=True) - self.assert_index_equal(df.index.levels[0], expected) - self.assert_index_equal(df.index.levels[1], - pd.Index(['a', 'b'], name='label')) + tm.assert_index_equal(df.index.levels[0], expected) + tm.assert_index_equal(df.index.levels[1], + pd.Index(['a', 'b'], name='label')) df = df.swaplevel(0, 1) - self.assert_index_equal(df.index.levels[0], - pd.Index(['a', 'b'], name='label')) - self.assert_index_equal(df.index.levels[1], expected) + tm.assert_index_equal(df.index.levels[0], + pd.Index(['a', 'b'], name='label')) + tm.assert_index_equal(df.index.levels[1], expected) df = DataFrame(np.random.random(6)) idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', @@ -2319,14 +2147,14 @@ def test_set_index_datetime(self): expected2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-02 09:00'], tz='US/Eastern') - self.assert_index_equal(df.index.levels[0], expected1) - self.assert_index_equal(df.index.levels[1], expected2) - self.assert_index_equal(df.index.levels[2], idx3) + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) # GH 7092 - self.assert_index_equal(df.index.get_level_values(0), idx1) - self.assert_index_equal(df.index.get_level_values(1), idx2) - self.assert_index_equal(df.index.get_level_values(2), idx3) + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) def test_reset_index_datetime(self): # GH 3950 @@ -2351,7 +2179,7 @@ def test_reset_index_datetime(self): expected['idx1'] = expected['idx1'].apply( lambda d: pd.Timestamp(d, tz=tz)) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', tz='Europe/Paris', name='idx3') @@ -2378,7 +2206,7 @@ def test_reset_index_datetime(self): lambda d: pd.Timestamp(d, tz=tz)) expected['idx3'] = expected['idx3'].apply( lambda d: pd.Timestamp(d, tz='Europe/Paris')) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) # GH 7793 idx = pd.MultiIndex.from_product([['a', 'b'], pd.date_range( @@ -2396,7 +2224,7 @@ def test_reset_index_datetime(self): columns=['level_0', 'level_1', 'a']) expected['level_1'] = expected['level_1'].apply( lambda d: pd.Timestamp(d, freq='D', tz=tz)) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 @@ -2415,7 +2243,7 @@ def test_reset_index_period(self): 
'feature': ['a', 'b', 'c'] * 3, 'a': np.arange(9, dtype='int64') }, columns=['month', 'feature', 'a']) - assert_frame_equal(df.reset_index(), expected) + tm.assert_frame_equal(df.reset_index(), expected) def test_set_index_period(self): # GH 6631 @@ -2433,13 +2261,13 @@ def test_set_index_period(self): expected1 = pd.period_range('2011-01-01', periods=3, freq='M') expected2 = pd.period_range('2013-01-01 09:00', periods=2, freq='H') - self.assert_index_equal(df.index.levels[0], expected1) - self.assert_index_equal(df.index.levels[1], expected2) - self.assert_index_equal(df.index.levels[2], idx3) + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) - self.assert_index_equal(df.index.get_level_values(0), idx1) - self.assert_index_equal(df.index.get_level_values(1), idx2) - self.assert_index_equal(df.index.get_level_values(2), idx3) + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) def test_repeat(self): # GH 9361 @@ -2475,4 +2303,185 @@ def test_iloc_mi(self): result = pd.DataFrame([[df_mi.iloc[r, c] for c in range(2)] for r in range(5)]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) + + +class TestSorted(Base, tm.TestCase): + """ everthing you wanted to test about sorting """ + + def test_sort_index_preserve_levels(self): + result = self.frame.sort_index() + self.assertEqual(result.index.names, self.frame.index.names) + + def test_sorting_repr_8017(self): + + np.random.seed(0) + data = np.random.randn(3, 4) + + for gen, extra in [([1., 3., 2., 5.], 4.), ([1, 3, 2, 5], 4), + ([Timestamp('20130101'), Timestamp('20130103'), + Timestamp('20130102'), Timestamp('20130105')], + Timestamp('20130104')), + (['1one', '3one', '2one', '5one'], '4one')]: + columns = MultiIndex.from_tuples([('red', i) for i in gen]) + df = DataFrame(data, index=list('def'), columns=columns) + df2 = pd.concat([df, + DataFrame('world', index=list('def'), + columns=MultiIndex.from_tuples( + [('red', extra)]))], axis=1) + + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. only 1 header of read + self.assertEqual(str(df2).splitlines()[0].split(), ['red']) + + # GH 8017 + # sorting fails after columns added + + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) + + # setitem then sort + result = df.copy() + result[('red', extra)] = 'world' + + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level(self): + df = self.frame.copy() + df.index = np.arange(len(df)) + + # axis=1 + + # series + a_sorted = self.frame['A'].sort_index(level=0) + + # preserve names + self.assertEqual(a_sorted.index.names, self.frame.index.names) + + # inplace + rs = self.frame.copy() + rs.sort_index(level=0, inplace=True) + tm.assert_frame_equal(rs, self.frame.sort_index(level=0)) + + def test_sort_index_level_large_cardinality(self): + + # #2684 (int64) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64) + + # it works! 
+ result = df.sort_index(level=0) + self.assertTrue(result.index.lexsort_depth == 3) + + # #2684 (int32) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) + + # it works! + result = df.sort_index(level=0) + self.assertTrue((result.dtypes.values == df.dtypes.values).all()) + self.assertTrue(result.index.lexsort_depth == 3) + + def test_sort_index_level_by_name(self): + self.frame.index.names = ['first', 'second'] + result = self.frame.sort_index(level='second') + expected = self.frame.sort_index(level=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level_mixed(self): + sorted_before = self.frame.sort_index(level=1) + + df = self.frame.copy() + df['foo'] = 'bar' + sorted_after = df.sort_index(level=1) + tm.assert_frame_equal(sorted_before, + sorted_after.drop(['foo'], axis=1)) + + dft = self.frame.T + sorted_before = dft.sort_index(level=1, axis=1) + dft['foo', 'three'] = 'bar' + + sorted_after = dft.sort_index(level=1, axis=1) + tm.assert_frame_equal(sorted_before.drop([('foo', 'three')], axis=1), + sorted_after.drop([('foo', 'three')], axis=1)) + + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex(levels=levels, + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + self.assertTrue(index.is_lexsorted()) + + index = MultiIndex(levels=levels, + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) + self.assertFalse(index.is_lexsorted()) + + index = MultiIndex(levels=levels, + labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) + self.assertFalse(index.is_lexsorted()) + self.assertEqual(index.lexsort_depth, 0) + + def test_getitem_multilevel_index_tuple_not_sorted(self): + index_columns = list("abc") + df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]], + columns=index_columns + ["data"]) + df = df.set_index(index_columns) + query_index = df.index[:1] + rs = df.loc[query_index, "data"] + + xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=['a', 'b', 'c']) + xp = Series(['x'], index=xp_idx, name='data') + tm.assert_series_equal(rs, xp) + + def test_getitem_slice_not_sorted(self): + df = self.frame.sort_index(level=1).T + + # buglet with int typechecking + result = df.iloc[:, :np.int32(3)] + expected = df.reindex(columns=df.columns[:3]) + tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted(self): + df = self.frame.T + df['foo', 'four'] = 'foo' + + arrays = [np.array(x) for x in zip(*df.columns.values)] + + result = df['foo'] + result2 = df.loc[:, 'foo'] + expected = df.reindex(columns=df.columns[arrays[0] == 'foo']) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + df = df.T + result = df.xs('foo') + result2 = df.loc['foo'] + expected = df.reindex(df.index[arrays[0] == 'foo']) + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_series_getitem_not_sorted(self): + arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + tuples = lzip(*arrays) + index = MultiIndex.from_tuples(tuples) + s = Series(randn(8), index=index) + + arrays = [np.array(x) for x in zip(*index.values)] + + result = s['qux'] + result2 = s.loc['qux'] + expected = s[arrays[0] == 'qux'] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) From 
37e5f78b4e9ff03cbff4dea928445cc3b1f707c8 Mon Sep 17 00:00:00 2001 From: Greg Williams Date: Thu, 16 Mar 2017 07:56:46 -0400 Subject: [PATCH 212/353] BUG: Group-by numeric type-coercion with datetime closes #14423 closes #15421 closes #15670 During a group-by/apply on a DataFrame, in the presence of one or more DateTime-like columns, Pandas would incorrectly coerce the type of all other columns to numeric. E.g. a String column would be coerced to numeric, producing NaNs. Author: Greg Williams Closes #15680 from gwpdt/bugfix14423 and squashes the following commits: e1ed104 [Greg Williams] TST: Rename and expand test_numeric_coercion 0a15674 [Greg Williams] CLN: move import, add whatsnew entry c8844e0 [Greg Williams] CLN: PEP8 (whitespace fixes) 46d12c2 [Greg Williams] BUG: Group-by numeric type-coericion with datetime --- doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/core/groupby.py | 5 ++- pandas/tests/groupby/test_groupby.py | 48 ++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 41b6519eb740f..a56212328f5c3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -850,7 +850,8 @@ Bug Fixes - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) -- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) +- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) +- Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) - Bug in ``DataFrame.to_html`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a10be078a8f96..7a017ffae284c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -10,6 +10,7 @@ zip, range, lzip, callable, map ) + from pandas import compat from pandas.compat.numpy import function as nv from pandas.compat.numpy import _np_version_under1p8 @@ -3424,6 +3425,7 @@ def _decide_output_index(self, output, labels): def _wrap_applied_output(self, keys, values, not_indexed_same=False): from pandas.core.index import _all_indexes_same + from pandas.tools.util import to_numeric if len(keys) == 0: return DataFrame(index=keys) @@ -3566,7 +3568,8 @@ def first_non_None_value(values): # as we are stacking can easily have object dtypes here so = self._selected_obj if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()): - result = result._convert(numeric=True) + result = result.apply( + lambda x: to_numeric(x, errors='ignore')) date_cols = self._selected_obj.select_dtypes( include=['datetime', 'timedelta']).columns date_cols = date_cols.intersection(result.columns) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d7fa3beda0abf..c25974c94bfd1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -4314,6 +4314,54 @@ def test_cummin_cummax(self): expected = pd.Series([1, 2, 1], name='b') tm.assert_series_equal(result, expected) + def test_apply_numeric_coercion_when_datetime(self): + # In the past, group-by/apply operations have been over-eager + # in converting dtypes to numeric, in the presence of datetime + # columns. Various GH issues were filed, the reproductions + # for which are here. 
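For orientation before the reproductions that follow: the essence of the fix in the groupby.py hunk above is that each column is passed through to_numeric with errors='ignore' instead of being force-converted, so object columns that are not fully numeric survive the group-by/apply untouched. A minimal standalone sketch of that difference (illustrative only, not part of the patch; it uses the public pd.to_numeric rather than the internal pandas.tools.util import, and the values mirror the GH 15670 reproduction below):

    import pandas as pd

    s = pd.Series(['foo', 'inf'])        # object dtype, only partly numeric
    pd.to_numeric(s, errors='ignore')    # parsing fails -> s is returned unchanged
    pd.to_numeric(s, errors='coerce')    # forced parse -> [NaN, inf]

The old result._convert(numeric=True) path effectively behaved like the forced parse whenever a datetime-like column was present, which is how string columns ended up mangled into NaN/inf values.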
+ + # GH 15670 + df = pd.DataFrame({'Number': [1, 2], + 'Date': ["2017-03-02"] * 2, + 'Str': ["foo", "inf"]}) + expected = df.groupby(['Number']).apply(lambda x: x.iloc[0]) + df.Date = pd.to_datetime(df.Date) + result = df.groupby(['Number']).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(result['Str'], expected['Str']) + + # GH 15421 + df = pd.DataFrame({'A': [10, 20, 30], + 'B': ['foo', '3', '4'], + 'T': [pd.Timestamp("12:31:22")] * 3}) + + def get_B(g): + return g.iloc[0][['B']] + result = df.groupby('A').apply(get_B)['B'] + expected = df.B + expected.index = df.A + tm.assert_series_equal(result, expected) + + # GH 14423 + def predictions(tool): + out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object) + if 'step1' in list(tool.State): + out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0]) + if 'step2' in list(tool.State): + out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0]) + out['useTime'] = str( + tool[tool.State == 'step2'].oTime.values[0]) + return out + df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'], + 'State': ['step1', 'step2', 'step1', 'step2'], + 'oTime': ['', '2016-09-19 05:24:33', + '', '2016-09-19 23:59:04'], + 'Machine': ['23', '36L', '36R', '36R']}) + df2 = df1.copy() + df2.oTime = pd.to_datetime(df2.oTime) + expected = df1.groupby('Key').apply(predictions).p1 + result = df2.groupby('Key').apply(predictions).p1 + tm.assert_series_equal(expected, result) + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From fe15466cff9184e38ecee16639c1eefaa45c3c92 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Mar 2017 12:20:37 -0400 Subject: [PATCH 213/353] CI: remove dev-scipy from testing on numpy-dev build as really old wheels (#15699) closes #15696 --- ci/requirements-3.5_NUMPY_DEV.build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.5_NUMPY_DEV.build.sh b/ci/requirements-3.5_NUMPY_DEV.build.sh index 91fa15491bbf7..b6c8a477e6f5e 100644 --- a/ci/requirements-3.5_NUMPY_DEV.build.sh +++ b/ci/requirements-3.5_NUMPY_DEV.build.sh @@ -8,6 +8,6 @@ echo "install numpy master wheel" pip uninstall numpy -y # install numpy wheel from master -pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy scipy +pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy true From 3cac2d5a74c50a2728e1b977e2ee6593b391c9b1 Mon Sep 17 00:00:00 2001 From: Matthew Brett Date: Thu, 16 Mar 2017 12:12:08 -0700 Subject: [PATCH 214/353] MAINT: test with manylinux numpy/scipy pre-release (#15702) Numpy switching to daily manylinux wheels of trunk, instead of building wheels specific to Ubuntu 12.04 for every commit. Use these new wheels for numpy pre-release testing. 
--- .travis.yml | 10 ---------- ci/requirements-3.5_NUMPY_DEV.build.sh | 3 ++- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index b0331941e2a1e..ee093e5bf0e60 100644 --- a/.travis.yml +++ b/.travis.yml @@ -123,11 +123,6 @@ matrix: - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran # In allow_failures - python: 3.5 env: @@ -167,11 +162,6 @@ matrix: - PANDAS_TESTING_MODE="deprecate" - CACHE_NAME="35_numpy_dev" - USE_CACHE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran - python: 3.5 env: - PYTHON_VERSION=3.5 diff --git a/ci/requirements-3.5_NUMPY_DEV.build.sh b/ci/requirements-3.5_NUMPY_DEV.build.sh index b6c8a477e6f5e..4af1307f26a18 100644 --- a/ci/requirements-3.5_NUMPY_DEV.build.sh +++ b/ci/requirements-3.5_NUMPY_DEV.build.sh @@ -8,6 +8,7 @@ echo "install numpy master wheel" pip uninstall numpy -y # install numpy wheel from master -pip install --pre --upgrade --no-index --timeout=60 --trusted-host travis-dev-wheels.scipy.org -f http://travis-dev-wheels.scipy.org/ numpy +PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com" +pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy true From acb9d0132bb824052adc2c13a34b88700a735a45 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Mar 2017 16:46:21 -0400 Subject: [PATCH 215/353] TST: missing __init__.py file in pandas/tests/io/sas --- pandas/tests/io/sas/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/io/sas/__init__.py diff --git a/pandas/tests/io/sas/__init__.py b/pandas/tests/io/sas/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 61f6f6333fb7bb2dedf82736aee6c9878382a06f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Mar 2017 18:28:56 -0400 Subject: [PATCH 216/353] TST: report the exit code on pandas.test() exit --- pandas/util/_tester.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index 8d9701e0b4672..aeb4259a9edae 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -2,6 +2,7 @@ Entrypoint for testing from the top-level namespace """ import os +import sys PKG = os.path.dirname(os.path.dirname(__file__)) @@ -20,7 +21,7 @@ def test(extra_args=None): cmd = extra_args cmd += [PKG] print("running: pytest {}".format(' '.join(cmd))) - pytest.main(cmd) + sys.exit(pytest.main(cmd)) __all__ = ['test'] From d313808337cca3969ec1a323dc3c1dbc21956608 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 08:37:10 -0400 Subject: [PATCH 217/353] CI: re-enable miniconda cache --- ci/install_travis.sh | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 12202b4ceee70..aad87ea37439f 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -33,20 +33,26 @@ home_dir=$(pwd) echo "[home_dir: $home_dir]" # install miniconda -echo "[Using clean Miniconda install]" - MINICONDA_DIR="$HOME/miniconda3" -if [ -d "$MINICONDA_DIR" ]; then - rm -rf "$MINICONDA_DIR" -fi -# install miniconda -if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 +if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR" ]; then + echo "[Using cached Miniconda install]" + else - wget 
http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + echo "[Using clean Miniconda install]" + + if [ -d "$MINICONDA_DIR" ]; then + rm -rf "$MINICONDA_DIR" + fi + + # install miniconda + if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 + else + wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + fi + bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi -bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 echo "[update conda]" conda config --set ssl_verify false || exit 1 From 087c2f1143e3f67663c121c81f722b8d18029fa4 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 17 Mar 2017 08:50:27 -0400 Subject: [PATCH 218/353] TST: Replace check_package with skip_if_no_package (#15709) check_package literally just called skip_if_no_package with no additional decorations. --- pandas/tests/io/test_pytables.py | 3 ++- pandas/util/testing.py | 27 ++++++++++----------------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 5592c564e51df..8ea8088a297b8 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -728,7 +728,8 @@ def test_put_compression(self): format='fixed', complib='zlib') def test_put_compression_blosc(self): - tm.skip_if_no_package('tables', '2.2', app='blosc support') + tm.skip_if_no_package('tables', min_version='2.2', + app='blosc support') if skip_compression: pytest.skip("skipping on windows/PY3") diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 529ecef3e2d6a..154476ce8340a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2010,12 +2010,16 @@ def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) -# Dependency checks. Copied this from Nipy/Nipype (Copyright of -# respective developers, license: BSD-3) -def package_check(pkg_name, min_version=None, max_version=None, app='pandas', - checker=LooseVersion): +# Dependency checker when running tests. +# +# Copied this from nipy/nipype +# Copyright of respective developers, License: BSD-3 +def skip_if_no_package(pkg_name, min_version=None, max_version=None, + app='pandas', checker=LooseVersion): """Check that the min/max version of the required package is installed. + If the package check fails, the test is automatically skipped. + Parameters ---------- pkg_name : string @@ -2025,11 +2029,11 @@ def package_check(pkg_name, min_version=None, max_version=None, app='pandas', max_version : string, optional Max version number for required package. app : string, optional - Application that is performing the check. For instance, the + Application that is performing the check. For instance, the name of the tutorial being executed that depends on specific packages. checker : object, optional - The class that will perform the version checking. Default is + The class that will perform the version checking. Default is distutils.version.LooseVersion. 
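The docstring's own Examples section follows; for quick orientation while reading the patch, the call pattern exercised by the test_pytables.py change earlier in this commit is simply (illustrative sketch, not part of the diff):

    import pandas.util.testing as tm

    # skip the running test unless PyTables >= 2.2 is importable
    tm.skip_if_no_package('tables', min_version='2.2', app='blosc support')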
     Examples
@@ -2061,17 +2065,6 @@ def package_check(pkg_name, min_version=None, max_version=None, app='pandas',
         pytest.skip(msg)
 
 
-def skip_if_no_package(*args, **kwargs):
-    """pytest.skip() if package_check fails
-
-    Parameters
-    ----------
-    *args Positional parameters passed to `package_check`
-    *kwargs Keyword parameters passed to `package_check`
-    """
-    package_check(*args, **kwargs)
-
-
 def optional_args(decorator):
     """allows a decorator to take optional positional and keyword arguments.
     Assumes that taking a single, callable, positional argument means that

From b69c8775b64a1d2fa5382f04b209888d989030c0 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Fri, 17 Mar 2017 08:50:55 -0400
Subject: [PATCH 219/353] DOC: Add gotcha about flake8-ing diff

`flake8`-ing the diff will not catch any import style errors. This adds an
alternative check that is more comprehensive but will take longer to run,
since it checks entire files instead of just the diff.

Author: gfyoung

Closes #15712 from gfyoung/pep8-diff-gotcha and squashes the following commits:

42c13de [gfyoung] DOC: Add gotcha about flake8-ing diff
---
 doc/source/contributing.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 83f99b4f01b26..7961780d0c79b 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -520,6 +520,15 @@ submitting code to run the check yourself on the diff::
 
     git diff master | flake8 --diff
 
+This command will catch any stylistic errors in your changes specifically, but
+be aware it may not catch all of them. For example, if you delete the only
+usage of an imported function, it is stylistically incorrect to import an
+unused function. However, style-checking the diff will not catch this because
+the actual import is not part of the diff.
Thus, for completeness, you should +run this command, though it will take longer:: + + git diff master --name-only -- '*.py' | grep 'pandas' | xargs -r flake8 + Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ From 3ba68a72f12dd7b1361f1a3ac60720ddb6fd7a34 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 09:06:27 -0400 Subject: [PATCH 220/353] TST: don't catch, but supress warnings in panel4d/panelnd (#15705) --- pandas/core/categorical.py | 4 +- pandas/io/pytables.py | 18 ++- pandas/tests/io/test_pytables.py | 221 ++++++++++++------------------ pandas/tests/test_panel.py | 3 +- pandas/tests/test_panel4d.py | 187 +++++++++++++------------ pandas/tests/test_panelnd.py | 7 +- pandas/tests/tools/test_concat.py | 5 +- 7 files changed, 210 insertions(+), 235 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index c1e5904693d1c..af51c7f2e2dc1 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -550,8 +550,8 @@ def _validate_categories(cls, categories, fastpath=False): # we don't allow NaNs in the categories themselves if categories.hasnans: - # NaNs in cats deprecated in 0.17, - # remove in 0.18 or 0.19 GH 10748 + # NaNs in cats deprecated in 0.17 + # GH 10748 msg = ('\nSetting NaNs in `categories` is deprecated and ' 'will be removed in a future version of pandas.') warn(msg, FutureWarning, stacklevel=3) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 72efc47a3c744..b3b253f151541 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2094,7 +2094,17 @@ def convert(self, values, nan_rep, encoding): # we have a categorical categories = self.metadata - self.data = Categorical.from_codes(self.data.ravel(), + codes = self.data.ravel() + + # if we have stored a NaN in the categories + # then strip it; in theory we could have BOTH + # -1s in the codes and nulls :< + mask = isnull(categories) + if mask.any(): + categories = categories[~mask] + codes[codes != -1] -= mask.astype(int).cumsum().values + + self.data = Categorical.from_codes(codes, categories=categories, ordered=self.ordered) @@ -3404,10 +3414,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, if existing_table is not None: indexer = len(self.non_index_axes) exist_axis = existing_table.non_index_axes[indexer][1] - if append_axis != exist_axis: + if not array_equivalent(np.array(append_axis), + np.array(exist_axis)): # ahah! 
-> reindex - if sorted(append_axis) == sorted(exist_axis): + if array_equivalent(np.array(sorted(append_axis)), + np.array(sorted(exist_axis))): append_axis = exist_axis # the non_index_axes info diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 8ea8088a297b8..40866b8702fe2 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1,11 +1,12 @@ import pytest import sys import os -import warnings +from warnings import catch_warnings import tempfile from contextlib import contextmanager import datetime +from datetime import timedelta import numpy as np import pandas @@ -22,7 +23,7 @@ from pandas.io.pytables import TableIterator from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, IncompatibilityWarning, PerformanceWarning, - AttributeConflictWarning, DuplicateWarning, + AttributeConflictWarning, PossibleDataLossError, ClosedFileError) from pandas.io import pytables as pytables @@ -31,7 +32,6 @@ assert_panel_equal, assert_frame_equal, assert_series_equal, - assert_produces_warning, set_timezone) from pandas import concat, Timestamp from pandas import compat @@ -123,17 +123,6 @@ def _maybe_remove(store, key): pass -@contextmanager -def compat_assert_produces_warning(w): - """ don't produce a warning under PY3 """ - if compat.PY3: - yield - else: - with tm.assert_produces_warning(expected_warning=w, - check_stacklevel=False): - yield - - class Base(tm.TestCase): @classmethod @@ -151,8 +140,6 @@ def tearDownClass(cls): tm.set_testing_mode() def setUp(self): - warnings.filterwarnings(action='ignore', category=FutureWarning) - self.path = 'tmp.__%s__.h5' % tm.rands(10) def tearDown(self): @@ -420,9 +407,9 @@ def test_repr(self): df.loc[3:6, ['obj1']] = np.nan df = df._consolidate()._convert(datetime=True) - warnings.filterwarnings('ignore', category=PerformanceWarning) - store['df'] = df - warnings.filterwarnings('always', category=PerformanceWarning) + # PerformanceWarning + with catch_warnings(record=True): + store['df'] = df # make a random group in hdf space store._handle.create_group(store._handle.root, 'bah') @@ -455,9 +442,9 @@ def test_contains(self): self.assertNotIn('bar', store) # GH 2694 - warnings.filterwarnings( - 'ignore', category=tables.NaturalNameWarning) - store['node())'] = tm.makeDataFrame() + # tables.NaturalNameWarning + with catch_warnings(record=True): + store['node())'] = tm.makeDataFrame() self.assertIn('node())', store) def test_versioning(self): @@ -768,11 +755,8 @@ def test_put_mixed_type(self): with ensure_clean_store(self.path) as store: _maybe_remove(store, 'df') - # cannot use assert_produces_warning here for some reason - # a PendingDeprecationWarning is also raised? 
- warnings.filterwarnings('ignore', category=PerformanceWarning) - store.put('df', df) - warnings.filterwarnings('always', category=PerformanceWarning) + with catch_warnings(record=True): + store.put('df', df) expected = store.get('df') tm.assert_frame_equal(expected, df) @@ -797,8 +781,8 @@ def test_append(self): tm.assert_frame_equal(store['df3'], df) # this is allowed by almost always don't want to do it - with tm.assert_produces_warning( - expected_warning=tables.NaturalNameWarning): + # tables.NaturalNameWarning): + with catch_warnings(record=True): _maybe_remove(store, '/df3 foo') store.append('/df3 foo', df[:10]) store.append('/df3 foo', df[10:]) @@ -812,8 +796,7 @@ def test_append(self): assert_panel_equal(store['wp1'], wp) # ndim - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D() _maybe_remove(store, 'p4d') store.append('p4d', p4d.iloc[:, :, :10, :]) @@ -901,12 +884,12 @@ def test_append_series(self): # select on the values expected = ns[ns > 60] - result = store.select('ns', Term('foo>60')) + result = store.select('ns', 'foo>60') tm.assert_series_equal(result, expected) # select on the index and values expected = ns[(ns > 70) & (ns.index < 90)] - result = store.select('ns', [Term('foo>70'), Term('index<90')]) + result = store.select('ns', 'foo>70 and index<90') tm.assert_series_equal(result, expected) # multi-index @@ -1228,7 +1211,7 @@ def test_append_with_different_block_ordering(self): def test_ndim_indexables(self): # test using ndim tables in new ways - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): with ensure_clean_store(self.path) as store: p4d = tm.makePanel4D() @@ -1888,8 +1871,7 @@ def test_append_misc(self): with ensure_clean_store(self.path) as store: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): # unsuported data types for non-tables p4d = tm.makePanel4D() @@ -1930,7 +1912,7 @@ def check(obj, comparator): p = tm.makePanel() check(p, assert_panel_equal) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D() check(p4d, assert_panel4d_equal) @@ -2058,8 +2040,8 @@ def test_table_values_dtypes_roundtrip(self): expected = Series({'float32': 2, 'float64': 1, 'int32': 1, 'bool': 1, 'int16': 1, 'int8': 1, 'int64': 1, 'object': 1, 'datetime64[ns]': 2}) - result.sort() - expected.sort() + result = result.sort_index() + result = expected.sort_index() tm.assert_series_equal(result, expected) def test_table_mixed_dtypes(self): @@ -2098,7 +2080,8 @@ def test_table_mixed_dtypes(self): store.append('p1_mixed', wp) assert_panel_equal(store.select('p1_mixed'), wp) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): + # ndim wp = tm.makePanel4D() wp['obj1'] = 'foo' @@ -2170,7 +2153,6 @@ def test_append_with_timedelta(self): # GH 3577 # append timedelta - from datetime import timedelta df = DataFrame(dict(A=Timestamp('20130101'), B=[Timestamp( '20130101') + timedelta(days=i, seconds=10) for i in range(10)])) df['C'] = df['A'] - df['B'] @@ -2184,12 +2166,9 @@ def test_append_with_timedelta(self): result = store.select('df') assert_frame_equal(result, df) - result = store.select('df', Term("C<100000")) + result = store.select('df', "C<100000") assert_frame_equal(result, df) - result = store.select('df', Term("C", "<", -3 * 86400)) - 
assert_frame_equal(result, df.iloc[3:]) - result = store.select('df', "C<'-3D'") assert_frame_equal(result, df.iloc[3:]) @@ -2432,7 +2411,7 @@ def test_invalid_terms(self): with ensure_clean_store(self.path) as store: - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): df = tm.makeTimeDataFrame() df['string'] = 'foo' @@ -2490,7 +2469,7 @@ def test_terms(self): 0: tm.makeDataFrame(), 1: tm.makeDataFrame()}) - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): p4d = tm.makePanel4D() store.put('p4d', p4d, format='table') @@ -2499,39 +2478,23 @@ def test_terms(self): store.put('wpneg', wpneg, format='table') # panel - result = store.select('wp', [Term( - 'major_axis<"20000108"'), Term("minor_axis=['A', 'B']")]) + result = store.select( + 'wp', "major_axis<'20000108' and minor_axis=['A', 'B']") expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) assert_panel_equal(result, expected) - # with deprecation - result = store.select('wp', [Term( - 'major_axis', '<', "20000108"), Term("minor_axis=['A', 'B']")]) - expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) - tm.assert_panel_equal(result, expected) - # p4d - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): result = store.select('p4d', - [Term('major_axis<"20000108"'), - Term("minor_axis=['A', 'B']"), - Term("items=['ItemA', 'ItemB']")]) + ("major_axis<'20000108' and " + "minor_axis=['A', 'B'] and " + "items=['ItemA', 'ItemB']")) expected = p4d.truncate(after='20000108').reindex( minor=['A', 'B'], items=['ItemA', 'ItemB']) assert_panel4d_equal(result, expected) - # back compat invalid terms - terms = [dict(field='major_axis', op='>', value='20121114'), - [dict(field='major_axis', op='>', value='20121114')], - ["minor_axis=['A','B']", - dict(field='major_axis', op='>', value='20121114')]] - for t in terms: - with tm.assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - Term(t) - - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): # valid terms terms = [('major_axis=20121114'), @@ -2582,13 +2545,13 @@ def test_term_compat(self): minor_axis=['A', 'B', 'C', 'D']) store.append('wp', wp) - result = store.select('wp', [Term('major_axis>20000102'), - Term('minor_axis', '=', ['A', 'B'])]) + result = store.select( + 'wp', "major_axis>20000102 and minor_axis=['A', 'B']") expected = wp.loc[:, wp.major_axis > Timestamp('20000102'), ['A', 'B']] assert_panel_equal(result, expected) - store.remove('wp', Term('major_axis>20000103')) + store.remove('wp', 'major_axis>20000103') result = store.select('wp') expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] assert_panel_equal(result, expected) @@ -2602,25 +2565,23 @@ def test_term_compat(self): # stringified datetimes result = store.select( - 'wp', [Term('major_axis', '>', datetime.datetime(2000, 1, 2))]) + 'wp', "major_axis>datetime.datetime(2000, 1, 2)") expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) result = store.select( - 'wp', [Term('major_axis', '>', - datetime.datetime(2000, 1, 2, 0, 0))]) + 'wp', "major_axis>datetime.datetime(2000, 1, 2, 0, 0)") expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) result = store.select( - 'wp', [Term('major_axis', '=', - [datetime.datetime(2000, 1, 2, 0, 0), - datetime.datetime(2000, 1, 3, 0, 0)])]) + 'wp', ("major_axis=[datetime.datetime(2000, 1, 2, 0, 0), " + 
"datetime.datetime(2000, 1, 3, 0, 0)]")) expected = wp.loc[:, [Timestamp('20000102'), Timestamp('20000103')]] assert_panel_equal(result, expected) - result = store.select('wp', [Term('minor_axis', '=', ['A', 'B'])]) + result = store.select('wp', "minor_axis=['A', 'B']") expected = wp.loc[:, :, ['A', 'B']] assert_panel_equal(result, expected) @@ -2631,8 +2592,7 @@ def test_backwards_compat_without_term_object(self): major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B', 'C', 'D']) store.append('wp', wp) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis>20000102'), ('minor_axis', '=', ['A', 'B'])]) expected = wp.loc[:, @@ -2653,24 +2613,21 @@ def test_backwards_compat_without_term_object(self): store.append('wp', wp) # stringified datetimes - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis', '>', datetime.datetime(2000, 1, 2))]) expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis', '>', datetime.datetime(2000, 1, 2, 0, 0))]) expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] assert_panel_equal(result, expected) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): result = store.select('wp', [('major_axis', '=', @@ -2769,9 +2726,7 @@ def test_tuple_index(self): data = np.random.randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) - expected_warning = Warning if PY35 else PerformanceWarning - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): self._check_roundtrip(DF, tm.assert_frame_equal) def test_index_types(self): @@ -2783,30 +2738,23 @@ def test_index_types(self): check_index_type=True, check_series_type=True) - # nose has a deprecation warning in 3.5 - expected_warning = Warning if PY35 else PerformanceWarning - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [0, 'y']) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [datetime.datetime.today(), 0]) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, ['y', 0]) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [datetime.date.today(), 'a']) self._check_roundtrip(ser, func) - with tm.assert_produces_warning(expected_warning=expected_warning, - check_stacklevel=False): + with catch_warnings(record=True): ser = Series(values, [1.23, 'b']) self._check_roundtrip(ser, func) @@ -3054,7 +3002,7 @@ def test_wide_table_dups(self): store.put('panel', wp, format='table') store.put('panel', wp, format='table', append=True) - with tm.assert_produces_warning(expected_warning=DuplicateWarning): + with 
catch_warnings(record=True): recons = store['panel'] assert_panel_equal(recons, wp) @@ -3648,6 +3596,7 @@ def test_retain_index_attributes(self): def test_retain_index_attributes2(self): with ensure_clean_path(self.path) as path: + expected_warning = Warning if PY35 else AttributeConflictWarning with tm.assert_produces_warning(expected_warning=expected_warning, check_stacklevel=False): @@ -3805,15 +3754,10 @@ def test_frame_select_complex2(self): hist.to_hdf(hh, 'df', mode='w', format='table') - expected = read_hdf(hh, 'df', where=Term('l1', '=', [2, 3, 4])) - - # list like - result = read_hdf(hh, 'df', where=Term( - 'l1', '=', selection.index.tolist())) - assert_frame_equal(result, expected) - l = selection.index.tolist() # noqa + expected = read_hdf(hh, 'df', where="l1=[2, 3, 4]") # sccope with list like + l = selection.index.tolist() # noqa store = HDFStore(hh) result = store.select('df', where='l1=l') assert_frame_equal(result, expected) @@ -3882,12 +3826,12 @@ def test_string_select(self): store.append('df', df, data_columns=['x']) - result = store.select('df', Term('x=none')) + result = store.select('df', 'x=none') expected = df[df.x == 'none'] assert_frame_equal(result, expected) try: - result = store.select('df', Term('x!=none')) + result = store.select('df', 'x!=none') expected = df[df.x != 'none'] assert_frame_equal(result, expected) except Exception as detail: @@ -3899,7 +3843,7 @@ def test_string_select(self): df2.loc[df2.x == '', 'x'] = np.nan store.append('df2', df2, data_columns=['x']) - result = store.select('df2', Term('x!=none')) + result = store.select('df2', 'x!=none') expected = df2[isnull(df2.x)] assert_frame_equal(result, expected) @@ -3909,11 +3853,11 @@ def test_string_select(self): store.append('df3', df, data_columns=['int']) - result = store.select('df3', Term('int=2')) + result = store.select('df3', 'int=2') expected = df[df.int == 2] assert_frame_equal(result, expected) - result = store.select('df3', Term('int!=2')) + result = store.select('df3', 'int!=2') expected = df[df.int != 2] assert_frame_equal(result, expected) @@ -4179,8 +4123,8 @@ def test_select_as_multiple(self): tm.assert_frame_equal(result, expected) # multiple (diff selector) - result = store.select_as_multiple(['df1', 'df2'], where=[Term( - 'index>df2.index[4]')], selector='df2') + result = store.select_as_multiple( + ['df1', 'df2'], where='index>df2.index[4]', selector='df2') expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) @@ -4222,13 +4166,13 @@ def test_start_stop_table(self): store.append('df', df) result = store.select( - 'df', [Term("columns=['A']")], start=0, stop=5) + 'df', "columns=['A']", start=0, stop=5) expected = df.loc[0:4, ['A']] tm.assert_frame_equal(result, expected) # out of range result = store.select( - 'df', [Term("columns=['A']")], start=30, stop=40) + 'df', "columns=['A']", start=30, stop=40) self.assertTrue(len(result) == 0) expected = df.loc[30:40, ['A']] tm.assert_frame_equal(result, expected) @@ -4288,11 +4232,11 @@ def test_select_filter_corner(self): with ensure_clean_store(self.path) as store: store.put('frame', df, format='table') - crit = Term('columns=df.columns[:75]') + crit = 'columns=df.columns[:75]' result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75]]) - crit = Term('columns=df.columns[:75:2]') + crit = 'columns=df.columns[:75:2]' result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]]) @@ -4471,16 +4415,16 @@ def 
test_legacy_table_read(self): with tm.assert_produces_warning( expected_warning=IncompatibilityWarning): self.assertRaises( - Exception, store.select, 'wp1', Term('minor_axis=B')) + Exception, store.select, 'wp1', 'minor_axis=B') df2 = store.select('df2') - result = store.select('df2', Term('index>df2.index[2]')) + result = store.select('df2', 'index>df2.index[2]') expected = df2[df2.index > df2.index[2]] assert_frame_equal(expected, result) def test_legacy_0_10_read(self): # legacy from 0.10 - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): path = tm.get_data_path('legacy_hdf/legacy_0.10.h5') with ensure_clean_store(path, mode='r') as store: str(store) @@ -4504,7 +4448,7 @@ def test_legacy_0_11_read(self): def test_copy(self): - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): def do_copy(f=None, new_f=None, keys=None, propindexes=True, **kwargs): @@ -4646,7 +4590,8 @@ def test_unicode_index(self): unicode_values = [u('\u03c3'), u('\u03c3\u03c3')] - with compat_assert_produces_warning(PerformanceWarning): + # PerformanceWarning + with catch_warnings(record=True): s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) @@ -4914,15 +4859,19 @@ def test_to_hdf_with_object_column_names(self): with self.assertRaises( ValueError, msg=("cannot have non-object label " "DataIndexableCol")): - df.to_hdf(path, 'df', format='table', data_columns=True) + with catch_warnings(record=True): + df.to_hdf(path, 'df', + format='table', + data_columns=True) for index in types_should_run: df = DataFrame(np.random.randn(10, 2), columns=index(2)) with ensure_clean_path(self.path) as path: - df.to_hdf(path, 'df', format='table', data_columns=True) - result = pd.read_hdf( - path, 'df', where="index = [{0}]".format(df.index[0])) - assert(len(result)) + with catch_warnings(record=True): + df.to_hdf(path, 'df', format='table', data_columns=True) + result = pd.read_hdf( + path, 'df', where="index = [{0}]".format(df.index[0])) + assert(len(result)) def test_read_hdf_open_store(self): # GH10330 @@ -5187,7 +5136,7 @@ def test_complex_mixed_table(self): with ensure_clean_store(self.path) as store: store.append('df', df, data_columns=['A', 'B']) - result = store.select('df', where=Term('A>2')) + result = store.select('df', where='A>2') assert_frame_equal(df.loc[df.A > 2], result) with ensure_clean_path(self.path) as path: @@ -5216,7 +5165,7 @@ def test_complex_across_dimensions(self): df = DataFrame({'A': s, 'B': s}) p = Panel({'One': df, 'Two': df}) - with compat_assert_produces_warning(FutureWarning): + with catch_warnings(record=True): p4d = pd.Panel4D({'i': p, 'ii': p}) objs = [df, p, p4d] @@ -5300,7 +5249,7 @@ def test_append_with_timezones_dateutil(self): # select with tz aware expected = df[df.A >= df.A[3]] - result = store.select('df_tz', where=Term('A>=df.A[3]')) + result = store.select('df_tz', where='A>=df.A[3]') self._compare_with_tz(result, expected) # ensure we include dates in DST and STD time here. @@ -5371,7 +5320,7 @@ def test_append_with_timezones_pytz(self): # select with tz aware self._compare_with_tz(store.select( - 'df_tz', where=Term('A>=df.A[3]')), df[df.A >= df.A[3]]) + 'df_tz', where='A>=df.A[3]'), df[df.A >= df.A[3]]) _maybe_remove(store, 'df_tz') # ensure we include dates in DST and STD time here. 
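Throughout the test changes above, explicit ``Term(...)`` objects are replaced by plain string queries. A minimal sketch of that string-based ``where`` syntax, assuming PyTables is installed (the file and column names are arbitrary)::

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': np.random.randn(10), 'B': np.arange(10)})

    with pd.HDFStore('demo.h5', mode='w') as store:
        store.append('df', df, data_columns=['A', 'B'])
        # plain string expression instead of Term('A', '>', 0) / Term('B<5')
        subset = store.select('df', 'A > 0 and B < 5')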
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 373f590cbf9eb..ab0322abbcf06 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=W0612,E1101 +from warnings import catch_warnings from datetime import datetime import operator @@ -1272,7 +1273,7 @@ def test_apply_slabs(self): f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T # make sure that we don't trigger any warnings - with tm.assert_produces_warning(False): + with catch_warnings(record=True): result = self.panel.apply(f, axis=['items', 'major_axis']) expected = Panel(dict([(ax, f(self.panel.loc[:, :, ax])) for ax in self.panel.minor_axis])) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 2491bac2a7f19..c0511581cd299 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -3,7 +3,7 @@ from pandas.compat import range, lrange import operator import pytest - +from warnings import catch_warnings import numpy as np from pandas.types.common import is_float_dtype @@ -129,17 +129,21 @@ def skipna_wrapper(x): def wrapper(x): return alternative(np.asarray(x)) - for i in range(obj.ndim): - result = f(axis=i, skipna=False) - assert_panel_equal(result, obj.apply(wrapper, axis=i)) + with catch_warnings(record=True): + for i in range(obj.ndim): + result = f(axis=i, skipna=False) + expected = obj.apply(wrapper, axis=i) + assert_panel_equal(result, expected) else: skipna_wrapper = alternative wrapper = alternative - for i in range(obj.ndim): - result = f(axis=i) - if not tm._incompat_bottleneck_version(name): - assert_panel_equal(result, obj.apply(skipna_wrapper, axis=i)) + with catch_warnings(record=True): + for i in range(obj.ndim): + result = f(axis=i) + if not tm._incompat_bottleneck_version(name): + expected = obj.apply(skipna_wrapper, axis=i) + assert_panel_equal(result, expected) self.assertRaises(Exception, f, axis=obj.ndim) @@ -161,32 +165,33 @@ def test_get_axis(self): assert(self.panel4d._get_axis(3) is self.panel4d.minor_axis) def test_set_axis(self): - new_labels = Index(np.arange(len(self.panel4d.labels))) + with catch_warnings(record=True): + new_labels = Index(np.arange(len(self.panel4d.labels))) - # TODO: unused? - # new_items = Index(np.arange(len(self.panel4d.items))) + # TODO: unused? + # new_items = Index(np.arange(len(self.panel4d.items))) - new_major = Index(np.arange(len(self.panel4d.major_axis))) - new_minor = Index(np.arange(len(self.panel4d.minor_axis))) + new_major = Index(np.arange(len(self.panel4d.major_axis))) + new_minor = Index(np.arange(len(self.panel4d.minor_axis))) - # ensure propagate to potentially prior-cached items too + # ensure propagate to potentially prior-cached items too - # TODO: unused? - # label = self.panel4d['l1'] + # TODO: unused? 
+ # label = self.panel4d['l1'] - self.panel4d.labels = new_labels + self.panel4d.labels = new_labels - if hasattr(self.panel4d, '_item_cache'): - self.assertNotIn('l1', self.panel4d._item_cache) - self.assertIs(self.panel4d.labels, new_labels) + if hasattr(self.panel4d, '_item_cache'): + self.assertNotIn('l1', self.panel4d._item_cache) + self.assertIs(self.panel4d.labels, new_labels) - self.panel4d.major_axis = new_major - self.assertIs(self.panel4d[0].major_axis, new_major) - self.assertIs(self.panel4d.major_axis, new_major) + self.panel4d.major_axis = new_major + self.assertIs(self.panel4d[0].major_axis, new_major) + self.assertIs(self.panel4d.major_axis, new_major) - self.panel4d.minor_axis = new_minor - self.assertIs(self.panel4d[0].minor_axis, new_minor) - self.assertIs(self.panel4d.minor_axis, new_minor) + self.panel4d.minor_axis = new_minor + self.assertIs(self.panel4d[0].minor_axis, new_minor) + self.assertIs(self.panel4d.minor_axis, new_minor) def test_get_axis_number(self): self.assertEqual(self.panel4d._get_axis_number('labels'), 0) @@ -201,7 +206,7 @@ def test_get_axis_name(self): self.assertEqual(self.panel4d._get_axis_name(3), 'minor_axis') def test_arith(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self._test_op(self.panel4d, operator.add) self._test_op(self.panel4d, operator.sub) self._test_op(self.panel4d, operator.mul) @@ -233,16 +238,16 @@ def test_iteritems(self): len(self.panel4d.labels)) def test_combinePanel4d(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = self.panel4d.add(self.panel4d) self.assert_panel4d_equal(result, self.panel4d * 2) def test_neg(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assert_panel4d_equal(-self.panel4d, self.panel4d * -1) def test_select(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p = self.panel4d @@ -283,7 +288,7 @@ def test_get_value(self): def test_abs(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = self.panel4d.abs() expected = np.abs(self.panel4d) self.assert_panel4d_equal(result, expected) @@ -306,7 +311,7 @@ def test_getitem(self): def test_delitem_and_pop(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): expected = self.panel4d['l2'] result = self.panel4d.pop('l2') assert_panel_equal(expected, result) @@ -351,40 +356,38 @@ def test_delitem_and_pop(self): assert_panel_equal(panel4dc[0], panel4d[0]) def test_setitem(self): - # LongPanel with one item - # lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() - # self.assertRaises(Exception, self.panel.__setitem__, - # 'ItemE', lp) + with catch_warnings(record=True): - # Panel - p = Panel(dict( - ItemA=self.panel4d['l1']['ItemA'][2:].filter(items=['A', 'B']))) - self.panel4d['l4'] = p - self.panel4d['l5'] = p + # Panel + p = Panel(dict( + ItemA=self.panel4d['l1']['ItemA'][2:].filter( + items=['A', 'B']))) + self.panel4d['l4'] = p + self.panel4d['l5'] = p - p2 = self.panel4d['l4'] + p2 = self.panel4d['l4'] - assert_panel_equal(p, p2.reindex(items=p.items, - major_axis=p.major_axis, - minor_axis=p.minor_axis)) + assert_panel_equal(p, p2.reindex(items=p.items, + major_axis=p.major_axis, + minor_axis=p.minor_axis)) - # scalar - self.panel4d['lG'] = 1 - 
self.panel4d['lE'] = True - self.assertEqual(self.panel4d['lG'].values.dtype, np.int64) - self.assertEqual(self.panel4d['lE'].values.dtype, np.bool_) + # scalar + self.panel4d['lG'] = 1 + self.panel4d['lE'] = True + self.assertEqual(self.panel4d['lG'].values.dtype, np.int64) + self.assertEqual(self.panel4d['lE'].values.dtype, np.bool_) - # object dtype - self.panel4d['lQ'] = 'foo' - self.assertEqual(self.panel4d['lQ'].values.dtype, np.object_) + # object dtype + self.panel4d['lQ'] = 'foo' + self.assertEqual(self.panel4d['lQ'].values.dtype, np.object_) - # boolean dtype - self.panel4d['lP'] = self.panel4d['l1'] > 0 - self.assertEqual(self.panel4d['lP'].values.dtype, np.bool_) + # boolean dtype + self.panel4d['lP'] = self.panel4d['l1'] > 0 + self.assertEqual(self.panel4d['lP'].values.dtype, np.bool_) def test_setitem_by_indexer(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # Panel panel4dc = self.panel4d.copy() @@ -419,7 +422,7 @@ def func(): def test_setitem_by_indexer_mixed_type(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # GH 8702 self.panel4d['foo'] = 'bar' @@ -433,7 +436,7 @@ def test_setitem_by_indexer_mixed_type(self): self.assertTrue((panel4dc.iloc[2].values == 'foo').all()) def test_comparisons(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p1 = tm.makePanel4D() p2 = tm.makePanel4D() @@ -467,7 +470,8 @@ def test_major_xs(self): ref = self.panel4d['l1']['ItemA'] idx = self.panel4d.major_axis[5] - xs = self.panel4d.major_xs(idx) + with catch_warnings(record=True): + xs = self.panel4d.major_xs(idx) assert_series_equal(xs['l1'].T['ItemA'], ref.xs(idx), check_names=False) @@ -478,15 +482,17 @@ def test_major_xs(self): def test_major_xs_mixed(self): self.panel4d['l4'] = 'foo' - xs = self.panel4d.major_xs(self.panel4d.major_axis[0]) + with catch_warnings(record=True): + xs = self.panel4d.major_xs(self.panel4d.major_axis[0]) self.assertEqual(xs['l1']['A'].dtype, np.float64) self.assertEqual(xs['l4']['A'].dtype, np.object_) def test_minor_xs(self): ref = self.panel4d['l1']['ItemA'] - idx = self.panel4d.minor_axis[1] - xs = self.panel4d.minor_xs(idx) + with catch_warnings(record=True): + idx = self.panel4d.minor_axis[1] + xs = self.panel4d.minor_xs(idx) assert_series_equal(xs['l1'].T['ItemA'], ref[idx], check_names=False) @@ -496,7 +502,8 @@ def test_minor_xs(self): def test_minor_xs_mixed(self): self.panel4d['l4'] = 'foo' - xs = self.panel4d.minor_xs('D') + with catch_warnings(record=True): + xs = self.panel4d.minor_xs('D') self.assertEqual(xs['l1'].T['ItemA'].dtype, np.float64) self.assertEqual(xs['l4'].T['ItemA'].dtype, np.object_) @@ -512,11 +519,12 @@ def test_xs(self): # mixed-type self.panel4d['strings'] = 'foo' - result = self.panel4d.xs('D', axis=3) + with catch_warnings(record=True): + result = self.panel4d.xs('D', axis=3) self.assertIsNotNone(result.is_copy) def test_getitem_fancy_labels(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): panel4d = self.panel4d labels = panel4d.labels[[1, 0]] @@ -572,7 +580,7 @@ def test_get_value(self): def test_set_value(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): for label in self.panel4d.labels: for item in self.panel4d.items: @@ -603,13 +611,13 @@ def assert_panel4d_equal(cls, x, y): 
assert_panel4d_equal(x, y) def setUp(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.panel4d = tm.makePanel4D(nper=8) add_nans(self.panel4d) def test_constructor(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): panel4d = Panel4D(self.panel4d._data) self.assertIs(panel4d._data, self.panel4d._data) @@ -649,7 +657,7 @@ def test_constructor(self): assert_panel4d_equal(panel4d, expected) def test_constructor_cast(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): zero_filled = self.panel4d.fillna(0) casted = Panel4D(zero_filled._data, dtype=int) @@ -671,7 +679,7 @@ def test_constructor_cast(self): self.assertRaises(ValueError, Panel, data, dtype=float) def test_consolidate(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assertTrue(self.panel4d._data.is_consolidated()) self.panel4d['foo'] = 1. @@ -681,7 +689,7 @@ def test_consolidate(self): self.assertTrue(panel4d._data.is_consolidated()) def test_ctor_dict(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): l1 = self.panel4d['l1'] l2 = self.panel4d['l2'] @@ -694,7 +702,7 @@ def test_ctor_dict(self): :, :]['ItemB']) def test_constructor_dict_mixed(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): data = dict((k, v.values) for k, v in self.panel4d.iteritems()) result = Panel4D(data) @@ -721,7 +729,7 @@ def test_constructor_dict_mixed(self): self.assertRaises(Exception, Panel4D, data) def test_constructor_resize(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): data = self.panel4d._data labels = self.panel4d.labels[:-1] items = self.panel4d.items[:-1] @@ -747,16 +755,19 @@ def test_constructor_resize(self): assert_panel4d_equal(result, expected) def test_conform(self): + with catch_warnings(record=True): - p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) - conformed = self.panel4d.conform(p) + p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) + conformed = self.panel4d.conform(p) - tm.assert_index_equal(conformed.items, self.panel4d.labels) - tm.assert_index_equal(conformed.major_axis, self.panel4d.major_axis) - tm.assert_index_equal(conformed.minor_axis, self.panel4d.minor_axis) + tm.assert_index_equal(conformed.items, self.panel4d.labels) + tm.assert_index_equal(conformed.major_axis, + self.panel4d.major_axis) + tm.assert_index_equal(conformed.minor_axis, + self.panel4d.minor_axis) def test_reindex(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): ref = self.panel4d['l2'] # labels @@ -810,14 +821,14 @@ def test_reindex(self): self.assertTrue(result is self.panel4d) def test_not_hashable(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4D_empty = Panel4D() self.assertRaises(TypeError, hash, p4D_empty) self.assertRaises(TypeError, hash, self.panel4d) def test_reindex_like(self): # reindex_like - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): smaller = self.panel4d.reindex(labels=self.panel4d.labels[:-1], items=self.panel4d.items[:-1], major=self.panel4d.major_axis[:-1], 
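These Panel4D tests no longer assert that a ``FutureWarning`` is raised; they simply run the deprecated calls with warnings recorded and suppressed. A minimal sketch of that pattern using only the standard library (the warning-raising function is a stand-in for the deprecated Panel4D API)::

    import warnings


    def deprecated_call():
        # stand-in for a deprecated pandas API such as tm.makePanel4D()
        warnings.warn("this API is deprecated", FutureWarning)
        return 42


    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        result = deprecated_call()  # warning is captured in ``caught``, not shown

    assert result == 42
    assert issubclass(caught[-1].category, FutureWarning)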
@@ -826,7 +837,7 @@ def test_reindex_like(self): assert_panel4d_equal(smaller, smaller_like) def test_sort_index(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): import random rlabels = list(self.panel4d.labels) @@ -844,7 +855,7 @@ def test_sort_index(self): def test_fillna(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): self.assertFalse(np.isfinite(self.panel4d.values).all()) filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) @@ -853,7 +864,7 @@ def test_fillna(self): self.panel4d.fillna, method='pad') def test_swapaxes(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): result = self.panel4d.swapaxes('labels', 'items') self.assertIs(result.items, self.panel4d.labels) @@ -880,7 +891,7 @@ def test_swapaxes(self): def test_update(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = Panel4D([[[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], @@ -913,12 +924,12 @@ def test_dtypes(self): assert_series_equal(result, expected) def test_repr_empty(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): empty = Panel4D() repr(empty) def test_rename(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): mapper = {'l1': 'foo', 'l2': 'bar', diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 6a578d85d3ee3..7ecc773cd7bea 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from warnings import catch_warnings from pandas.core import panelnd from pandas.core.panel import Panel @@ -13,7 +14,7 @@ def setUp(self): def test_4d_construction(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # create a 4D Panel4D = panelnd.create_nd_panel_factory( @@ -29,7 +30,7 @@ def test_4d_construction(self): def test_4d_construction_alt(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # create a 4D Panel4D = panelnd.create_nd_panel_factory( @@ -61,7 +62,7 @@ def test_4d_construction_error(self): def test_5d_construction(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): # create a 4D Panel4D = panelnd.create_nd_panel_factory( diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py index 392036a99a297..c41924a7987bd 100644 --- a/pandas/tests/tools/test_concat.py +++ b/pandas/tests/tools/test_concat.py @@ -1,3 +1,4 @@ +from warnings import catch_warnings import numpy as np from numpy.random import randn @@ -1373,7 +1374,7 @@ def df(): concat([panel1, panel3], axis=1, verify_integrity=True) def test_panel4d_concat(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D() p1 = p4d.iloc[:, :, :5, :] @@ -1389,7 +1390,7 @@ def test_panel4d_concat(self): tm.assert_panel4d_equal(result, p4d) def test_panel4d_concat_mixed_type(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with catch_warnings(record=True): p4d = tm.makePanel4D() # if things are a bit misbehaved From 
de17fd9bfb170e7c8ef3c6706bbc7d2630ab893c Mon Sep 17 00:00:00 2001 From: "Christopher C. Aycock" Date: Fri, 17 Mar 2017 09:08:52 -0400 Subject: [PATCH 221/353] BUG: TZ-aware Series.where() appropriately handles default other=nan (#15701) closes #15701 Author: Christopher C. Aycock Closes #15711 from chrisaycock/GH15701 and squashes the following commits: b77f5ed [Christopher C. Aycock] BUG: TZ-aware Series.where() appropriately handles default other=nan (#15701) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/internals.py | 3 ++- pandas/tests/series/test_indexing.py | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a56212328f5c3..29d05ddcfb497 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -829,6 +829,7 @@ Bug Fixes - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) +- Bug in ``Series.where()`` where TZ-aware data was converted to float representation (:issue:`15701`) - Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0e6c176d950a1..9db01713b05ed 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2440,7 +2440,8 @@ def _try_coerce_args(self, values, other): if isinstance(other, bool): raise TypeError - elif is_null_datelike_scalar(other): + elif (is_null_datelike_scalar(other) or + (is_scalar(other) and isnull(other))): other = tslib.iNaT other_mask = True elif isinstance(other, self._holder): diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 9d93d9f01b161..0b6c0c601ac72 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1385,6 +1385,14 @@ def test_where_datetime(self): expected = Series([10, None], dtype='datetime64[ns]') assert_series_equal(rs, expected) + # GH 15701 + timestamps = ['2016-12-31 12:00:04+00:00', + '2016-12-31 12:00:04.010000+00:00'] + s = Series([pd.Timestamp(t) for t in timestamps]) + rs = s.where(Series([False, True])) + expected = Series([pd.NaT, s[1]]) + assert_series_equal(rs, expected) + def test_where_timedelta(self): s = Series([1, 2], dtype='timedelta64[ns]') expected = Series([10, 10], dtype='timedelta64[ns]') From a73e4518cf3d10fd239cdbd1be3bcda43443bf2a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 10:08:48 -0400 Subject: [PATCH 222/353] TST: remove rest of yield warnings (#15708) * TST: remove yield warnings from test_internals.py * TST: remove yield warnings from test_windows.py --- pandas/tests/formats/test_format.py | 38 +- pandas/tests/test_internals.py | 567 ++++++++++++++-------------- pandas/tests/test_window.py | 2 +- pandas/util/testing.py | 14 +- setup.cfg | 1 - 5 files changed, 322 insertions(+), 300 deletions(-) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index b1f163ccf9429..44a7f2b45e759 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1392,24 +1392,26 @@ def test_repr_html_long(self): assert u('2 columns') in long_repr def test_repr_html_float(self): - 
max_rows = get_option('display.max_rows') - h = max_rows - 1 - df = DataFrame({'idx': np.linspace(-10, 10, h), - 'A': np.arange(1, 1 + h), - 'B': np.arange(41, 41 + h)}).set_index('idx') - reg_repr = df._repr_html_() - assert '..' not in reg_repr - assert str(40 + h) in reg_repr - - h = max_rows + 1 - df = DataFrame({'idx': np.linspace(-10, 10, h), - 'A': np.arange(1, 1 + h), - 'B': np.arange(41, 41 + h)}).set_index('idx') - long_repr = df._repr_html_() - assert '..' in long_repr - assert '31' not in long_repr - assert u('%d rows ') % h in long_repr - assert u('2 columns') in long_repr + with option_context('display.max_rows', 60): + + max_rows = get_option('display.max_rows') + h = max_rows - 1 + df = DataFrame({'idx': np.linspace(-10, 10, h), + 'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}).set_index('idx') + reg_repr = df._repr_html_() + assert '..' not in reg_repr + assert str(40 + h) in reg_repr + + h = max_rows + 1 + df = DataFrame({'idx': np.linspace(-10, 10, h), + 'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}).set_index('idx') + long_repr = df._repr_html_() + assert '..' in long_repr + assert '31' not in long_repr + assert u('%d rows ') % h in long_repr + assert u('2 columns') in long_repr def test_repr_html_long_multiindex(self): max_rows = get_option('display.max_rows') diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index df5e843097514..29920b165d3f6 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -23,11 +23,19 @@ from pandas.compat import zip, u +@pytest.fixture +def mgr(): + return create_mgr( + 'a: f8; b: object; c: f8; d: object; e: f8;' + 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' + 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') + + def assert_block_equal(left, right): tm.assert_numpy_array_equal(left.values, right.values) - assert (left.dtype == right.dtype) - tm.assertIsInstance(left.mgr_locs, lib.BlockPlacement) - tm.assertIsInstance(right.mgr_locs, lib.BlockPlacement) + assert left.dtype == right.dtype + assert isinstance(left.mgr_locs, lib.BlockPlacement) + assert isinstance(right.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array) @@ -197,11 +205,11 @@ def setUp(self): def test_constructor(self): int32block = create_block('i4', [0]) - self.assertEqual(int32block.dtype, np.int32) + assert int32block.dtype == np.int32 def test_pickle(self): def _check(blk): - assert_block_equal(self.round_trip_pickle(blk), blk) + assert_block_equal(tm.round_trip_pickle(blk), blk) _check(self.fblock) _check(self.cblock) @@ -209,14 +217,14 @@ def _check(blk): _check(self.bool_block) def test_mgr_locs(self): - tm.assertIsInstance(self.fblock.mgr_locs, lib.BlockPlacement) + assert isinstance(self.fblock.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64)) def test_attrs(self): - self.assertEqual(self.fblock.shape, self.fblock.values.shape) - self.assertEqual(self.fblock.dtype, self.fblock.values.dtype) - self.assertEqual(len(self.fblock), len(self.fblock.values)) + assert self.fblock.shape == self.fblock.values.shape + assert self.fblock.dtype == self.fblock.values.dtype + assert len(self.fblock) == len(self.fblock.values) def test_merge(self): avals = randn(2, 10) @@ -251,26 +259,27 @@ def test_insert(self): def test_delete(self): newb = self.fblock.copy() newb.delete(0) - tm.assertIsInstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, 
lib.BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([2, 4], dtype=np.int64)) - self.assertTrue((newb.values[0] == 1).all()) + assert (newb.values[0] == 1).all() newb = self.fblock.copy() newb.delete(1) - tm.assertIsInstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 4], dtype=np.int64)) - self.assertTrue((newb.values[1] == 2).all()) + assert (newb.values[1] == 2).all() newb = self.fblock.copy() newb.delete(2) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 2], dtype=np.int64)) - self.assertTrue((newb.values[1] == 1).all()) + assert (newb.values[1] == 1).all() newb = self.fblock.copy() - self.assertRaises(Exception, newb.delete, 3) + with pytest.raises(Exception): + newb.delete(3) def test_split_block_at(self): @@ -279,21 +288,21 @@ def test_split_block_at(self): pytest.skip("skipping for now") bs = list(self.fblock.split_block_at('a')) - self.assertEqual(len(bs), 1) - self.assertTrue(np.array_equal(bs[0].items, ['c', 'e'])) + assert len(bs) == 1 + assert np.array_equal(bs[0].items, ['c', 'e']) bs = list(self.fblock.split_block_at('c')) - self.assertEqual(len(bs), 2) - self.assertTrue(np.array_equal(bs[0].items, ['a'])) - self.assertTrue(np.array_equal(bs[1].items, ['e'])) + assert len(bs) == 2 + assert np.array_equal(bs[0].items, ['a']) + assert np.array_equal(bs[1].items, ['e']) bs = list(self.fblock.split_block_at('e')) - self.assertEqual(len(bs), 1) - self.assertTrue(np.array_equal(bs[0].items, ['a', 'c'])) + assert len(bs) == 1 + assert np.array_equal(bs[0].items, ['a', 'c']) # bblock = get_bool_ex(['f']) # bs = list(bblock.split_block_at('f')) - # self.assertEqual(len(bs), 0) + # assert len(bs), 0) class TestDatetimeBlock(tm.TestCase): @@ -303,50 +312,44 @@ def test_try_coerce_arg(self): # coerce None none_coerced = block._try_coerce_args(block.values, None)[2] - self.assertTrue(pd.Timestamp(none_coerced) is pd.NaT) + assert pd.Timestamp(none_coerced) is pd.NaT # coerce different types of date bojects vals = (np.datetime64('2010-10-10'), datetime(2010, 10, 10), date(2010, 10, 10)) for val in vals: coerced = block._try_coerce_args(block.values, val)[2] - self.assertEqual(np.int64, type(coerced)) - self.assertEqual(pd.Timestamp('2010-10-10'), pd.Timestamp(coerced)) - + assert np.int64 == type(coerced) + assert pd.Timestamp('2010-10-10') == pd.Timestamp(coerced) -class TestBlockManager(tm.TestCase): - def setUp(self): - self.mgr = create_mgr( - 'a: f8; b: object; c: f8; d: object; e: f8;' - 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' - 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') +class TestBlockManager(object): def test_constructor_corner(self): pass def test_attrs(self): mgr = create_mgr('a,b,c: f8-1; d,e,f: f8-2') - self.assertEqual(mgr.nblocks, 2) - self.assertEqual(len(mgr), 6) + assert mgr.nblocks == 2 + assert len(mgr) == 6 def test_is_mixed_dtype(self): - self.assertFalse(create_mgr('a,b:f8').is_mixed_type) - self.assertFalse(create_mgr('a:f8-1; b:f8-2').is_mixed_type) + assert not create_mgr('a,b:f8').is_mixed_type + assert not create_mgr('a:f8-1; b:f8-2').is_mixed_type - self.assertTrue(create_mgr('a,b:f8; c,d: f4').is_mixed_type) - self.assertTrue(create_mgr('a,b:f8; c,d: object').is_mixed_type) + assert create_mgr('a,b:f8; c,d: f4').is_mixed_type + assert create_mgr('a,b:f8; c,d: object').is_mixed_type def test_is_indexed_like(self): mgr1 = create_mgr('a,b: f8') mgr2 = create_mgr('a:i8; b:bool') mgr3 = 
create_mgr('a,b,c: f8') - self.assertTrue(mgr1._is_indexed_like(mgr1)) - self.assertTrue(mgr1._is_indexed_like(mgr2)) - self.assertTrue(mgr1._is_indexed_like(mgr3)) + assert mgr1._is_indexed_like(mgr1) + assert mgr1._is_indexed_like(mgr2) + assert mgr1._is_indexed_like(mgr3) - self.assertFalse(mgr1._is_indexed_like(mgr1.get_slice( - slice(-1), axis=1))) + assert not mgr1._is_indexed_like(mgr1.get_slice( + slice(-1), axis=1)) def test_duplicate_ref_loc_failure(self): tmp_mgr = create_mgr('a:bool; a: f8') @@ -355,61 +358,63 @@ def test_duplicate_ref_loc_failure(self): blocks[0].mgr_locs = np.array([0]) blocks[1].mgr_locs = np.array([0]) + # test trying to create block manager with overlapping ref locs - self.assertRaises(AssertionError, BlockManager, blocks, axes) + with pytest.raises(AssertionError): + BlockManager(blocks, axes) blocks[0].mgr_locs = np.array([0]) blocks[1].mgr_locs = np.array([1]) mgr = BlockManager(blocks, axes) mgr.iget(1) - def test_contains(self): - self.assertIn('a', self.mgr) - self.assertNotIn('baz', self.mgr) + def test_contains(self, mgr): + assert 'a' in mgr + assert 'baz' not in mgr - def test_pickle(self): + def test_pickle(self, mgr): - mgr2 = self.round_trip_pickle(self.mgr) - assert_frame_equal(DataFrame(self.mgr), DataFrame(mgr2)) + mgr2 = tm.round_trip_pickle(mgr) + assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) # share ref_items # self.assertIs(mgr2.blocks[0].ref_items, mgr2.blocks[1].ref_items) # GH2431 - self.assertTrue(hasattr(mgr2, "_is_consolidated")) - self.assertTrue(hasattr(mgr2, "_known_consolidated")) + assert hasattr(mgr2, "_is_consolidated") + assert hasattr(mgr2, "_known_consolidated") # reset to False on load - self.assertFalse(mgr2._is_consolidated) - self.assertFalse(mgr2._known_consolidated) + assert not mgr2._is_consolidated + assert not mgr2._known_consolidated def test_non_unique_pickle(self): mgr = create_mgr('a,a,a:f8') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) mgr = create_mgr('a: f8; a: i8') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) def test_categorical_block_pickle(self): mgr = create_mgr('a: category') - mgr2 = self.round_trip_pickle(mgr) + mgr2 = tm.round_trip_pickle(mgr) assert_frame_equal(DataFrame(mgr), DataFrame(mgr2)) smgr = create_single_mgr('category') - smgr2 = self.round_trip_pickle(smgr) + smgr2 = tm.round_trip_pickle(smgr) assert_series_equal(Series(smgr), Series(smgr2)) - def test_get_scalar(self): - for item in self.mgr.items: - for i, index in enumerate(self.mgr.axes[1]): - res = self.mgr.get_scalar((item, index)) - exp = self.mgr.get(item, fastpath=False)[i] - self.assertEqual(res, exp) - exp = self.mgr.get(item).internal_values()[i] - self.assertEqual(res, exp) + def test_get_scalar(self, mgr): + for item in mgr.items: + for i, index in enumerate(mgr.axes[1]): + res = mgr.get_scalar((item, index)) + exp = mgr.get(item, fastpath=False)[i] + assert res == exp + exp = mgr.get(item).internal_values()[i] + assert res == exp def test_get(self): cols = Index(list('abc')) @@ -438,30 +443,21 @@ def test_set(self): tm.assert_numpy_array_equal(mgr.get('d').internal_values(), np.array(['foo'] * 3, dtype=np.object_)) - def test_insert(self): - self.mgr.insert(0, 'inserted', np.arange(N)) - - self.assertEqual(self.mgr.items[0], 'inserted') - assert_almost_equal(self.mgr.get('inserted'), np.arange(N)) + def test_set_change_dtype(self, mgr): + mgr.set('baz', 
np.zeros(N, dtype=bool)) - for blk in self.mgr.blocks: - yield self.assertIs, self.mgr.items, blk.ref_items + mgr.set('baz', np.repeat('foo', N)) + assert mgr.get('baz').dtype == np.object_ - def test_set_change_dtype(self): - self.mgr.set('baz', np.zeros(N, dtype=bool)) - - self.mgr.set('baz', np.repeat('foo', N)) - self.assertEqual(self.mgr.get('baz').dtype, np.object_) - - mgr2 = self.mgr.consolidate() + mgr2 = mgr.consolidate() mgr2.set('baz', np.repeat('foo', N)) - self.assertEqual(mgr2.get('baz').dtype, np.object_) + assert mgr2.get('baz').dtype == np.object_ mgr2.set('quux', randn(N).astype(int)) - self.assertEqual(mgr2.get('quux').dtype, np.int_) + assert mgr2.get('quux').dtype == np.int_ mgr2.set('quux', randn(N)) - self.assertEqual(mgr2.get('quux').dtype, np.float_) + assert mgr2.get('quux').dtype == np.float_ def test_set_change_dtype_slice(self): # GH8850 cols = MultiIndex.from_tuples([('1st', 'a'), ('2nd', 'b'), ('3rd', 'c') @@ -469,70 +465,69 @@ def test_set_change_dtype_slice(self): # GH8850 df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) df['2nd'] = df['2nd'] * 2.0 - self.assertEqual(sorted(df.blocks.keys()), ['float64', 'int64']) + assert sorted(df.blocks.keys()) == ['float64', 'int64'] assert_frame_equal(df.blocks['float64'], DataFrame( [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])) assert_frame_equal(df.blocks['int64'], DataFrame( [[3], [6]], columns=cols[2:])) - def test_copy(self): - cp = self.mgr.copy(deep=False) - for blk, cp_blk in zip(self.mgr.blocks, cp.blocks): + def test_copy(self, mgr): + cp = mgr.copy(deep=False) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): # view assertion - self.assertTrue(cp_blk.equals(blk)) - self.assertTrue(cp_blk.values.base is blk.values.base) + assert cp_blk.equals(blk) + assert cp_blk.values.base is blk.values.base - cp = self.mgr.copy(deep=True) - for blk, cp_blk in zip(self.mgr.blocks, cp.blocks): + cp = mgr.copy(deep=True) + for blk, cp_blk in zip(mgr.blocks, cp.blocks): # copy assertion we either have a None for a base or in case of # some blocks it is an array (e.g. datetimetz), but was copied - self.assertTrue(cp_blk.equals(blk)) + assert cp_blk.equals(blk) if cp_blk.values.base is not None and blk.values.base is not None: - self.assertFalse(cp_blk.values.base is blk.values.base) + assert cp_blk.values.base is not blk.values.base else: - self.assertTrue(cp_blk.values.base is None and blk.values.base - is None) + assert cp_blk.values.base is None and blk.values.base is None def test_sparse(self): mgr = create_mgr('a: sparse-1; b: sparse-2') # what to test here? - self.assertEqual(mgr.as_matrix().dtype, np.float64) + assert mgr.as_matrix().dtype == np.float64 def test_sparse_mixed(self): mgr = create_mgr('a: sparse-1; b: sparse-2; c: f8') - self.assertEqual(len(mgr.blocks), 3) - self.assertIsInstance(mgr, BlockManager) + assert len(mgr.blocks) == 3 + assert isinstance(mgr, BlockManager) # what to test here? 
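The BlockManager tests above are being migrated from ``unittest``-style classes to the pytest idiom: a ``mgr`` fixture replaces state created in ``setUp``, and plain ``assert`` statements replace ``self.assertEqual``/``assertTrue``. A minimal sketch of that idiom with a made-up fixture::

    import pytest


    @pytest.fixture
    def numbers():
        # hypothetical stand-in for the ``mgr`` fixture built with create_mgr(...)
        return [1, 2, 3]


    def test_length(numbers):
        # the fixture is injected by argument name; no TestCase or setUp needed
        assert len(numbers) == 3


    def test_membership(numbers):
        assert 2 in numbers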
def test_as_matrix_float(self): mgr = create_mgr('c: f4; d: f2; e: f8') - self.assertEqual(mgr.as_matrix().dtype, np.float64) + assert mgr.as_matrix().dtype == np.float64 mgr = create_mgr('c: f4; d: f2') - self.assertEqual(mgr.as_matrix().dtype, np.float32) + assert mgr.as_matrix().dtype == np.float32 def test_as_matrix_int_bool(self): mgr = create_mgr('a: bool-1; b: bool-2') - self.assertEqual(mgr.as_matrix().dtype, np.bool_) + assert mgr.as_matrix().dtype == np.bool_ mgr = create_mgr('a: i8-1; b: i8-2; c: i4; d: i2; e: u1') - self.assertEqual(mgr.as_matrix().dtype, np.int64) + assert mgr.as_matrix().dtype == np.int64 mgr = create_mgr('c: i4; d: i2; e: u1') - self.assertEqual(mgr.as_matrix().dtype, np.int32) + assert mgr.as_matrix().dtype == np.int32 def test_as_matrix_datetime(self): mgr = create_mgr('h: datetime-1; g: datetime-2') - self.assertEqual(mgr.as_matrix().dtype, 'M8[ns]') + assert mgr.as_matrix().dtype == 'M8[ns]' def test_as_matrix_datetime_tz(self): mgr = create_mgr('h: M8[ns, US/Eastern]; g: M8[ns, CET]') - self.assertEqual(mgr.get('h').dtype, 'datetime64[ns, US/Eastern]') - self.assertEqual(mgr.get('g').dtype, 'datetime64[ns, CET]') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.get('h').dtype == 'datetime64[ns, US/Eastern]' + assert mgr.get('g').dtype == 'datetime64[ns, CET]' + assert mgr.as_matrix().dtype == 'object' def test_astype(self): # coerce all @@ -540,9 +535,9 @@ def test_astype(self): for t in ['float16', 'float32', 'float64', 'int32', 'int64']: t = np.dtype(t) tmgr = mgr.astype(t) - self.assertEqual(tmgr.get('c').dtype.type, t) - self.assertEqual(tmgr.get('d').dtype.type, t) - self.assertEqual(tmgr.get('e').dtype.type, t) + assert tmgr.get('c').dtype.type == t + assert tmgr.get('d').dtype.type == t + assert tmgr.get('e').dtype.type == t # mixed mgr = create_mgr('a,b: object; c: bool; d: datetime;' @@ -550,24 +545,24 @@ def test_astype(self): for t in ['float16', 'float32', 'float64', 'int32', 'int64']: t = np.dtype(t) tmgr = mgr.astype(t, errors='ignore') - self.assertEqual(tmgr.get('c').dtype.type, t) - self.assertEqual(tmgr.get('e').dtype.type, t) - self.assertEqual(tmgr.get('f').dtype.type, t) - self.assertEqual(tmgr.get('g').dtype.type, t) + assert tmgr.get('c').dtype.type == t + assert tmgr.get('e').dtype.type == t + assert tmgr.get('f').dtype.type == t + assert tmgr.get('g').dtype.type == t - self.assertEqual(tmgr.get('a').dtype.type, np.object_) - self.assertEqual(tmgr.get('b').dtype.type, np.object_) + assert tmgr.get('a').dtype.type == np.object_ + assert tmgr.get('b').dtype.type == np.object_ if t != np.int64: - self.assertEqual(tmgr.get('d').dtype.type, np.datetime64) + assert tmgr.get('d').dtype.type == np.datetime64 else: - self.assertEqual(tmgr.get('d').dtype.type, t) + assert tmgr.get('d').dtype.type == t def test_convert(self): def _compare(old_mgr, new_mgr): """ compare the blocks, numeric compare ==, object don't """ old_blocks = set(old_mgr.blocks) new_blocks = set(new_mgr.blocks) - self.assertEqual(len(old_blocks), len(new_blocks)) + assert len(old_blocks) == len(new_blocks) # compare non-numeric for b in old_blocks: @@ -576,7 +571,7 @@ def _compare(old_mgr, new_mgr): if (b.values == nb.values).all(): found = True break - self.assertTrue(found) + assert found for b in new_blocks: found = False @@ -584,7 +579,7 @@ def _compare(old_mgr, new_mgr): if (b.values == ob.values).all(): found = True break - self.assertTrue(found) + assert found # noops mgr = create_mgr('f: i8; g: f8') @@ -601,11 +596,11 @@ def _compare(old_mgr, 
new_mgr): mgr.set('b', np.array(['2.'] * N, dtype=np.object_)) mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - self.assertEqual(new_mgr.get('a').dtype, np.int64) - self.assertEqual(new_mgr.get('b').dtype, np.float64) - self.assertEqual(new_mgr.get('foo').dtype, np.object_) - self.assertEqual(new_mgr.get('f').dtype, np.int64) - self.assertEqual(new_mgr.get('g').dtype, np.float64) + assert new_mgr.get('a').dtype == np.int64 + assert new_mgr.get('b').dtype == np.float64 + assert new_mgr.get('foo').dtype == np.object_ + assert new_mgr.get('f').dtype == np.int64 + assert new_mgr.get('g').dtype == np.float64 mgr = create_mgr('a,b,foo: object; f: i4; bool: bool; dt: datetime;' 'i: i8; g: f8; h: f2') @@ -613,15 +608,15 @@ def _compare(old_mgr, new_mgr): mgr.set('b', np.array(['2.'] * N, dtype=np.object_)) mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - self.assertEqual(new_mgr.get('a').dtype, np.int64) - self.assertEqual(new_mgr.get('b').dtype, np.float64) - self.assertEqual(new_mgr.get('foo').dtype, np.object_) - self.assertEqual(new_mgr.get('f').dtype, np.int32) - self.assertEqual(new_mgr.get('bool').dtype, np.bool_) - self.assertEqual(new_mgr.get('dt').dtype.type, np.datetime64) - self.assertEqual(new_mgr.get('i').dtype, np.int64) - self.assertEqual(new_mgr.get('g').dtype, np.float64) - self.assertEqual(new_mgr.get('h').dtype, np.float16) + assert new_mgr.get('a').dtype == np.int64 + assert new_mgr.get('b').dtype == np.float64 + assert new_mgr.get('foo').dtype == np.object_ + assert new_mgr.get('f').dtype == np.int32 + assert new_mgr.get('bool').dtype == np.bool_ + assert new_mgr.get('dt').dtype.type, np.datetime64 + assert new_mgr.get('i').dtype == np.int64 + assert new_mgr.get('g').dtype == np.float64 + assert new_mgr.get('h').dtype == np.float16 def test_interleave(self): @@ -629,49 +624,49 @@ def test_interleave(self): for dtype in ['f8', 'i8', 'object', 'bool', 'complex', 'M8[ns]', 'm8[ns]']: mgr = create_mgr('a: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype, dtype) + assert mgr.as_matrix().dtype == dtype mgr = create_mgr('a: {0}; b: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype, dtype) + assert mgr.as_matrix().dtype == dtype # will be converted according the actual dtype of the underlying mgr = create_mgr('a: category') - self.assertEqual(mgr.as_matrix().dtype, 'i8') + assert mgr.as_matrix().dtype == 'i8' mgr = create_mgr('a: category; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'i8'), + assert mgr.as_matrix().dtype == 'i8' mgr = create_mgr('a: category; b: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: category2; b: category2') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' # combinations mgr = create_mgr('a: f8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_matrix().dtype == 'f8' mgr = create_mgr('a: f8; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_matrix().dtype == 'f8' mgr = create_mgr('a: f4; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'f8') + assert mgr.as_matrix().dtype == 'f8' mgr = create_mgr('a: f4; b: i8; d: object') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: bool; b: 
i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: complex') - self.assertEqual(mgr.as_matrix().dtype, 'complex') + assert mgr.as_matrix().dtype == 'complex' mgr = create_mgr('a: f8; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: category') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: m8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: m8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' mgr = create_mgr('a: M8[ns]; b: m8[ns]') - self.assertEqual(mgr.as_matrix().dtype, 'object') + assert mgr.as_matrix().dtype == 'object' def test_interleave_non_unique_cols(self): df = DataFrame([ @@ -682,26 +677,26 @@ def test_interleave_non_unique_cols(self): df_unique = df.copy() df_unique.columns = ['x', 'y'] - self.assertEqual(df_unique.values.shape, df.values.shape) + assert df_unique.values.shape == df.values.shape tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) def test_consolidate(self): pass - def test_consolidate_ordering_issues(self): - self.mgr.set('f', randn(N)) - self.mgr.set('d', randn(N)) - self.mgr.set('b', randn(N)) - self.mgr.set('g', randn(N)) - self.mgr.set('h', randn(N)) - - # we have datetime/tz blocks in self.mgr - cons = self.mgr.consolidate() - self.assertEqual(cons.nblocks, 4) - cons = self.mgr.consolidate().get_numeric_data() - self.assertEqual(cons.nblocks, 1) - tm.assertIsInstance(cons.blocks[0].mgr_locs, lib.BlockPlacement) + def test_consolidate_ordering_issues(self, mgr): + mgr.set('f', randn(N)) + mgr.set('d', randn(N)) + mgr.set('b', randn(N)) + mgr.set('g', randn(N)) + mgr.set('h', randn(N)) + + # we have datetime/tz blocks in mgr + cons = mgr.consolidate() + assert cons.nblocks == 4 + cons = mgr.consolidate().get_numeric_data() + assert cons.nblocks == 1 + assert isinstance(cons.blocks[0].mgr_locs, lib.BlockPlacement) tm.assert_numpy_array_equal(cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64)) @@ -714,7 +709,7 @@ def test_reindex_items(self): 'f: bool; g: f8-2') reindexed = mgr.reindex_axis(['g', 'c', 'a', 'd'], axis=0) - self.assertEqual(reindexed.nblocks, 2) + assert reindexed.nblocks == 2 tm.assert_index_equal(reindexed.items, pd.Index(['g', 'c', 'a', 'd'])) assert_almost_equal( mgr.get('g', fastpath=False), reindexed.get('g', fastpath=False)) @@ -748,9 +743,9 @@ def test_multiindex_xs(self): mgr.set_axis(1, index) result = mgr.xs('bar', axis=1) - self.assertEqual(result.shape, (6, 2)) - self.assertEqual(result.axes[1][0], ('bar', 'one')) - self.assertEqual(result.axes[1][1], ('bar', 'two')) + assert result.shape == (6, 2) + assert result.axes[1][0] == ('bar', 'one') + assert result.axes[1][1] == ('bar', 'two') def test_get_numeric_data(self): mgr = create_mgr('int: int; float: float; complex: complex;' @@ -826,11 +821,11 @@ def test_equals(self): # unique items bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2') bm2 = 
BlockManager(bm1.blocks[::-1], bm1.axes) - self.assertTrue(bm1.equals(bm2)) + assert bm1.equals(bm2) bm1 = create_mgr('a,a,a: i8-1; b,b,b: i8-2') bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) - self.assertTrue(bm1.equals(bm2)) + assert bm1.equals(bm2) def test_equals_block_order_different_dtypes(self): # GH 9330 @@ -848,19 +843,19 @@ def test_equals_block_order_different_dtypes(self): block_perms = itertools.permutations(bm.blocks) for bm_perm in block_perms: bm_this = BlockManager(bm_perm, bm.axes) - self.assertTrue(bm.equals(bm_this)) - self.assertTrue(bm_this.equals(bm)) + assert bm.equals(bm_this) + assert bm_this.equals(bm) def test_single_mgr_ctor(self): mgr = create_single_mgr('f8', num_rows=5) - self.assertEqual(mgr.as_matrix().tolist(), [0., 1., 2., 3., 4.]) + assert mgr.as_matrix().tolist() == [0., 1., 2., 3., 4.] def test_validate_bool_args(self): invalid_values = [1, "True", [1, 2, 3], 5.0] bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2') for value in invalid_values: - with self.assertRaises(ValueError): + with pytest.raises(ValueError): bm1.replace_list([1], [2], inplace=value) @@ -918,32 +913,37 @@ def assert_slice_ok(mgr, axis, slobj): for mgr in self.MANAGERS: for ax in range(mgr.ndim): # slice - yield assert_slice_ok, mgr, ax, slice(None) - yield assert_slice_ok, mgr, ax, slice(3) - yield assert_slice_ok, mgr, ax, slice(100) - yield assert_slice_ok, mgr, ax, slice(1, 4) - yield assert_slice_ok, mgr, ax, slice(3, 0, -2) + assert_slice_ok(mgr, ax, slice(None)) + assert_slice_ok(mgr, ax, slice(3)) + assert_slice_ok(mgr, ax, slice(100)) + assert_slice_ok(mgr, ax, slice(1, 4)) + assert_slice_ok(mgr, ax, slice(3, 0, -2)) # boolean mask - yield assert_slice_ok, mgr, ax, np.array([], dtype=np.bool_) - yield (assert_slice_ok, mgr, ax, - np.ones(mgr.shape[ax], dtype=np.bool_)) - yield (assert_slice_ok, mgr, ax, - np.zeros(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.zeros(mgr.shape[ax], dtype=np.bool_)) if mgr.shape[ax] >= 3: - yield (assert_slice_ok, mgr, ax, - np.arange(mgr.shape[ax]) % 3 == 0) - yield (assert_slice_ok, mgr, ax, np.array( - [True, True, False], dtype=np.bool_)) + assert_slice_ok( + mgr, ax, + np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok( + mgr, ax, np.array( + [True, True, False], dtype=np.bool_)) # fancy indexer - yield assert_slice_ok, mgr, ax, [] - yield assert_slice_ok, mgr, ax, lrange(mgr.shape[ax]) + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, lrange(mgr.shape[ax])) if mgr.shape[ax] >= 3: - yield assert_slice_ok, mgr, ax, [0, 1, 2] - yield assert_slice_ok, mgr, ax, [-1, -2, -3] + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) def test_take(self): def assert_take_ok(mgr, axis, indexer): @@ -957,13 +957,13 @@ def assert_take_ok(mgr, axis, indexer): for mgr in self.MANAGERS: for ax in range(mgr.ndim): # take/fancy indexer - yield assert_take_ok, mgr, ax, [] - yield assert_take_ok, mgr, ax, [0, 0, 0] - yield assert_take_ok, mgr, ax, lrange(mgr.shape[ax]) + assert_take_ok(mgr, ax, []) + assert_take_ok(mgr, ax, [0, 0, 0]) + assert_take_ok(mgr, ax, lrange(mgr.shape[ax])) if mgr.shape[ax] >= 3: - yield assert_take_ok, mgr, ax, [0, 1, 2] - yield assert_take_ok, mgr, ax, [-1, -2, -3] + assert_take_ok(mgr, ax, [0, 1, 2]) + assert_take_ok(mgr, ax, [-1, -2, -3]) def test_reindex_axis(self): def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): @@ -981,25 +981,33 
@@ def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): for mgr in self.MANAGERS: for ax in range(mgr.ndim): for fill_value in (None, np.nan, 100.): - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index([]), fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, mgr.axes[ax], - fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][[0, 0, 0]], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - pd.Index(['foo', mgr.axes[ax][0], 'baz']), - fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index([]), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax], + fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][[0, 0, 0]], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + pd.Index(['foo', mgr.axes[ax][0], 'baz']), + fill_value) if mgr.shape[ax] >= 3: - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][:-3], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][-3::-1], fill_value) - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][:-3], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][-3::-1], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, + mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value) def test_reindex_indexer(self): @@ -1018,33 +1026,41 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, for mgr in self.MANAGERS: for ax in range(mgr.ndim): for fill_value in (None, np.nan, 100.): - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index([]), [], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo'] * mgr.shape[ax]), - np.arange(mgr.shape[ax]), fill_value) - - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), - fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, mgr.axes[ax], - np.arange(mgr.shape[ax])[::-1], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [0, 0, 0], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [-1, 0, -1], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', mgr.axes[ax][0], 'baz']), - [-1, -1, -1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index([]), [], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo'] * mgr.shape[ax]), + np.arange(mgr.shape[ax]), fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), + fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], + np.arange(mgr.shape[ax])[::-1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [0, 0, 0], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [-1, 0, -1], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', mgr.axes[ax][0], 'baz']), + [-1, -1, -1], fill_value) if mgr.shape[ax] >= 3: - yield (assert_reindex_indexer_is_ok, mgr, ax, - pd.Index(['foo', 'bar', 'baz']), - [0, 1, 2], fill_value) + 
assert_reindex_indexer_is_ok( + mgr, ax, + pd.Index(['foo', 'bar', 'baz']), + [0, 1, 2], fill_value) # test_get_slice(slice_like, axis) # take(indexer, axis) @@ -1055,21 +1071,23 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, class TestBlockPlacement(tm.TestCase): def test_slice_len(self): - self.assertEqual(len(BlockPlacement(slice(0, 4))), 4) - self.assertEqual(len(BlockPlacement(slice(0, 4, 2))), 2) - self.assertEqual(len(BlockPlacement(slice(0, 3, 2))), 2) + assert len(BlockPlacement(slice(0, 4))) == 4 + assert len(BlockPlacement(slice(0, 4, 2))) == 2 + assert len(BlockPlacement(slice(0, 3, 2))) == 2 - self.assertEqual(len(BlockPlacement(slice(0, 1, 2))), 1) - self.assertEqual(len(BlockPlacement(slice(1, 0, -1))), 1) + assert len(BlockPlacement(slice(0, 1, 2))) == 1 + assert len(BlockPlacement(slice(1, 0, -1))) == 1 def test_zero_step_raises(self): - self.assertRaises(ValueError, BlockPlacement, slice(1, 1, 0)) - self.assertRaises(ValueError, BlockPlacement, slice(1, 2, 0)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 1, 0)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 2, 0)) def test_unbounded_slice_raises(self): def assert_unbounded_slice_error(slc): - self.assertRaisesRegexp(ValueError, "unbounded slice", - lambda: BlockPlacement(slc)) + tm.assertRaisesRegexp(ValueError, "unbounded slice", + lambda: BlockPlacement(slc)) assert_unbounded_slice_error(slice(None, None)) assert_unbounded_slice_error(slice(10, None)) @@ -1087,7 +1105,7 @@ def assert_unbounded_slice_error(slc): def test_not_slice_like_slices(self): def assert_not_slice_like(slc): - self.assertTrue(not BlockPlacement(slc).is_slice_like) + assert not BlockPlacement(slc).is_slice_like assert_not_slice_like(slice(0, 0)) assert_not_slice_like(slice(100, 0)) @@ -1095,12 +1113,12 @@ def assert_not_slice_like(slc): assert_not_slice_like(slice(100, 100, -1)) assert_not_slice_like(slice(0, 100, -1)) - self.assertTrue(not BlockPlacement(slice(0, 0)).is_slice_like) - self.assertTrue(not BlockPlacement(slice(100, 100)).is_slice_like) + assert not BlockPlacement(slice(0, 0)).is_slice_like + assert not BlockPlacement(slice(100, 100)).is_slice_like def test_array_to_slice_conversion(self): def assert_as_slice_equals(arr, slc): - self.assertEqual(BlockPlacement(arr).as_slice, slc) + assert BlockPlacement(arr).as_slice == slc assert_as_slice_equals([0], slice(0, 1, 1)) assert_as_slice_equals([100], slice(100, 101, 1)) @@ -1115,7 +1133,7 @@ def assert_as_slice_equals(arr, slc): def test_not_slice_like_arrays(self): def assert_not_slice_like(arr): - self.assertTrue(not BlockPlacement(arr).is_slice_like) + assert not BlockPlacement(arr).is_slice_like assert_not_slice_like([]) assert_not_slice_like([-1]) @@ -1128,13 +1146,12 @@ def assert_not_slice_like(arr): assert_not_slice_like([1, 1, 1]) def test_slice_iter(self): - self.assertEqual(list(BlockPlacement(slice(0, 3))), [0, 1, 2]) - self.assertEqual(list(BlockPlacement(slice(0, 0))), []) - self.assertEqual(list(BlockPlacement(slice(3, 0))), []) + assert list(BlockPlacement(slice(0, 3))) == [0, 1, 2] + assert list(BlockPlacement(slice(0, 0))) == [] + assert list(BlockPlacement(slice(3, 0))) == [] - self.assertEqual(list(BlockPlacement(slice(3, 0, -1))), [3, 2, 1]) - self.assertEqual(list(BlockPlacement(slice(3, None, -1))), - [3, 2, 1, 0]) + assert list(BlockPlacement(slice(3, 0, -1))) == [3, 2, 1] + assert list(BlockPlacement(slice(3, None, -1))) == [3, 2, 1, 0] def test_slice_to_array_conversion(self): def assert_as_array_equals(slc, 
asarray): @@ -1152,13 +1169,13 @@ def assert_as_array_equals(slc, asarray): def test_blockplacement_add(self): bpl = BlockPlacement(slice(0, 5)) - self.assertEqual(bpl.add(1).as_slice, slice(1, 6, 1)) - self.assertEqual(bpl.add(np.arange(5)).as_slice, slice(0, 10, 2)) - self.assertEqual(list(bpl.add(np.arange(5, 0, -1))), [5, 5, 5, 5, 5]) + assert bpl.add(1).as_slice == slice(1, 6, 1) + assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2) + assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5] def test_blockplacement_add_int(self): def assert_add_equals(val, inc, result): - self.assertEqual(list(BlockPlacement(val).add(inc)), result) + assert list(BlockPlacement(val).add(inc)) == result assert_add_equals(slice(0, 0), 0, []) assert_add_equals(slice(1, 4), 0, [1, 2, 3]) @@ -1177,9 +1194,9 @@ def assert_add_equals(val, inc, result): assert_add_equals(slice(3, 0, -1), -1, [2, 1, 0]) assert_add_equals([1, 2, 4], -1, [0, 1, 3]) - self.assertRaises(ValueError, - lambda: BlockPlacement(slice(1, 4)).add(-10)) - self.assertRaises(ValueError, - lambda: BlockPlacement([1, 2, 4]).add(-10)) - self.assertRaises(ValueError, - lambda: BlockPlacement(slice(2, None, -1)).add(-1)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 4)).add(-10) + with pytest.raises(ValueError): + BlockPlacement([1, 2, 4]).add(-10) + with pytest.raises(ValueError): + BlockPlacement(slice(2, None, -1)).add(-1) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 3f2973a9834ca..fe03d7886e661 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -646,7 +646,7 @@ def test_dtypes(self): f = self.funcs[f_name] d = self.data[d_name] exp = self.expects[d_name][f_name] - yield self.check_dtypes, f, f_name, d, d_name, exp + self.check_dtypes(f, f_name, d, d_name, exp) def check_dtypes(self, f, f_name, d, d_name, exp): roll = d.rolling(window=self.window) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 154476ce8340a..cf76f4ead77e3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -93,11 +93,7 @@ def reset_display_options(self): pd.reset_option('^display.', silent=True) def round_trip_pickle(self, obj, path=None): - if path is None: - path = u('__%s__.pickle' % rands(10)) - with ensure_clean(path) as path: - pd.to_pickle(obj, path) - return pd.read_pickle(path) + return round_trip_pickle(obj, path=path) # https://docs.python.org/3/library/unittest.html#deprecated-aliases def assertEquals(self, *args, **kwargs): @@ -121,6 +117,14 @@ def assertNotAlmostEquals(self, *args, **kwargs): self.assertNotAlmostEqual)(*args, **kwargs) +def round_trip_pickle(obj, path=None): + if path is None: + path = u('__%s__.pickle' % rands(10)) + with ensure_clean(path) as path: + pd.to_pickle(obj, path) + return pd.read_pickle(path) + + def assert_almost_equal(left, right, check_exact=False, check_dtype='equiv', check_less_precise=False, **kwargs): diff --git a/setup.cfg b/setup.cfg index 8de4fc955bd50..8b32f0f62fe28 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,6 @@ split_penalty_logical_operator = 30 [tool:pytest] # TODO: Change all yield-based (nose-style) fixutures to pytest fixtures # Silencing the warning until then -addopts = --disable-pytest-warnings testpaths = pandas markers = single: mark a test as single cpu only From 2c3f808e55d8dc61959b2fd33a103575f2fb85ef Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 10:43:35 -0400 Subject: [PATCH 223/353] CI: actually use the miniconda cache :> --- .travis.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ee093e5bf0e60..af3098b3fc715 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ language: python # The cash directories will be deleted if anything in ci/ changes in a commit cache: directories: - - $HOME/miniconda # miniconda cache + - $HOME/miniconda3 # miniconda cache - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache From d96826024aad3f08c365bd74a43a684677982a89 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 10:58:34 -0400 Subject: [PATCH 224/353] TST: only catch deprecation warnings for top-level module imports (#15718) --- pandas/tests/api/test_api.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index db92210478182..73222c246fc70 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -249,31 +249,26 @@ def test_groupby(self): class TestJson(tm.TestCase): def test_deprecation_access_func(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): pd.json.dumps([]) class TestParser(tm.TestCase): def test_deprecation_access_func(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): pd.parser.na_values class TestLib(tm.TestCase): def test_deprecation_access_func(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with catch_warnings(record=True): pd.lib.infer_dtype class TestTSLib(tm.TestCase): def test_deprecation_access_func(self): - # some libraries may be imported before we - # test and could show the warning with catch_warnings(record=True): pd.tslib.Timestamp From f0533e4f72bc3d98c2aa54f6bb8bcec157c4db41 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 11:08:38 -0400 Subject: [PATCH 225/353] CI: fix cache again --- ci/install_travis.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index aad87ea37439f..67b94da120d90 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -35,7 +35,7 @@ echo "[home_dir: $home_dir]" # install miniconda MINICONDA_DIR="$HOME/miniconda3" -if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR" ]; then +if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR/bin" ]; then echo "[Using cached Miniconda install]" else @@ -54,6 +54,9 @@ else bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi +echo "[show conds]" +which conda + echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 From 0d9d27cba5381bec0fea8385ed26a836f82d9520 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 11:28:44 -0400 Subject: [PATCH 226/353] CI: typo in using ccache --- ci/install_travis.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 67b94da120d90..f0f4bc0873e05 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -54,7 +54,7 @@ else bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi -echo "[show conds]" +echo "[show conda]" which conda echo "[update conda]" @@ -78,7 +78,7 @@ fi conda info -a || exit 1 # set the compiler cache to work -if [ "$USE_CACHE" ] && "${TRAVIS_OS_NAME}" == "linux" ]; then +if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[Using ccache]" export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH gcc=$(which gcc) 
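
A note on the warning handling adopted in PATCH 224/353 above: the comment removed from TestTSLib states the motivation (another top-level import may already have emitted the FutureWarning, so asserting that it is produced again is fragile), and the replacement simply records whatever warnings occur. A minimal sketch of that pattern, reusing the deprecated pd.json accessor exercised in the test_api.py hunk:

    import pandas as pd
    from warnings import catch_warnings

    def test_deprecation_access_func():
        # record=True collects any warnings in a list instead of letting
        # them propagate, so the test passes whether or not the
        # FutureWarning was already consumed by an earlier top-level import
        with catch_warnings(record=True):
            pd.json.dumps([])
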
From 0ad89761df376d52eaee90b52b9b15eb0f06af54 Mon Sep 17 00:00:00 2001 From: Lorenzo Cestaro Date: Fri, 17 Mar 2017 11:41:26 -0400 Subject: [PATCH 227/353] DOC: Update broken link in cookbook.rst #15605 closes #15605 Author: Lorenzo Cestaro Closes #15720 from LorenzoCestaro/fix-15605 and squashes the following commits: 006eefa [Lorenzo Cestaro] DOC: Update broken link in cookbook.rst #15605 --- doc/source/cookbook.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 841195de3da47..8fa1283ffc924 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -905,7 +905,7 @@ CSV The :ref:`CSV ` docs -`read_csv in action `__ +`read_csv in action `__ `appending to a csv `__ From ad3d88600825d02f4540e2c2614f0c7a93e4af35 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 12:35:27 -0400 Subject: [PATCH 228/353] CI: don't fail if our env already exists in caching --- ci/install_travis.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index f0f4bc0873e05..8bf6de3efe7c4 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -47,11 +47,11 @@ else # install miniconda if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 else - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 fi - bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 + time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 fi echo "[show conda]" @@ -90,13 +90,16 @@ else echo "[Not using ccache]" fi +echo "[create env]" + # may have installation instructions for this build INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh" if [ -e ${INSTALL} ]; then time bash $INSTALL || exit 1 else # create new env - time conda create -n pandas python=$PYTHON_VERSION pytest || exit 1 + # this may already exists, in which case our caching worked + time conda create -n pandas python=$PYTHON_VERSION pytest fi # build deps From a1b118cf46dc0a92fc16f2268b07731e27ed00d3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 13:11:55 -0400 Subject: [PATCH 229/353] CI: remove caching for miniconda itself (#15722) --- .travis.yml | 1 - ci/install_travis.sh | 25 ++++++++++--------------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index af3098b3fc715..c1419dd0c5d3b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,6 @@ language: python # The cash directories will be deleted if anything in ci/ changes in a commit cache: directories: - - $HOME/miniconda3 # miniconda cache - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 8bf6de3efe7c4..e59502b810975 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -35,24 +35,19 @@ echo "[home_dir: $home_dir]" # install miniconda MINICONDA_DIR="$HOME/miniconda3" -if [ "$USE_CACHE" ] && [ -d "$MINICONDA_DIR/bin" ]; then - echo "[Using cached Miniconda install]" +echo "[Using clean Miniconda install]" -else - echo "[Using clean Miniconda install]" - - if [ -d "$MINICONDA_DIR" ]; then - rm -rf "$MINICONDA_DIR" - fi +if [ -d "$MINICONDA_DIR" ]; then + rm -rf 
"$MINICONDA_DIR" +fi - # install miniconda - if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 - else - time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 - fi - time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 +# install miniconda +if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 +else + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 fi +time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 echo "[show conda]" which conda From 043efa6c94e6abdd033293ba55cd8da7e3763d16 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 15:24:35 -0400 Subject: [PATCH 230/353] CI: remove miniconda from actual cache scripts --- ci/check_cache.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/check_cache.sh b/ci/check_cache.sh index cd7a6e8f6b6f9..1c9de7b017569 100755 --- a/ci/check_cache.sh +++ b/ci/check_cache.sh @@ -12,14 +12,12 @@ else ci_changes=$(git diff PR_HEAD~2 --numstat | grep -E "ci/"| wc -l) fi -MINICONDA_DIR="$HOME/miniconda/" CACHE_DIR="$HOME/.cache/" CCACHE_DIR="$HOME/.ccache/" if [ $ci_changes -ne 0 ] then echo "Files have changed in ci/ deleting all caches" - rm -rf "$MINICONDA_DIR" rm -rf "$CACHE_DIR" rm -rf "$CCACHE_DIR" -fi \ No newline at end of file +fi From 5e96fb050afc192d464904f8f4a2a6e07723ee37 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 15:39:53 -0400 Subject: [PATCH 231/353] CI: install ccache on osx --- ci/install_travis.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index e59502b810975..610e6255e6832 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -32,6 +32,11 @@ edit_init home_dir=$(pwd) echo "[home_dir: $home_dir]" +if [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "[install ccache]" + time brew install ccache +fi + # install miniconda MINICONDA_DIR="$HOME/miniconda3" From a9c823922c08305b3cfd12cca52e3302f831429e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 17 Mar 2017 20:11:31 -0400 Subject: [PATCH 232/353] MAINT: Drop take_last kwarg from method signatures Affected methods: 1) nlargest 2) nsmallest 3) duplicated 4) drop_duplicates xref #10236, #10792, #10920. 
Author: gfyoung Closes #15710 from gfyoung/create-last-kw-drop and squashes the following commits: b416290 [gfyoung] MAINT: Drop take_last kwarg from method signatures --- asv_bench/benchmarks/series_methods.py | 12 ++--- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/base.py | 6 --- pandas/core/frame.py | 9 +--- pandas/core/groupby.py | 28 +++------- pandas/core/series.py | 10 ---- pandas/indexes/base.py | 4 -- pandas/indexes/category.py | 5 +- pandas/indexes/multi.py | 2 - pandas/tests/frame/test_analytics.py | 75 -------------------------- pandas/tests/groupby/test_groupby.py | 7 +-- pandas/tests/series/test_analytics.py | 33 ------------ pandas/tests/test_base.py | 16 ------ pandas/tests/test_multilevel.py | 11 ---- vb_suite/series_methods.py | 16 +++--- 15 files changed, 26 insertions(+), 209 deletions(-) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 413c4e044fd3a..c66654ee1e006 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -68,8 +68,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nlargest1(self): - self.s1.nlargest(3, take_last=True) - self.s1.nlargest(3, take_last=False) + self.s1.nlargest(3, keep='last') + self.s1.nlargest(3, keep='first') class series_nlargest2(object): @@ -83,8 +83,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nlargest2(self): - self.s2.nlargest(3, take_last=True) - self.s2.nlargest(3, take_last=False) + self.s2.nlargest(3, keep='last') + self.s2.nlargest(3, keep='first') class series_nsmallest2(object): @@ -98,8 +98,8 @@ def setup(self): self.s4 = self.s3.astype('object') def time_series_nsmallest2(self): - self.s2.nsmallest(3, take_last=True) - self.s2.nsmallest(3, take_last=False) + self.s2.nsmallest(3, keep='last') + self.s2.nsmallest(3, keep='first') class series_dropna_int64(object): diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 29d05ddcfb497..9cf53300f8cca 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -769,6 +769,7 @@ Removal of prior version deprecations/changes in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) +- The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) .. _whatsnew_0200.performance: diff --git a/pandas/core/base.py b/pandas/core/base.py index d7c9e35ab6a51..bde60be3ddcff 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1065,7 +1065,6 @@ def searchsorted(self, value, side='left', sorter=None): - ``first`` : Drop duplicates except for the first occurrence. - ``last`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. 
- take_last : deprecated %(inplace)s Returns @@ -1073,8 +1072,6 @@ def searchsorted(self, value, side='left', sorter=None): deduplicated : %(klass)s """) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -1100,15 +1097,12 @@ def drop_duplicates(self, keep='first', inplace=False): - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. - take_last : deprecated Returns ------- duplicated : %(duplicated)s """) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.algorithms import duplicated diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 987eb10101f12..3696051b269e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,8 +77,7 @@ OrderedDict, raise_with_traceback) from pandas import compat from pandas.compat.numpy import function as nv -from pandas.util.decorators import (deprecate_kwarg, Appender, - Substitution) +from pandas.util.decorators import Appender, Substitution from pandas.util.validators import validate_bool_kwarg from pandas.tseries.period import PeriodIndex @@ -3169,8 +3168,6 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: return result - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def drop_duplicates(self, subset=None, keep='first', inplace=False): """ Return DataFrame with duplicate rows removed, optionally only @@ -3185,7 +3182,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): - ``first`` : Drop duplicates except for the first occurrence. - ``last`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. - take_last : deprecated inplace : boolean, default False Whether to drop duplicates in place or to return a copy @@ -3203,8 +3199,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): else: return self[-duplicated] - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def duplicated(self, subset=None, keep='first'): """ Return boolean Series denoting duplicate rows, optionally only @@ -3221,7 +3215,6 @@ def duplicated(self, subset=None, keep='first'): - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. 
- take_last : deprecated Returns ------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 7a017ffae284c..4095a14aa5970 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -51,8 +51,8 @@ from pandas.core.sorting import (get_group_index_sorter, get_group_index, compress_group_index, get_flattened_iterator, decons_obs_group_ids, get_indexer_dict) -from pandas.util.decorators import (cache_readonly, Substitution, Appender, - make_signature, deprecate_kwarg) +from pandas.util.decorators import (cache_readonly, Substitution, + Appender, make_signature) from pandas.formats.printing import pprint_thing from pandas.util.validators import validate_kwargs @@ -94,12 +94,12 @@ 'corr', 'cov', 'diff', ]) | _plotting_methods -_series_apply_whitelist = \ - (_common_apply_whitelist - set(['boxplot'])) | \ - frozenset(['dtype', 'unique']) +_series_apply_whitelist = ((_common_apply_whitelist | + {'nlargest', 'nsmallest'}) - + {'boxplot'}) | frozenset(['dtype', 'unique']) -_dataframe_apply_whitelist = \ - _common_apply_whitelist | frozenset(['dtypes', 'corrwith']) +_dataframe_apply_whitelist = (_common_apply_whitelist | + frozenset(['dtypes', 'corrwith'])) _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift', 'cummin', 'cummax']) @@ -3025,20 +3025,6 @@ def nunique(self, dropna=True): index=ri, name=self.name) - @deprecate_kwarg('take_last', 'keep', - mapping={True: 'last', False: 'first'}) - @Appender(Series.nlargest.__doc__) - def nlargest(self, n=5, keep='first'): - # ToDo: When we remove deprecate_kwargs, we can remote these methods - # and include nlargest and nsmallest to _series_apply_whitelist - return self.apply(lambda x: x.nlargest(n=n, keep=keep)) - - @deprecate_kwarg('take_last', 'keep', - mapping={True: 'last', False: 'first'}) - @Appender(Series.nsmallest.__doc__) - def nsmallest(self, n=5, keep='first'): - return self.apply(lambda x: x.nsmallest(n=n, keep=keep)) - @Appender(Series.describe.__doc__) def describe(self, **kwargs): self._set_group_selection() diff --git a/pandas/core/series.py b/pandas/core/series.py index cfa25ca1299eb..7ee3b3e8fb519 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1211,14 +1211,10 @@ def unique(self): return result.asobject.values return result - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) def drop_duplicates(self, keep='first', inplace=False): return super(Series, self).drop_duplicates(keep=keep, inplace=inplace) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _shared_doc_kwargs) def duplicated(self, keep='first'): return super(Series, self).duplicated(keep=keep) @@ -1888,8 +1884,6 @@ def argsort(self, axis=0, kind='quicksort', order=None): np.argsort(values, kind=kind), index=self.index, dtype='int64').__finalize__(self) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def nlargest(self, n=5, keep='first'): """Return the largest `n` elements. @@ -1901,7 +1895,6 @@ def nlargest(self, n=5, keep='first'): Where there are duplicate values: - ``first`` : take the first occurrence. - ``last`` : take the last occurrence. 
- take_last : deprecated Returns ------- @@ -1938,8 +1931,6 @@ def nlargest(self, n=5, keep='first'): return algorithms.select_n_series(self, n=n, keep=keep, method='nlargest') - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) def nsmallest(self, n=5, keep='first'): """Return the smallest `n` elements. @@ -1951,7 +1942,6 @@ def nsmallest(self, n=5, keep='first'): Where there are duplicate values: - ``first`` : take the first occurrence. - ``last`` : take the last occurrence. - take_last : deprecated Returns ------- diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5b942e2565c29..381e4d5caa8ac 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -3500,14 +3500,10 @@ def unique(self): result = super(Index, self).unique() return self._shallow_copy(result) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) def drop_duplicates(self, keep='first'): return super(Index, self).drop_duplicates(keep=keep) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): return super(Index, self).duplicated(keep=keep) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 923dd4ec785c5..7cfc95de5f538 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -11,8 +11,7 @@ from pandas.types.missing import array_equivalent -from pandas.util.decorators import (Appender, cache_readonly, - deprecate_kwarg) +from pandas.util.decorators import Appender, cache_readonly from pandas.core.config import get_option from pandas.indexes.base import Index, _index_shared_docs import pandas.core.base as base @@ -301,8 +300,6 @@ def unique(self): return self._shallow_copy(result, categories=result.categories, ordered=result.ordered) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas._libs.hashtable import duplicated_int64 diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 1c1609fed1dd1..978492131ca89 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -755,8 +755,6 @@ def f(k, stringify): for k, stringify in zip(key, self._have_mixed_levels)]) return hash_tuples(key) - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) def duplicated(self, keep='first'): from pandas.core.sorting import get_group_index diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6c917444f9f43..4fb1d2222fa06 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1381,12 +1381,6 @@ def test_drop_duplicates(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('AAA', take_last=True) - expected = df.loc[[6, 7]] - tm.assert_frame_equal(result, expected) - # multi column expected = df.loc[[0, 1, 2, 3]] result = df.drop_duplicates(np.array(['AAA', 'B'])) @@ -1402,12 +1396,6 @@ def test_drop_duplicates(self): expected = df.loc[[0]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = 
df.drop_duplicates(('AAA', 'B'), take_last=True) - expected = df.loc[[0, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # consider everything df2 = df.loc[:, ['AAA', 'B', 'C']] @@ -1424,13 +1412,6 @@ def test_drop_duplicates(self): expected = df2.drop_duplicates(['AAA', 'B'], keep=False) tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df2.drop_duplicates(take_last=True) - with tm.assert_produces_warning(FutureWarning): - expected = df2.drop_duplicates(['AAA', 'B'], take_last=True) - tm.assert_frame_equal(result, expected) - # integers result = df.drop_duplicates('C') expected = df.iloc[[0, 2]] @@ -1529,12 +1510,6 @@ def test_drop_duplicates_tuple(self): self.assertEqual(len(result), 0) tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(('AA', 'AB'), take_last=True) - expected = df.loc[[6, 7]] - tm.assert_frame_equal(result, expected) - # multi column expected = df.loc[[0, 1, 2, 3]] result = df.drop_duplicates((('AA', 'AB'), 'B')) @@ -1563,12 +1538,6 @@ def test_drop_duplicates_NA(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('A', take_last=True) - expected = df.loc[[1, 6, 7]] - tm.assert_frame_equal(result, expected) - # multi column result = df.drop_duplicates(['A', 'B']) expected = df.loc[[0, 2, 3, 6]] @@ -1582,12 +1551,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[6]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['A', 'B'], take_last=True) - expected = df.loc[[1, 5, 6, 7]] - tm.assert_frame_equal(result, expected) - # nan df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'bar', 'foo'], @@ -1610,12 +1573,6 @@ def test_drop_duplicates_NA(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(result), 0) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates('C', take_last=True) - expected = df.loc[[3, 7]] - tm.assert_frame_equal(result, expected) - # multi column result = df.drop_duplicates(['C', 'B']) expected = df.loc[[0, 1, 2, 4]] @@ -1629,12 +1586,6 @@ def test_drop_duplicates_NA(self): expected = df.loc[[1]] tm.assert_frame_equal(result, expected) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - result = df.drop_duplicates(['C', 'B'], take_last=True) - expected = df.loc[[1, 3, 6, 7]] - tm.assert_frame_equal(result, expected) - def test_drop_duplicates_NA_for_take_all(self): # none df = DataFrame({'A': [None, None, 'foo', 'bar', @@ -1697,14 +1648,6 @@ def test_drop_duplicates_inplace(self): tm.assert_frame_equal(result, expected) self.assertEqual(len(df), 0) - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates('A', take_last=True, inplace=True) - expected = orig.loc[[6, 7]] - result = df - tm.assert_frame_equal(result, expected) - # multi column df = orig.copy() df.drop_duplicates(['A', 'B'], inplace=True) @@ -1724,14 +1667,6 @@ def test_drop_duplicates_inplace(self): result = df tm.assert_frame_equal(result, expected) - # deprecate take_last - df = orig.copy() - with tm.assert_produces_warning(FutureWarning): - df.drop_duplicates(['A', 'B'], take_last=True, inplace=True) - expected = orig.loc[[0, 5, 6, 
7]] - result = df - tm.assert_frame_equal(result, expected) - # consider everything orig2 = orig.loc[:, ['A', 'B', 'C']].copy() @@ -1754,17 +1689,7 @@ def test_drop_duplicates_inplace(self): result = df2 tm.assert_frame_equal(result, expected) - # deprecate take_last - df2 = orig2.copy() - with tm.assert_produces_warning(FutureWarning): - df2.drop_duplicates(take_last=True, inplace=True) - with tm.assert_produces_warning(FutureWarning): - expected = orig2.drop_duplicates(['A', 'B'], take_last=True) - result = df2 - tm.assert_frame_equal(result, expected) - # Rounding - def test_round(self): # GH 2665 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c25974c94bfd1..a355dca3029c7 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3816,7 +3816,8 @@ def test_groupby_whitelist(self): 'cov', 'diff', 'unique', - # 'nlargest', 'nsmallest', + 'nlargest', + 'nsmallest', ]) for obj, whitelist in zip((df, s), (df_whitelist, s_whitelist)): @@ -4025,8 +4026,6 @@ def test_nlargest(self): 3, 2, 1, 3, 3, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]])) assert_series_equal(gb.nlargest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nlargest(3, take_last=True), e) def test_nsmallest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) @@ -4044,8 +4043,6 @@ def test_nsmallest(self): 0, 1, 1, 0, 1, 2 ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) assert_series_equal(gb.nsmallest(3, keep='last'), e) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(gb.nsmallest(3, take_last=True), e) def test_transform_doesnt_clobber_ints(self): # GH 7972 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index c2543581dca50..dc71fafb1094f 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -917,17 +917,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep='last', inplace=True) assert_series_equal(sc, s[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, s[~expected]) - expected = Series([False, False, True, True]) assert_series_equal(s.duplicated(keep=False), expected) assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) @@ -951,17 +940,6 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep='last', inplace=True) assert_series_equal(sc, s[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.drop_duplicates(take_last=True), s[~expected]) - sc = s.copy() - with tm.assert_produces_warning(FutureWarning): - sc.drop_duplicates(take_last=True, inplace=True) - assert_series_equal(sc, s[~expected]) - expected = Series([False, True, True, False, True, True, False]) assert_series_equal(s.duplicated(keep=False), expected) assert_series_equal(s.drop_duplicates(keep=False), s[~expected]) @@ -1443,18 +1421,7 @@ def test_nsmallest_nlargest(self): for s in s_list: assert_series_equal(s.nsmallest(2), 
s.iloc[[2, 1]]) - assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nsmallest(2, take_last=True), s.iloc[[2, 3]]) - - assert_series_equal(s.nlargest(3), s.iloc[[4, 0, 1]]) - - assert_series_equal(s.nlargest(3, keep='last'), s.iloc[[4, 0, 3]]) - with tm.assert_produces_warning(FutureWarning): - assert_series_equal( - s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]]) empty = s.iloc[0:0] assert_series_equal(s.nsmallest(0), empty) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 1d4dddf6477df..68db0d19344b9 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -816,15 +816,6 @@ def test_duplicated_drop_duplicates_index(self): result = idx.drop_duplicates(keep='last') tm.assert_index_equal(result, idx[~expected]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - with tm.assert_produces_warning(FutureWarning): - result = idx.drop_duplicates(take_last=True) - tm.assert_index_equal(result, idx[~expected]) - base = [False] * len(original) + [True, True] base[3] = True base[5] = True @@ -867,13 +858,6 @@ def test_duplicated_drop_duplicates_index(self): tm.assert_series_equal(s.drop_duplicates(keep='last'), s[~np.array(base)]) - # deprecate take_last - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal( - s.duplicated(take_last=True), expected) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s.drop_duplicates(take_last=True), - s[~np.array(base)]) base = [False] * len(original) + [True, True] base[3] = True base[5] = True diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d7b115d808312..fd5421abc89ad 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2037,17 +2037,6 @@ def test_duplicated_drop_duplicates(self): expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) - # deprecate take_last - expected = np.array([True, False, False, False, False, False]) - with tm.assert_produces_warning(FutureWarning): - duplicated = idx.duplicated(take_last=True) - tm.assert_numpy_array_equal(duplicated, expected) - self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) - with tm.assert_produces_warning(FutureWarning): - tm.assert_index_equal( - idx.drop_duplicates(take_last=True), expected) - def test_multiindex_set_index(self): # segfault in #3308 d = {'t1': [2, 2.5, 3], 't2': [4, 5, 6]} diff --git a/vb_suite/series_methods.py b/vb_suite/series_methods.py index cd8688495fa09..c545f419c2dec 100644 --- a/vb_suite/series_methods.py +++ b/vb_suite/series_methods.py @@ -12,22 +12,22 @@ s4 = s3.astype('object') """ -series_nlargest1 = Benchmark('s1.nlargest(3, take_last=True);' - 's1.nlargest(3, take_last=False)', +series_nlargest1 = Benchmark("s1.nlargest(3, keep='last');" + "s1.nlargest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nlargest2 = Benchmark('s2.nlargest(3, take_last=True);' - 's2.nlargest(3, take_last=False)', +series_nlargest2 = Benchmark("s2.nlargest(3, keep='last');" + "s2.nlargest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nsmallest2 = Benchmark('s1.nsmallest(3, take_last=True);' - 's1.nsmallest(3, 
take_last=False)', +series_nsmallest2 = Benchmark("s1.nsmallest(3, keep='last');" + "s1.nsmallest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) -series_nsmallest2 = Benchmark('s2.nsmallest(3, take_last=True);' - 's2.nsmallest(3, take_last=False)', +series_nsmallest2 = Benchmark("s2.nsmallest(3, keep='last');" + "s2.nsmallest(3, keep='first')", setup, start_date=datetime(2014, 1, 25)) From ee19222c98175a99ec47b1359973141bb9f1dc50 Mon Sep 17 00:00:00 2001 From: Jaehoon Hwang Date: Fri, 17 Mar 2017 21:55:38 -0400 Subject: [PATCH 233/353] TST: move pandas/tests/io/test_date_converters.py to pandas/tests/io/parsers/parse_dates.py closes #15519 Author: Jaehoon Hwang Closes #15707 from jaehoonhwang/TST15519 and squashes the following commits: 0b309d3 [Jaehoon Hwang] Fixed frame email and PEP8 ef6e8fa [Jaehoon Hwang] Fixing up few lines and imports e019e95 [Jaehoon Hwang] Imported read_table and using self.readcsv 3eb63c5 [Jaehoon Hwang] TST15519 Moving Unit tests to appropriate file 9b20caa [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' b977615 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/io/parser/parse_dates.py | 148 ++++++++++++++++++++++- pandas/tests/io/test_date_converters.py | 150 ------------------------ 3 files changed, 147 insertions(+), 152 deletions(-) delete mode 100644 pandas/tests/io/test_date_converters.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9cf53300f8cca..4949b68d46723 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -723,6 +723,7 @@ Other API Changes - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) - Reorganization of timeseries development tests (:issue:`14854`) +- Reorganization of date converter tests (:issue:`15707`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 4cba9276a9d1e..de4e3fbc0d943 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -6,7 +6,7 @@ """ from distutils.version import LooseVersion -from datetime import datetime +from datetime import datetime, date import pytest import numpy as np @@ -19,9 +19,10 @@ import pandas.util.testing as tm import pandas.io.date_converters as conv -from pandas import DataFrame, Series, Index, DatetimeIndex +from pandas import DataFrame, Series, Index, DatetimeIndex, MultiIndex from pandas import compat from pandas.compat import parse_date, StringIO, lrange +from pandas.compat.numpy import np_array_datetime64_compat from pandas.tseries.index import date_range @@ -510,3 +511,146 @@ def test_parse_date_time_multi_level_column_name(self): expected = DataFrame(expected_data, columns=['date_time', ('A', 'a'), ('B', 'b')]) tm.assert_frame_equal(result, expected) + + def test_parse_date_time(self): + dates = np.array(['2007/1/3', '2008/2/4'], dtype=object) + 
times = np.array(['05:07:09', '06:08:00'], dtype=object) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), + datetime(2008, 2, 4, 6, 8, 0)]) + + result = conv.parse_date_time(dates, times) + self.assertTrue((result == expected).all()) + + data = """\ +date, time, a, b +2001-01-05, 10:00:00, 0.0, 10. +2001-01-05, 00:00:00, 1., 11. +""" + datecols = {'date_time': [0, 1]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_date_time) + self.assertIn('date_time', df) + self.assertEqual(df.date_time.loc[0], datetime(2001, 1, 5, 10, 0, 0)) + + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + + date_spec = {'nominal': [1, 2], 'actual': [1, 3]} + df = self.read_csv(StringIO(data), header=None, parse_dates=date_spec, + date_parser=conv.parse_date_time) + + def test_parse_date_fields(self): + years = np.array([2007, 2008]) + months = np.array([1, 2]) + days = np.array([3, 4]) + result = conv.parse_date_fields(years, months, days) + expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) + self.assertTrue((result == expected).all()) + + data = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n" + "2001 , 02 , 1 , 11.") + datecols = {'ymd': [0, 1, 2]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_date_fields) + self.assertIn('ymd', df) + self.assertEqual(df.ymd.loc[0], datetime(2001, 1, 10)) + + def test_datetime_six_col(self): + years = np.array([2007, 2008]) + months = np.array([1, 2]) + days = np.array([3, 4]) + hours = np.array([5, 6]) + minutes = np.array([7, 8]) + seconds = np.array([9, 0]) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), + datetime(2008, 2, 4, 6, 8, 0)]) + + result = conv.parse_all_fields(years, months, days, + hours, minutes, seconds) + + self.assertTrue((result == expected).all()) + + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0, 0.0, 10. +2001, 01, 5, 10, 0, 00, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + self.assertIn('ymdHMS', df) + self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0)) + + def test_datetime_fractional_seconds(self): + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0.123456, 0.0, 10. +2001, 01, 5, 10, 0, 0.500000, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + self.assertIn('ymdHMS', df) + self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0, + microsecond=123456)) + self.assertEqual(df.ymdHMS.loc[1], datetime(2001, 1, 5, 10, 0, 0, + microsecond=500000)) + + def test_generic(self): + data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11." 
+ datecols = {'ym': [0, 1]} + dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1) + df = self.read_csv(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=dateconverter) + self.assertIn('ym', df) + self.assertEqual(df.ym.loc[0], date(2001, 1, 1)) + + def test_dateparser_resolution_if_not_ns(self): + # GH 10245 + data = """\ +date,time,prn,rxstatus +2013-11-03,19:00:00,126,00E80000 +2013-11-03,19:00:00,23,00E80000 +2013-11-03,19:00:00,13,00E80000 +""" + + def date_parser(date, time): + datetime = np_array_datetime64_compat( + date + 'T' + time + 'Z', dtype='datetime64[s]') + return datetime + + df = self.read_csv(StringIO(data), date_parser=date_parser, + parse_dates={'datetime': ['date', 'time']}, + index_col=['datetime', 'prn']) + + datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3, + dtype='datetime64[s]') + df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3}, + index=MultiIndex.from_tuples( + [(datetimes[0], 126), + (datetimes[1], 23), + (datetimes[2], 13)], + names=['datetime', 'prn'])) + tm.assert_frame_equal(df, df_correct) + + def test_parse_date_column_with_empty_string(self): + # GH 6428 + data = """case,opdate + 7,10/18/2006 + 7,10/18/2008 + 621, """ + result = self.read_csv(StringIO(data), parse_dates=['opdate']) + expected_data = [[7, '10/18/2006'], + [7, '10/18/2008'], + [621, ' ']] + expected = DataFrame(expected_data, columns=['case', 'opdate']) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py deleted file mode 100644 index 5b54925c65fbd..0000000000000 --- a/pandas/tests/io/test_date_converters.py +++ /dev/null @@ -1,150 +0,0 @@ -from pandas.compat import StringIO -from datetime import date, datetime - -import numpy as np - -from pandas import DataFrame, MultiIndex -from pandas.io.parsers import (read_csv, read_table) -from pandas.util.testing import assert_frame_equal -import pandas.io.date_converters as conv -import pandas.util.testing as tm -from pandas.compat.numpy import np_array_datetime64_compat - - -class TestConverters(tm.TestCase): - - def setUp(self): - self.years = np.array([2007, 2008]) - self.months = np.array([1, 2]) - self.days = np.array([3, 4]) - self.hours = np.array([5, 6]) - self.minutes = np.array([7, 8]) - self.seconds = np.array([9, 0]) - self.dates = np.array(['2007/1/3', '2008/2/4'], dtype=object) - self.times = np.array(['05:07:09', '06:08:00'], dtype=object) - self.expected = np.array([datetime(2007, 1, 3, 5, 7, 9), - datetime(2008, 2, 4, 6, 8, 0)]) - - def test_parse_date_time(self): - result = conv.parse_date_time(self.dates, self.times) - self.assertTrue((result == self.expected).all()) - - data = """\ -date, time, a, b -2001-01-05, 10:00:00, 0.0, 10. -2001-01-05, 00:00:00, 1., 11. 
-""" - datecols = {'date_time': [0, 1]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, date_parser=conv.parse_date_time) - self.assertIn('date_time', df) - self.assertEqual(df.date_time.loc[0], datetime(2001, 1, 5, 10, 0, 0)) - - data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" - "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" - "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" - "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" - "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" - "KORD,19990127, 23:00:00, 22:56:00, -0.5900") - - date_spec = {'nominal': [1, 2], 'actual': [1, 3]} - df = read_csv(StringIO(data), header=None, parse_dates=date_spec, - date_parser=conv.parse_date_time) - - def test_parse_date_fields(self): - result = conv.parse_date_fields(self.years, self.months, self.days) - expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) - self.assertTrue((result == expected).all()) - - data = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n" - "2001 , 02 , 1 , 11.") - datecols = {'ymd': [0, 1, 2]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_date_fields) - self.assertIn('ymd', df) - self.assertEqual(df.ymd.loc[0], datetime(2001, 1, 10)) - - def test_datetime_six_col(self): - result = conv.parse_all_fields(self.years, self.months, self.days, - self.hours, self.minutes, self.seconds) - self.assertTrue((result == self.expected).all()) - - data = """\ -year, month, day, hour, minute, second, a, b -2001, 01, 05, 10, 00, 0, 0.0, 10. -2001, 01, 5, 10, 0, 00, 1., 11. -""" - datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) - self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0)) - - def test_datetime_fractional_seconds(self): - data = """\ -year, month, day, hour, minute, second, a, b -2001, 01, 05, 10, 00, 0.123456, 0.0, 10. -2001, 01, 5, 10, 0, 0.500000, 1., 11. -""" - datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=conv.parse_all_fields) - self.assertIn('ymdHMS', df) - self.assertEqual(df.ymdHMS.loc[0], datetime(2001, 1, 5, 10, 0, 0, - microsecond=123456)) - self.assertEqual(df.ymdHMS.loc[1], datetime(2001, 1, 5, 10, 0, 0, - microsecond=500000)) - - def test_generic(self): - data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11." 
- datecols = {'ym': [0, 1]} - dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1) - df = read_table(StringIO(data), sep=',', header=0, - parse_dates=datecols, - date_parser=dateconverter) - self.assertIn('ym', df) - self.assertEqual(df.ym.loc[0], date(2001, 1, 1)) - - def test_dateparser_resolution_if_not_ns(self): - # issue 10245 - data = """\ -date,time,prn,rxstatus -2013-11-03,19:00:00,126,00E80000 -2013-11-03,19:00:00,23,00E80000 -2013-11-03,19:00:00,13,00E80000 -""" - - def date_parser(date, time): - datetime = np_array_datetime64_compat( - date + 'T' + time + 'Z', dtype='datetime64[s]') - return datetime - - df = read_csv(StringIO(data), date_parser=date_parser, - parse_dates={'datetime': ['date', 'time']}, - index_col=['datetime', 'prn']) - - datetimes = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3, - dtype='datetime64[s]') - df_correct = DataFrame(data={'rxstatus': ['00E80000'] * 3}, - index=MultiIndex.from_tuples( - [(datetimes[0], 126), - (datetimes[1], 23), - (datetimes[2], 13)], - names=['datetime', 'prn'])) - assert_frame_equal(df, df_correct) - - def test_parse_date_column_with_empty_string(self): - # GH 6428 - data = """case,opdate - 7,10/18/2006 - 7,10/18/2008 - 621, """ - result = read_csv(StringIO(data), parse_dates=['opdate']) - expected_data = [[7, '10/18/2006'], - [7, '10/18/2008'], - [621, ' ']] - expected = DataFrame(expected_data, columns=['case', 'opdate']) - assert_frame_equal(result, expected) From 492b8f7cd652267a1aeab6485abd354930db95d2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Mar 2017 22:51:36 -0400 Subject: [PATCH 234/353] CI: install nomkl to speed building (#15728) CI: use cache on all builds --- .travis.yml | 17 +++++++++-------- ci/install_travis.sh | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index c1419dd0c5d3b..cafe46059e6c0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -74,7 +74,7 @@ matrix: - CLIPBOARD=xsel - COVERAGE=true - CACHE_NAME="35_nslow" -# - USE_CACHE=true # Don't use cache for 35_nslow + - USE_CACHE=true addons: apt: packages: @@ -86,6 +86,7 @@ matrix: - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - CONDA_FORGE=true + - USE_CACHE=true addons: apt: packages: @@ -154,13 +155,13 @@ matrix: - USE_CACHE=true - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - - USE_CACHE=true + - PYTHON_VERSION=3.5 + - JOB_NAME: "35_numpy_dev" + - JOB_TAG=_NUMPY_DEV + - TEST_ARGS="--skip-slow --skip-network" + - PANDAS_TESTING_MODE="deprecate" + - CACHE_NAME="35_numpy_dev" + - USE_CACHE=true - python: 3.5 env: - PYTHON_VERSION=3.5 diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 610e6255e6832..de3b3fb6a464e 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -99,7 +99,7 @@ if [ -e ${INSTALL} ]; then else # create new env # this may already exists, in which case our caching worked - time conda create -n pandas python=$PYTHON_VERSION pytest + time conda create -n pandas python=$PYTHON_VERSION pytest nomkl fi # build deps From be2dad17a7e0c39ecb7ed03ed0384856b018bdc3 Mon Sep 17 00:00:00 2001 From: "Christopher C. 
Aycock" Date: Fri, 17 Mar 2017 23:32:46 -0400 Subject: [PATCH 235/353] DOC: Fix typos in merge_asof() docstring (#15729) --- pandas/tools/merge.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 261884bba54bd..60d523a8ea539 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -295,7 +295,7 @@ def merge_asof(left, right, on=None, - A "nearest" search selects the row in the right DataFrame whose 'on' key is closest in absolute distance to the left's key. - The default is "backward" and is the compatible in versions below 0.20.0. + The default is "backward" and is compatible in versions below 0.20.0. The direction parameter was added in version 0.20.0 and introduces "forward" and "nearest". @@ -340,13 +340,13 @@ def merge_asof(left, right, on=None, suffixes : 2-length sequence (tuple, list, ...) Suffix to apply to overlapping column names in the left and right - side, respectively + side, respectively. tolerance : integer or Timedelta, optional, default None - select asof tolerance within this range; must be compatible - to the merge index. + Select asof tolerance within this range; must be compatible + with the merge index. allow_exact_matches : boolean, default True - - If True, allow matching the same 'on' value + - If True, allow matching with the same 'on' value (i.e. less-than-or-equal-to / greater-than-or-equal-to) - If False, don't match the same 'on' value (i.e., stricly less-than / strictly greater-than) From 6a52c15a4ac7b2228e7f8ca45412cacfe301b040 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 12:01:19 -0400 Subject: [PATCH 236/353] TST: move conftest.py to top-level (#15731) --- pandas/conftest.py => conftest.py | 0 pandas/tests/api/test_api.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename pandas/conftest.py => conftest.py (100%) diff --git a/pandas/conftest.py b/conftest.py similarity index 100% rename from pandas/conftest.py rename to conftest.py diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 73222c246fc70..2972427f1b245 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -29,7 +29,7 @@ class TestPDApi(Base, tm.TestCase): # these are optionally imported based on testing # & need to be ignored - ignored = ['tests', 'locale', 'conftest'] + ignored = ['tests', 'locale'] # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', From fe8420ae1a108b6ad3fc14209f8bf7623bb5016f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 12:02:09 -0400 Subject: [PATCH 237/353] CI: remove 3.5 appveyor build (#15730) --- appveyor.yml | 7 ------- ci/requirements-3.5-64.run | 13 ------------- 2 files changed, 20 deletions(-) delete mode 100644 ci/requirements-3.5-64.run diff --git a/appveyor.yml b/appveyor.yml index 1c14698430996..5d748ddf1a108 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -30,13 +30,6 @@ environment: CONDA_PY: "27" CONDA_NPY: "110" - - CONDA_ROOT: "C:\\Miniconda3_64" - PYTHON_VERSION: "3.5" - PYTHON_ARCH: "64" - CONDA_PY: "35" - CONDA_NPY: "111" - - # We always use a 64-bit machine, but can build x86 distributions # with the PYTHON_ARCH variable (which is used by CMD_IN_ENV). 
platform: diff --git a/ci/requirements-3.5-64.run b/ci/requirements-3.5-64.run deleted file mode 100644 index ad66f578d702a..0000000000000 --- a/ci/requirements-3.5-64.run +++ /dev/null @@ -1,13 +0,0 @@ -python-dateutil -pytz -numpy=1.11* -openpyxl -xlsxwriter -xlrd -xlwt -scipy -feather-format -numexpr -pytables -matplotlib -blosc From 63334122acccee705d834e05f394eb38e37f6392 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 12:50:26 -0400 Subject: [PATCH 238/353] CI: turn on cache for osx (#15733) --- .travis.yml | 1 + ci/submit_cython_cache.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index cafe46059e6c0..88e1655363a4e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,7 @@ matrix: os: osx compiler: clang osx_image: xcode6.4 + cache: ccache env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" diff --git a/ci/submit_cython_cache.sh b/ci/submit_cython_cache.sh index cfbced4988357..b87acef0ba11c 100755 --- a/ci/submit_cython_cache.sh +++ b/ci/submit_cython_cache.sh @@ -9,7 +9,7 @@ rm -rf $PYX_CACHE_DIR home_dir=$(pwd) -mkdir $PYX_CACHE_DIR +mkdir -p $PYX_CACHE_DIR rsync -Rv $pyx_file_list $PYX_CACHE_DIR echo "pyx files:" From 59b88ab6123338e1dfcc0f77dfb3e5c4c511889a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 16:44:46 -0400 Subject: [PATCH 239/353] Revert "TST: move conftest.py to top-level (#15731)" This reverts commit 6a52c15a4ac7b2228e7f8ca45412cacfe301b040. --- conftest.py => pandas/conftest.py | 0 pandas/tests/api/test_api.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename conftest.py => pandas/conftest.py (100%) diff --git a/conftest.py b/pandas/conftest.py similarity index 100% rename from conftest.py rename to pandas/conftest.py diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2972427f1b245..73222c246fc70 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -29,7 +29,7 @@ class TestPDApi(Base, tm.TestCase): # these are optionally imported based on testing # & need to be ignored - ignored = ['tests', 'locale'] + ignored = ['tests', 'locale', 'conftest'] # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', From 5f4a5b4fba87c96a583dda57cff864dea7333759 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 18 Mar 2017 22:00:00 -0400 Subject: [PATCH 240/353] TST: clean up build testing Author: Jeff Reback Closes #15734 from jreback/build and squashes the following commits: a99b713 [Jeff Reback] suppress the import json warning when generating _version ed7c526 [Jeff Reback] modify install tests 1cc5b67 [Jeff Reback] TST: have the build test exercise pandas.test() --- .travis.yml | 9 ++++---- ci/install_test.sh | 17 --------------- ci/install_travis.sh | 52 +++++++++++++++++++++++++------------------- ci/script_multi.sh | 3 ++- versioneer.py | 4 +++- 5 files changed, 39 insertions(+), 46 deletions(-) delete mode 100755 ci/install_test.sh diff --git a/.travis.yml b/.travis.yml index 88e1655363a4e..705b2380ac697 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,7 +33,6 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" - TEST_ARGS="--skip-slow --skip-network" - - BUILD_TYPE=conda - JOB_TAG=_OSX - TRAVIS_PYTHON_VERSION=3.5 - CACHE_NAME="35_osx" @@ -107,12 +106,12 @@ matrix: - python: 2.7 env: - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test_conda" + - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test_conda" + - 
CACHE_NAME="27_build_test" - USE_CACHE=true # In allow_failures - python: 3.5 @@ -147,12 +146,12 @@ matrix: - python: 2.7 env: - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test_conda" + - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test_conda" + - CACHE_NAME="27_build_test" - USE_CACHE=true - python: 3.5 env: diff --git a/ci/install_test.sh b/ci/install_test.sh deleted file mode 100755 index 9ace633d7f39d..0000000000000 --- a/ci/install_test.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -echo "inside $0" - -if [ "$INSTALL_TEST" ]; then - source activate pandas - echo "Starting installation test." - conda uninstall cython || exit 1 - python "$TRAVIS_BUILD_DIR"/setup.py sdist --formats=zip,gztar || exit 1 - pip install "$TRAVIS_BUILD_DIR"/dist/*tar.gz || exit 1 - pytest pandas/tests/test_series.py --junitxml=/tmp/pytest_install.xml -else - echo "Skipping installation test." -fi -RET="$?" - -exit "$RET" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index de3b3fb6a464e..053a2d15a287c 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -131,10 +131,13 @@ fi if [ "$BUILD_TEST" ]; then - # build testing - pip uninstall --yes cython - pip install cython==0.23 - ( python setup.py build_ext --inplace && python setup.py develop ) || true + # build & install testing + echo ["Starting installation test."] + python setup.py clean + python setup.py build_ext --inplace + python setup.py sdist --formats=gztar + conda uninstall cython + pip install dist/*tar.gz || exit 1 else @@ -142,26 +145,31 @@ else echo "[build em]" time python setup.py build_ext --inplace || exit 1 - # we may have run installations - echo "[conda installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" - if [ -e ${REQ} ]; then - time conda install -n pandas --file=${REQ} || exit 1 - fi +fi - # we may have additional pip installs - echo "[pip installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" - if [ -e ${REQ} ]; then - pip install -r $REQ - fi +# we may have run installations +echo "[conda installs]" +REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" +if [ -e ${REQ} ]; then + time conda install -n pandas --file=${REQ} || exit 1 +fi - # may have addtl installation instructions for this build - echo "[addtl installs]" - REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" - if [ -e ${REQ} ]; then - time bash $REQ || exit 1 - fi +# we may have additional pip installs +echo "[pip installs]" +REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" +if [ -e ${REQ} ]; then + pip install -r $REQ +fi + +# may have addtl installation instructions for this build +echo "[addtl installs]" +REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" +if [ -e ${REQ} ]; then + time bash $REQ || exit 1 +fi + +# finish install if we are not doing a build-testk +if [ -z "$BUILD_TEST" ]; then # remove any installed pandas package # w/o removing anything else diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 41f71fd21f63f..2d1211b2f7b96 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -24,7 +24,8 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496 echo PYTHONHASHSEED=$PYTHONHASHSEED if [ "$BUILD_TEST" ]; then - echo "We are not running pytest as this is simply a build test." 
+ cd /tmp + python -c "import pandas; pandas.test(['-n 2'])" elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas diff --git a/versioneer.py b/versioneer.py index c010f63e3ead8..104e8e97c6bd6 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1130,7 +1130,9 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. -import json +from warnings import catch_warnings +with catch_warnings(record=True): + import json import sys version_json = ''' From 9ab57dc522a41d42cb230272a3a0df0ad8a7eb27 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 18 Mar 2017 22:02:20 -0400 Subject: [PATCH 241/353] MAINT: Drop order and sort from pandas objects (#15735) Affect classes: 1) Index 2) Series 2) DataFrame xref gh-10726 --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 50 -------------------- pandas/core/series.py | 71 ---------------------------- pandas/indexes/base.py | 11 ----- pandas/tests/frame/test_analytics.py | 20 -------- pandas/tests/frame/test_sorting.py | 6 +-- pandas/tests/indexes/common.py | 6 --- pandas/tests/indexes/test_base.py | 15 ------ pandas/tests/series/test_sorting.py | 15 +----- 9 files changed, 4 insertions(+), 191 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4949b68d46723..680aefc4041fb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -771,6 +771,7 @@ Removal of prior version deprecations/changes - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) +- ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) .. _whatsnew_0200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3696051b269e3..732d88b47ae2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3304,56 +3304,6 @@ def trans(v): else: return self._constructor(new_data).__finalize__(self) - def sort(self, columns=None, axis=0, ascending=True, inplace=False, - kind='quicksort', na_position='last', **kwargs): - """ - DEPRECATED: use :meth:`DataFrame.sort_values` - - Sort DataFrame either by labels (along either axis) or by the values in - column(s) - - Parameters - ---------- - columns : object - Column name(s) in frame. Accepts a column name or a list - for a nested sort. A tuple will be interpreted as the - levels of a multi-index. - ascending : boolean or list, default True - Sort ascending vs. descending. Specify list for multiple sort - orders - axis : {0 or 'index', 1 or 'columns'}, default 0 - Sort index/rows versus columns - inplace : boolean, default False - Sort the DataFrame without creating a new instance - kind : {'quicksort', 'mergesort', 'heapsort'}, optional - This option is only applied when sorting on a single column or - label. 
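The removed method maps directly onto the two surviving sort entry points:
``sort_values`` for sorting by column values and ``sort_index`` for sorting
by labels. A minimal sketch of the migration (the frame and column names are
illustrative)::

    import pandas as pd

    df = pd.DataFrame({'A': [2, 1, 2], 'B': [3, 1, 2]})

    # previously df.sort(columns=['A', 'B'], ascending=[1, 0])
    by_values = df.sort_values(by=['A', 'B'], ascending=[True, False])

    # previously df.sort() with no columns, i.e. sort on the index
    by_labels = df.sort_index()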
- na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - - Examples - -------- - >>> result = df.sort(['A', 'B'], ascending=[1, 0]) - - Returns - ------- - sorted : DataFrame - """ - nv.validate_sort(tuple(), kwargs) - - if columns is None: - warnings.warn("sort(....) is deprecated, use sort_index(.....)", - FutureWarning, stacklevel=2) - return self.sort_index(axis=axis, ascending=ascending, - inplace=inplace) - - warnings.warn("sort(columns=....) is deprecated, use " - "sort_values(by=.....)", FutureWarning, stacklevel=2) - return self.sort_values(by=columns, axis=axis, ascending=ascending, - inplace=inplace, kind=kind, - na_position=na_position) - @Appender(_shared_docs['sort_index'] % _shared_doc_kwargs) def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, diff --git a/pandas/core/series.py b/pandas/core/series.py index 7ee3b3e8fb519..4c51ced1845fe 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1777,77 +1777,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, else: return result.__finalize__(self) - def sort(self, axis=0, ascending=True, kind='quicksort', - na_position='last', inplace=True): - """ - DEPRECATED: use :meth:`Series.sort_values(inplace=True)` for INPLACE - sorting - - Sort values and index labels by value. This is an inplace sort by - default. Series.order is the equivalent but returns a new Series. - - Parameters - ---------- - axis : int (can only be zero) - ascending : boolean, default True - Sort ascending. Passing False sorts descending - kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' - Choice of sorting algorithm. See np.sort for more - information. 'mergesort' is the only stable algorithm - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - inplace : boolean, default True - Do operation in place. - - See Also - -------- - Series.sort_values - """ - warnings.warn("sort is deprecated, use sort_values(inplace=True) for " - "INPLACE sorting", FutureWarning, stacklevel=2) - - return self.sort_values(ascending=ascending, kind=kind, - na_position=na_position, inplace=inplace) - - def order(self, na_last=None, ascending=True, kind='quicksort', - na_position='last', inplace=False): - """ - DEPRECATED: use :meth:`Series.sort_values` - - Sorts Series object, by value, maintaining index-value link. - This will return a new Series by default. Series.sort is the equivalent - but as an inplace method. - - Parameters - ---------- - na_last : boolean (optional, default=True)--DEPRECATED; use na_position - Put NaN's at beginning or end - ascending : boolean, default True - Sort ascending. Passing False sorts descending - kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' - Choice of sorting algorithm. See np.sort for more - information. 'mergesort' is the only stable algorithm - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - inplace : boolean, default False - Do operation in place. 
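``Series`` loses both spellings at once: the copying ``order`` and the
in-place ``sort`` are now expressed through ``sort_values``. A short sketch
of the equivalent calls (the data is illustrative)::

    import pandas as pd

    s = pd.Series([3, 1, 2])

    # previously s.order()
    sorted_copy = s.sort_values()

    # previously s.sort(), which sorted in place
    s.sort_values(inplace=True)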
- - Returns - ------- - y : Series - - See Also - -------- - Series.sort_values - """ - warnings.warn("order is deprecated, use sort_values(...)", - FutureWarning, stacklevel=2) - - return self.sort_values(ascending=ascending, kind=kind, - na_position=na_position, inplace=inplace) - def argsort(self, axis=0, kind='quicksort', order=None): """ Overrides ndarray.argsort. Argsorts the value, omitting NA/null values, diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 381e4d5caa8ac..d262ecd818f1d 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1912,17 +1912,6 @@ def sort_values(self, return_indexer=False, ascending=True): else: return sorted_index - def order(self, return_indexer=False, ascending=True): - """ - Return sorted copy of Index - - DEPRECATED: use :meth:`Index.sort_values` - """ - warnings.warn("order is deprecated, use sort_values(...)", - FutureWarning, stacklevel=2) - return self.sort_values(return_indexer=return_indexer, - ascending=ascending) - def sort(self, *args, **kwargs): raise TypeError("cannot sort an Index object in-place, use " "sort_values instead") diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 4fb1d2222fa06..735d3786e6a54 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -660,26 +660,6 @@ def test_sem(self): self.assertFalse((result < 0).any()) nanops._USE_BOTTLENECK = True - def test_sort_invalid_kwargs(self): - df = DataFrame([1, 2, 3], columns=['a']) - - msg = r"sort\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, df.sort, foo=2) - - # Neither of these should raise an error because they - # are explicit keyword arguments in the signature and - # hence should not be swallowed by the kwargs parameter - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - df.sort(axis=1) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - df.sort(kind='mergesort') - - msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, df.sort, order=2) - def test_skew(self): tm._skip_if_no_scipy() from scipy.stats import skew diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 7779afdc47b48..5108fc6080866 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -62,11 +62,7 @@ def test_sort(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - frame.sort(columns='A') - with tm.assert_produces_warning(FutureWarning): - frame.sort() + # see gh-9816 with tm.assert_produces_warning(FutureWarning): frame.sortlevel() diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 3581f894e53a3..b1e6bd7520c69 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -346,12 +346,6 @@ def test_sort(self): for ind in self.indices.values(): self.assertRaises(TypeError, ind.sort) - def test_order(self): - for ind in self.indices.values(): - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ind.order() - def test_mutability(self): for ind in self.indices.values(): if not len(ind): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 05d3478ab0705..7199a38bb7a80 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1808,21 +1808,6 @@ 
def setUp(self): def create_index(self): return self.mixedIndex - def test_order(self): - idx = self.create_index() - # 9816 deprecated - if PY36: - with tm.assertRaisesRegexp(TypeError, "'>' not supported"): - with tm.assert_produces_warning(FutureWarning): - idx.order() - elif PY3: - with tm.assertRaisesRegexp(TypeError, "unorderable types"): - with tm.assert_produces_warning(FutureWarning): - idx.order() - else: - with tm.assert_produces_warning(FutureWarning): - idx.order() - def test_argsort(self): idx = self.create_index() if PY36: diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 590a530a847bd..66ecba960ae0b 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -13,24 +13,13 @@ class TestSeriesSorting(TestData, tm.TestCase): - def test_sort(self): - + def test_sortlevel_deprecated(self): ts = self.ts.copy() - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ts.sort() # sorts inplace - self.assert_series_equal(ts, self.ts.sort_values()) + # see gh-9816 with tm.assert_produces_warning(FutureWarning): ts.sortlevel() - def test_order(self): - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - result = self.ts.order() - self.assert_series_equal(result, self.ts.sort_values()) - def test_sort_values(self): # check indexes are reordered corresponding with the values From bd24926bff3cac204a4d459488a9a64c4e8eece1 Mon Sep 17 00:00:00 2001 From: John Zwinck Date: Mon, 20 Mar 2017 16:13:52 +0800 Subject: [PATCH 242/353] DOC: Fix typo in docstring param name (#15739) --- pandas/tseries/holiday.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index d3d936693c266..9acb52ebe0e9f 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -365,7 +365,7 @@ def holidays(self, start=None, end=None, return_name=False): ---------- start : starting date, datetime-like, optional end : ending date, datetime-like, optional - return_names : bool, optional + return_name : bool, optional If True, return a series that has dates and holiday names. False will only return a DatetimeIndex of dates. From b1e29dba26ff86b826fe0f866182466ae42c0bc5 Mon Sep 17 00:00:00 2001 From: Pankaj Pandey Date: Mon, 20 Mar 2017 09:44:29 -0400 Subject: [PATCH 243/353] BUG: Fix linux clipboard QApplication() creation closes #14372 A Qt application cannot instantiate multiple `QApplication` instances, so we create a new `QApplication` only when the global `QApplication.instance()` is None. Author: Pankaj Pandey Closes #14815 from pankajp/patch-2 and squashes the following commits: 40d70f9 [Pankaj Pandey] BUG: Fix linux clipboard QApplication() creation --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/util/clipboard/clipboards.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 680aefc4041fb..af0d0d7b04475 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -828,7 +828,7 @@ Bug Fixes - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) - +- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) - Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) diff --git a/pandas/util/clipboard/clipboards.py b/pandas/util/clipboard/clipboards.py index f73f4f191d577..bd5528334168f 100644 --- a/pandas/util/clipboard/clipboards.py +++ b/pandas/util/clipboard/clipboards.py @@ -50,7 +50,8 @@ def init_qt_clipboard(): # $DISPLAY should exist from PyQt4.QtGui import QApplication - app = QApplication([]) + # use the global instance if it exists + app = QApplication.instance() or QApplication([]) def copy_qt(text): cb = app.clipboard() From 8bde21a9f01c4a09d6e305906e81794b45935d5e Mon Sep 17 00:00:00 2001 From: sinhrks Date: Mon, 20 Mar 2017 10:19:18 -0400 Subject: [PATCH 244/353] BUG: replace coerces incorrect dtype closes #12747 Author: sinhrks This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #12780 from sinhrks/replace_type and squashes the following commits: f9154e8 [sinhrks] remove unnecessary comments 279fdf6 [sinhrks] remove import failure de44877 [sinhrks] BUG: replace coerces incorrect dtype --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/internals.py | 20 +++++++++-- pandas/tests/indexing/test_coercion.py | 50 ++++++++++++++++++++------ pandas/tests/series/test_replace.py | 4 +-- pandas/types/cast.py | 37 ++++++++++++++----- 5 files changed, 88 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index af0d0d7b04475..7c78132232077 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -823,6 +823,7 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) +- Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 9db01713b05ed..60684a929889b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1894,8 +1894,11 @@ def convert(self, *args, **kwargs): blocks.append(newb) else: - values = fn( - self.values.ravel(), **fn_kwargs).reshape(self.values.shape) + values = fn(self.values.ravel(), **fn_kwargs) + try: + values = values.reshape(self.values.shape) + except NotImplementedError: + pass blocks.append(make_block(values, ndim=self.ndim, placement=self.mgr_locs)) @@ -3238,6 +3241,16 @@ def comp(s): return _possibly_compare(values, getattr(s, 'asm8', s), operator.eq) + def _cast_scalar(block, scalar): + dtype, val = _infer_dtype_from_scalar(scalar, pandas_dtype=True) + if not is_dtype_equal(block.dtype, dtype): + dtype = _find_common_type([block.dtype, dtype]) + block = block.astype(dtype) + # use original value + val = scalar + + return block, val + masks = [comp(s) for i, s in enumerate(src_list)] result_blocks = [] @@ -3260,7 +3273,8 @@ def comp(s): # particular block m = masks[i][b.mgr_locs.indexer] if m.any(): - new_rb.extend(b.putmask(m, d, inplace=True)) + b, val = _cast_scalar(b, d) + new_rb.extend(b.putmask(m, val, inplace=True)) else: new_rb.append(b) rb = new_rb diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 38f8bb5355a69..df95f563c0832 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1153,12 +1153,27 @@ def setUp(self): self.rep['float64'] = [1.1, 2.2] self.rep['complex128'] = [1 + 1j, 2 + 2j] self.rep['bool'] = [True, False] + self.rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-01-03')] + + for tz in ['UTC', 'US/Eastern']: + # to test tz => different tz replacement + key = 'datetime64[ns, {0}]'.format(tz) + self.rep[key] = [pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2011-01-03', tz=tz)] + + self.rep['timedelta64[ns]'] = [pd.Timedelta('1 day'), + pd.Timedelta('2 day')] def _assert_replace_conversion(self, from_key, to_key, how): index = pd.Index([3, 4], name='xxx') obj = pd.Series(self.rep[from_key], index=index, name='yyy') self.assertEqual(obj.dtype, from_key) + if (from_key.startswith('datetime') and to_key.startswith('datetime')): + # different tz, currently mask_missing raises SystemError + return + if how == 'dict': replacer = dict(zip(self.rep[from_key], self.rep[to_key])) elif how == 'series': @@ -1175,17 +1190,12 @@ def _assert_replace_conversion(self, from_key, to_key, how): pytest.skip("windows platform buggy: {0} -> {1}".format (from_key, to_key)) - if ((from_key == 'float64' and - to_key in ('bool', 'int64')) or - + if ((from_key == 'float64' and to_key in ('bool', 'int64')) or (from_key == 'complex128' and to_key in ('bool', 'int64', 'float64')) or - (from_key == 'int64' and - to_key in ('bool')) or - - # TODO_GH12747 The result must be int? - (from_key == 'bool' and to_key == 'int64')): + # GH12747 The result must be int? 
+ (from_key == 'int64' and to_key in ('bool'))): # buggy on 32-bit if tm.is_platform_32bit(): @@ -1248,13 +1258,31 @@ def test_replace_series_bool(self): self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_datetime64(self): - pass + from_key = 'datetime64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + from_key = 'datetime64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_datetime64tz(self): - pass + from_key = 'datetime64[ns, US/Eastern]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + from_key = 'datetime64[ns, US/Eastern]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_timedelta64(self): - pass + from_key = 'timedelta64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='dict') + + from_key = 'timedelta64[ns]' + for to_key in self.rep: + self._assert_replace_conversion(from_key, to_key, how='series') def test_replace_series_period(self): pass diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 0acd03316339e..f5a25e93cc82d 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -132,8 +132,8 @@ def check_replace(to_rep, val, expected): tm.assert_series_equal(expected, r) tm.assert_series_equal(expected, sc) - # should NOT upcast to float - e = pd.Series([0, 1, 2, 3, 4]) + # MUST upcast to float + e = pd.Series([0., 1., 2., 3., 4.]) tr, v = [3], [3.0] check_replace(tr, v, e) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 1cd55274b9b49..11a837dd21159 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -21,7 +21,7 @@ _ensure_int32, _ensure_int64, _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, _POSSIBLY_CAST_DTYPES) -from .dtypes import ExtensionDtype +from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries from .missing import isnull, notnull from .inference import is_list_like @@ -312,8 +312,17 @@ def _maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def _infer_dtype_from_scalar(val): - """ interpret the dtype from a scalar """ +def _infer_dtype_from_scalar(val, pandas_dtype=False): + """ + interpret the dtype from a scalar + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. 
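The user-visible effect of the cast changes in this patch is that
``replace`` now upcasts the result dtype when the replacement value cannot
be held by the original dtype, which is what the amended replace test above
encodes. A minimal sketch (values are illustrative)::

    import pandas as pd

    s = pd.Series([0, 1, 2, 3, 4])        # int64
    result = s.replace([3], [3.0])

    # replacing an int with a float now upcasts the whole Series
    print(result.dtype)                   # float64, previously int64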
+ If False, scalar belongs to pandas extension types is inferred as + object + """ dtype = np.object_ @@ -336,13 +345,20 @@ def _infer_dtype_from_scalar(val): dtype = np.object_ - elif isinstance(val, (np.datetime64, - datetime)) and getattr(val, 'tzinfo', None) is None: - val = lib.Timestamp(val).value - dtype = np.dtype('M8[ns]') + elif isinstance(val, (np.datetime64, datetime)): + val = tslib.Timestamp(val) + if val is tslib.NaT or val.tz is None: + dtype = np.dtype('M8[ns]') + else: + if pandas_dtype: + dtype = DatetimeTZDtype(unit='ns', tz=val.tz) + else: + # return datetimetz as object + return np.object_, val + val = val.value elif isinstance(val, (np.timedelta64, timedelta)): - val = lib.Timedelta(val).value + val = tslib.Timedelta(val).value dtype = np.dtype('m8[ns]') elif is_bool(val): @@ -363,6 +379,11 @@ def _infer_dtype_from_scalar(val): elif is_complex(val): dtype = np.complex_ + elif pandas_dtype: + if lib.is_period(val): + dtype = PeriodDtype(freq=val.freq) + val = val.ordinal + return dtype, val From 771e36c32f922c6a0c4a147f08fef32a011d534f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Mar 2017 13:45:49 -0400 Subject: [PATCH 245/353] BUG: tz aware Timestamp field accessors returns local values (#13303) closes #13303 Previously, calling a date/time attribute with Timestamp that's tz aware (e.g. `Timestamp('...', tz='...').dayofyear`) would return the attribute in UTC instead of the local tz. Author: Matt Roeschke Closes #15740 from mroeschke/fix_13303 and squashes the following commits: b78b333 [Matt Roeschke] BUG: tz aware Timestamp field accessors returns local values (#13303) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/tslib.pyx | 10 +- pandas/tests/indexes/datetimes/test_misc.py | 158 ++++++++++---------- pandas/tests/scalar/test_timestamp.py | 26 ++++ 4 files changed, 117 insertions(+), 78 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7c78132232077..98407aacb993b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -796,6 +796,7 @@ Bug Fixes ~~~~~~~~~ - Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`) +- Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`) - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8ee92e9fb900d..055534bbdb7ee 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1233,7 +1233,10 @@ cdef class _Timestamp(datetime): return datetime.__sub__(self, other) cpdef _get_field(self, field): - out = get_date_field(np.array([self.value], dtype=np.int64), field) + val = self.value + if self.tz is not None and not _is_utc(self.tz): + val = tz_convert_single(self.value, 'UTC', self.tz) + out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) cpdef _get_start_end_field(self, field): @@ -1241,8 +1244,11 @@ cdef class _Timestamp(datetime): 'startingMonth', self.freq.kwds.get( 'month', 12)) if self.freq else 12 freqstr = self.freqstr if self.freq else None + val = self.value + if self.tz is not None and not _is_utc(self.tz): + val = tz_convert_single(self.value, 'UTC', 
self.tz) out = get_start_end_field( - np.array([self.value], dtype=np.int64), field, freqstr, month_kw) + np.array([val], dtype=np.int64), field, freqstr, month_kw) return out[0] property _repr_base: diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 6b0191edbda5a..e99f1d46637c2 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -172,82 +172,88 @@ def test_normalize(self): class TestDatetime64(tm.TestCase): def test_datetimeindex_accessors(self): - dti = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) - - self.assertEqual(dti.year[0], 1998) - self.assertEqual(dti.month[0], 1) - self.assertEqual(dti.day[0], 1) - self.assertEqual(dti.hour[0], 0) - self.assertEqual(dti.minute[0], 0) - self.assertEqual(dti.second[0], 0) - self.assertEqual(dti.microsecond[0], 0) - self.assertEqual(dti.dayofweek[0], 3) - - self.assertEqual(dti.dayofyear[0], 1) - self.assertEqual(dti.dayofyear[120], 121) - - self.assertEqual(dti.weekofyear[0], 1) - self.assertEqual(dti.weekofyear[120], 18) - - self.assertEqual(dti.quarter[0], 1) - self.assertEqual(dti.quarter[120], 2) - - self.assertEqual(dti.days_in_month[0], 31) - self.assertEqual(dti.days_in_month[90], 30) - - self.assertEqual(dti.is_month_start[0], True) - self.assertEqual(dti.is_month_start[1], False) - self.assertEqual(dti.is_month_start[31], True) - self.assertEqual(dti.is_quarter_start[0], True) - self.assertEqual(dti.is_quarter_start[90], True) - self.assertEqual(dti.is_year_start[0], True) - self.assertEqual(dti.is_year_start[364], False) - self.assertEqual(dti.is_month_end[0], False) - self.assertEqual(dti.is_month_end[30], True) - self.assertEqual(dti.is_month_end[31], False) - self.assertEqual(dti.is_month_end[364], True) - self.assertEqual(dti.is_quarter_end[0], False) - self.assertEqual(dti.is_quarter_end[30], False) - self.assertEqual(dti.is_quarter_end[89], True) - self.assertEqual(dti.is_quarter_end[364], True) - self.assertEqual(dti.is_year_end[0], False) - self.assertEqual(dti.is_year_end[364], True) - - # GH 11128 - self.assertEqual(dti.weekday_name[4], u'Monday') - self.assertEqual(dti.weekday_name[5], u'Tuesday') - self.assertEqual(dti.weekday_name[6], u'Wednesday') - self.assertEqual(dti.weekday_name[7], u'Thursday') - self.assertEqual(dti.weekday_name[8], u'Friday') - self.assertEqual(dti.weekday_name[9], u'Saturday') - self.assertEqual(dti.weekday_name[10], u'Sunday') - - self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') - self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') - self.assertEqual(Timestamp('2016-04-06').weekday_name, u'Wednesday') - self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') - self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') - self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') - self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') - - self.assertEqual(len(dti.year), 365) - self.assertEqual(len(dti.month), 365) - self.assertEqual(len(dti.day), 365) - self.assertEqual(len(dti.hour), 365) - self.assertEqual(len(dti.minute), 365) - self.assertEqual(len(dti.second), 365) - self.assertEqual(len(dti.microsecond), 365) - self.assertEqual(len(dti.dayofweek), 365) - self.assertEqual(len(dti.dayofyear), 365) - self.assertEqual(len(dti.weekofyear), 365) - self.assertEqual(len(dti.quarter), 365) - self.assertEqual(len(dti.is_month_start), 365) - self.assertEqual(len(dti.is_month_end), 365) - 
self.assertEqual(len(dti.is_quarter_start), 365) - self.assertEqual(len(dti.is_quarter_end), 365) - self.assertEqual(len(dti.is_year_start), 365) - self.assertEqual(len(dti.is_year_end), 365) - self.assertEqual(len(dti.weekday_name), 365) + dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365) + # GH 13303 + dti_tz = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), + periods=365, tz='US/Eastern') + for dti in [dti_naive, dti_tz]: + + self.assertEqual(dti.year[0], 1998) + self.assertEqual(dti.month[0], 1) + self.assertEqual(dti.day[0], 1) + self.assertEqual(dti.hour[0], 0) + self.assertEqual(dti.minute[0], 0) + self.assertEqual(dti.second[0], 0) + self.assertEqual(dti.microsecond[0], 0) + self.assertEqual(dti.dayofweek[0], 3) + + self.assertEqual(dti.dayofyear[0], 1) + self.assertEqual(dti.dayofyear[120], 121) + + self.assertEqual(dti.weekofyear[0], 1) + self.assertEqual(dti.weekofyear[120], 18) + + self.assertEqual(dti.quarter[0], 1) + self.assertEqual(dti.quarter[120], 2) + + self.assertEqual(dti.days_in_month[0], 31) + self.assertEqual(dti.days_in_month[90], 30) + + self.assertEqual(dti.is_month_start[0], True) + self.assertEqual(dti.is_month_start[1], False) + self.assertEqual(dti.is_month_start[31], True) + self.assertEqual(dti.is_quarter_start[0], True) + self.assertEqual(dti.is_quarter_start[90], True) + self.assertEqual(dti.is_year_start[0], True) + self.assertEqual(dti.is_year_start[364], False) + self.assertEqual(dti.is_month_end[0], False) + self.assertEqual(dti.is_month_end[30], True) + self.assertEqual(dti.is_month_end[31], False) + self.assertEqual(dti.is_month_end[364], True) + self.assertEqual(dti.is_quarter_end[0], False) + self.assertEqual(dti.is_quarter_end[30], False) + self.assertEqual(dti.is_quarter_end[89], True) + self.assertEqual(dti.is_quarter_end[364], True) + self.assertEqual(dti.is_year_end[0], False) + self.assertEqual(dti.is_year_end[364], True) + + # GH 11128 + self.assertEqual(dti.weekday_name[4], u'Monday') + self.assertEqual(dti.weekday_name[5], u'Tuesday') + self.assertEqual(dti.weekday_name[6], u'Wednesday') + self.assertEqual(dti.weekday_name[7], u'Thursday') + self.assertEqual(dti.weekday_name[8], u'Friday') + self.assertEqual(dti.weekday_name[9], u'Saturday') + self.assertEqual(dti.weekday_name[10], u'Sunday') + + self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday') + self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday') + self.assertEqual(Timestamp('2016-04-06').weekday_name, + u'Wednesday') + self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday') + self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday') + self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday') + self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday') + + self.assertEqual(len(dti.year), 365) + self.assertEqual(len(dti.month), 365) + self.assertEqual(len(dti.day), 365) + self.assertEqual(len(dti.hour), 365) + self.assertEqual(len(dti.minute), 365) + self.assertEqual(len(dti.second), 365) + self.assertEqual(len(dti.microsecond), 365) + self.assertEqual(len(dti.dayofweek), 365) + self.assertEqual(len(dti.dayofyear), 365) + self.assertEqual(len(dti.weekofyear), 365) + self.assertEqual(len(dti.quarter), 365) + self.assertEqual(len(dti.is_month_start), 365) + self.assertEqual(len(dti.is_month_end), 365) + self.assertEqual(len(dti.is_quarter_start), 365) + self.assertEqual(len(dti.is_quarter_end), 365) + self.assertEqual(len(dti.is_year_start), 365) + self.assertEqual(len(dti.is_year_end), 
365) + self.assertEqual(len(dti.weekday_name), 365) dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index d5d92dcf96eab..082f0fa9c40d5 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -550,6 +550,32 @@ def check(value, equal): check(ts.daysinmonth, 31) check(ts.daysinmonth, 31) + # GH 13303 + ts = Timestamp('2014-12-31 23:59:00-05:00', tz='US/Eastern') + check(ts.year, 2014) + check(ts.month, 12) + check(ts.day, 31) + check(ts.hour, 23) + check(ts.minute, 59) + check(ts.second, 0) + self.assertRaises(AttributeError, lambda: ts.millisecond) + check(ts.microsecond, 0) + check(ts.nanosecond, 0) + check(ts.dayofweek, 2) + check(ts.quarter, 4) + check(ts.dayofyear, 365) + check(ts.week, 1) + check(ts.daysinmonth, 31) + + ts = Timestamp('2014-01-01 00:00:00+01:00') + starts = ['is_month_start', 'is_quarter_start', 'is_year_start'] + for start in starts: + self.assertTrue(getattr(ts, start)) + ts = Timestamp('2014-12-31 23:59:59+01:00') + ends = ['is_month_end', 'is_year_end', 'is_quarter_end'] + for end in ends: + self.assertTrue(getattr(ts, end)) + def test_nat_fields(self): # GH 10050 ts = Timestamp('NaT') From 2b45e448458f5176d78147e6449ba595bc785973 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 20 Mar 2017 15:38:35 -0400 Subject: [PATCH 246/353] DOC: Patch new flake8 command grep The grep was initially matching to "pandas," which is incorrect because that was also matching files containing "pandas" in the name but that were not in the main `pandas` directory (e.g. performance test code). This change enforces that we match to any Python files in the main `pandas` directory. Also picked up compatibility issue with OSX, in which the `-r` flag does not exist. However, `xargs` terminates if the argument list is empty, which was the whole point of passing in `-r` in the first place. Follow-up to #15712 Author: gfyoung Closes #15749 from gfyoung/flake8-diff-patch and squashes the following commits: d1543b5 [gfyoung] COMPAT: Do not run xargs with -r on OSX da57857 [gfyoung] DOC: Patch new flake8 command grep --- doc/source/contributing.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 7961780d0c79b..7ad5916a8809d 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -527,7 +527,12 @@ unused function. However, style-checking the diff will not catch this because the actual import is not part of the diff. Thus, for completeness, you should run this command, though it will take longer:: - git diff master --name-only -- '*.py' | grep 'pandas' | xargs -r flake8 + git diff master --name-only -- '*.py' | grep 'pandas/' | xargs -r flake8 + +Note that on OSX, the ``-r`` flag is not available, so you have to omit it and +run this slightly modified command:: + + git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8 Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ From bff47f2302a0be4dcbf7e5055e525d5652e08fb5 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 20 Mar 2017 15:47:48 -0400 Subject: [PATCH 247/353] MAINT: Remove Long and WidePanel (#15748) Deprecated since 0.17.0. 
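Code that still constructs the legacy classes needs the direct replacements;
both were thin deprecated subclasses, so the change is mechanical. A minimal
sketch (shapes and labels are arbitrary)::

    import numpy as np
    import pandas as pd

    # previously pd.WidePanel(...); WidePanel was only a deprecated
    # subclass of Panel with identical behaviour
    panel = pd.Panel(np.random.randn(2, 3, 4), items=['ItemA', 'ItemB'])

    # previously pd.LongPanel(...); LongPanel was only a deprecated
    # subclass of DataFrame
    frame = pd.DataFrame({'A': [1.0, 2.0], 'B': [3.0, 4.0]})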
xref gh-10892 --- asv_bench/benchmarks/pandas_vb_common.py | 5 ---- bench/bench_join_panel.py | 4 +-- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/api.py | 2 +- pandas/core/panel.py | 23 ---------------- pandas/tests/api/test_api.py | 3 +- pandas/tests/io/test_pytables.py | 3 -- pandas/tests/test_panel.py | 35 ++++++++---------------- vb_suite/pandas_vb_common.py | 5 ---- 9 files changed, 17 insertions(+), 64 deletions(-) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 56ccc94c414fb..a7e530e7f5ef1 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -25,11 +25,6 @@ except: pass -try: - Panel = Panel -except Exception: - Panel = WidePanel - # didn't add to namespace until later try: from pandas.core.index import MultiIndex diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py index f3c3f8ba15f70..113b317dd8ff8 100644 --- a/bench/bench_join_panel.py +++ b/bench/bench_join_panel.py @@ -45,8 +45,8 @@ def reindex_on_axis(panels, axis, axis_reindex): return p -# does the job but inefficient (better to handle like you read a table in -# pytables...e.g create a LongPanel then convert to Wide) +# Does the job but inefficient. It is better to handle +# this like you read a table in pytables. def create_panels_join(cls, panels): """ given an array of panels's, create a single panel """ panels = [a for a in panels if a is not None] diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 98407aacb993b..ebdd4060f0588 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -772,6 +772,7 @@ Removal of prior version deprecations/changes - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) +- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) .. _whatsnew_0200.performance: diff --git a/pandas/core/api.py b/pandas/core/api.py index 65253dedb8b53..5018de39ca907 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -15,7 +15,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.panel import Panel, WidePanel +from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D from pandas.core.reshape import (pivot_simple as pivot, get_dummies, lreshape, wide_to_long) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 4a6c6cf291316..5c7b66a2d1356 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -4,8 +4,6 @@ # pylint: disable=E1103,W0231,W0212,W0621 from __future__ import division -import warnings - import numpy as np from pandas.types.cast import (_infer_dtype_from_scalar, @@ -1556,24 +1554,3 @@ def f(self, other, axis=0): ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() Panel._add_numeric_operations() - - -# legacy -class WidePanel(Panel): - - def __init__(self, *args, **kwargs): - # deprecation, #10892 - warnings.warn("WidePanel is deprecated. 
Please use Panel", - FutureWarning, stacklevel=2) - - super(WidePanel, self).__init__(*args, **kwargs) - - -class LongPanel(DataFrame): - - def __init__(self, *args, **kwargs): - # deprecation, #10892 - warnings.warn("LongPanel is deprecated. Please use DataFrame", - FutureWarning, stacklevel=2) - - super(LongPanel, self).__init__(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 73222c246fc70..2c7dcf2501f32 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -54,8 +54,7 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['WidePanel', 'Panel4D', - 'SparseList', 'Expr', 'Term'] + deprecated_classes = ['Panel4D', 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future deprecated_classes_in_future = ['Panel'] diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 40866b8702fe2..324160d5b1ae6 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -3017,9 +3017,6 @@ def _check(left, right): # empty # self._check_roundtrip(wp.to_frame()[:0], _check) - def test_longpanel(self): - pass - def test_overwrite_node(self): with ensure_clean_store(self.path) as store: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index ab0322abbcf06..13e16f3b90730 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -178,10 +178,6 @@ def wrapper(x): class SafeForSparse(object): - @classmethod - def assert_panel_equal(cls, x, y): - assert_panel_equal(x, y) - def test_get_axis(self): assert (self.panel._get_axis(0) is self.panel.items) assert (self.panel._get_axis(1) is self.panel.major_axis) @@ -346,10 +342,10 @@ def check_op(op, name): def test_combinePanel(self): result = self.panel.add(self.panel) - self.assert_panel_equal(result, self.panel * 2) + assert_panel_equal(result, self.panel * 2) def test_neg(self): - self.assert_panel_equal(-self.panel, self.panel * -1) + assert_panel_equal(-self.panel, self.panel * -1) # issue 7692 def test_raise_when_not_implemented(self): @@ -369,22 +365,22 @@ def test_select(self): # select items result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') expected = p.reindex(items=['ItemA', 'ItemC']) - self.assert_panel_equal(result, expected) + assert_panel_equal(result, expected) # select major_axis result = p.select(lambda x: x >= datetime(2000, 1, 15), axis='major') new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] expected = p.reindex(major=new_major) - self.assert_panel_equal(result, expected) + assert_panel_equal(result, expected) # select minor_axis result = p.select(lambda x: x in ('D', 'A'), axis=2) expected = p.reindex(minor=['A', 'D']) - self.assert_panel_equal(result, expected) + assert_panel_equal(result, expected) # corner case, empty thing result = p.select(lambda x: x in ('foo', ), axis='items') - self.assert_panel_equal(result, p.reindex(items=[])) + assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): for item in self.panel.items: @@ -399,8 +395,8 @@ def test_abs(self): result = self.panel.abs() result2 = abs(self.panel) expected = np.abs(self.panel) - self.assert_panel_equal(result, expected) - self.assert_panel_equal(result2, expected) + assert_panel_equal(result, expected) + assert_panel_equal(result2, expected) df = self.panel['ItemA'] result = df.abs() @@ -867,10 +863,6 @@ def test_set_value(self): class TestPanel(tm.TestCase, 
PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): - @classmethod - def assert_panel_equal(cls, x, y): - assert_panel_equal(x, y) - def setUp(self): self.panel = _panel.copy() self.panel.major_axis.name = None @@ -1967,7 +1959,7 @@ def test_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = p.round() - self.assert_panel_equal(expected, result) + assert_panel_equal(expected, result) def test_numpy_round(self): values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], @@ -1983,7 +1975,7 @@ def test_numpy_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = np.round(p) - self.assert_panel_equal(expected, result) + assert_panel_equal(expected, result) msg = "the 'out' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) @@ -2270,15 +2262,12 @@ def test_all_any_unhandled(self): self.assertRaises(NotImplementedError, self.panel.any, bool_only=True) -class TestLongPanel(tm.TestCase): +class TestPanelFrame(tm.TestCase): """ - LongPanel no longer exists, but... + Check that conversions to and from Panel to DataFrame work. """ def setUp(self): - import warnings - warnings.filterwarnings(action='ignore', category=FutureWarning) - panel = tm.makePanel() tm.add_nans(panel) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index bd2e8a1c1d504..41e43d6ab10e5 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -18,11 +18,6 @@ except: import pandas._libs.lib as lib -try: - Panel = WidePanel -except Exception: - pass - # didn't add to namespace until later try: from pandas.core.index import MultiIndex From f2e942e185da9369f2c1f4d3b38f57af7b4243bd Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Mar 2017 18:45:19 -0400 Subject: [PATCH 248/353] PERF: Improve drop_duplicates for bool columns (#12963) closes #12963 Author: Matt Roeschke Closes #15738 from mroeschke/fix_12963 and squashes the following commits: a020c10 [Matt Roeschke] PERF: Improve drop_duplicates for bool columns (#12963) --- asv_bench/benchmarks/reindex.py | 5 +++++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 7 ++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 6fe6c32a96df9..537d275e7c727 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -132,6 +132,9 @@ def setup(self): self.K = 10000 self.key1 = np.random.randint(0, self.K, size=self.N) self.df_int = DataFrame({'key1': self.key1}) + self.df_bool = DataFrame({i: np.random.randint(0, 2, size=self.K, + dtype=bool) + for i in range(10)}) def time_frame_drop_dups(self): self.df.drop_duplicates(['key1', 'key2']) @@ -154,6 +157,8 @@ def time_series_drop_dups_string(self): def time_frame_drop_dups_int(self): self.df_int.drop_duplicates() + def time_frame_drop_dups_bool(self): + self.df_bool.drop_duplicates() #---------------------------------------------------------------------- # blog "pandas escaped the zoo" diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ebdd4060f0588..d036049e3ffdb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -789,6 +789,7 @@ Performance Improvements - Improved performance of ``.rank()`` for categorical data (:issue:`15498`) - Improved performance when using ``.unstack()`` (:issue:`15503`) - Improved performance of merge/join on ``category`` columns (:issue:`10409`) +- 
Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`) .. _whatsnew_0200.bug_fixes: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6937675603c10..f9d4c9107d7cd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,6 +19,7 @@ is_period_dtype, is_period_arraylike, is_float_dtype, + is_bool_dtype, needs_i8_conversion, is_categorical, is_datetime64_dtype, @@ -325,8 +326,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ from pandas import Index, Series, DatetimeIndex, PeriodIndex - # handling two possibilities here + # handling possibilities here # - for a numpy datetimelike simply view as i8 then cast back + # - bool handled as uint8 then cast back # - for an extension datetimelike view as i8 then # reconstruct from boxed values to transfer metadata dtype = None @@ -341,6 +343,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): # numpy dtype dtype = values.dtype vals = values.view(np.int64) + elif is_bool_dtype(values): + dtype = bool + vals = np.asarray(values).view('uint8') else: vals = np.asarray(values) From 92239f5dcfb02f97b5b1eed651895fe70dfd7eb1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 20 Mar 2017 19:38:01 -0400 Subject: [PATCH 249/353] CI: trying for osx cache again --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 705b2380ac697..67b37f1d58931 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,11 @@ matrix: os: osx compiler: clang osx_image: xcode6.4 - cache: ccache + cache: + ccache: true + directories: + - $HOME/.cache # cython cache + - $HOME/.ccache # compiler cache env: - PYTHON_VERSION=3.5 - JOB_NAME: "35_osx" From 8c80b6bb939a77ffb0ed11d468f22925abcd555a Mon Sep 17 00:00:00 2001 From: Wiktor Tomczak Date: Tue, 21 Mar 2017 09:52:37 +0100 Subject: [PATCH 250/353] Fix num_days in PandasAutoDateLocator (#14716) --- pandas/tseries/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 1f99e88ce86d6..8aea14a2688d1 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -261,7 +261,7 @@ def get_locator(self, dmin, dmax): 'Pick the best locator based on a distance.' delta = relativedelta(dmax, dmin) - num_days = ((delta.years * 12.0) + delta.months * 31.0) + delta.days + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds tot_sec = num_days * 86400. 
+ num_sec From 783ae69f0edc350bdf7b20932351c79a5a1fad3c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 08:21:25 -0400 Subject: [PATCH 251/353] CI: set path for osx ccache --- ci/install_travis.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 053a2d15a287c..c940083f5ae9e 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -32,11 +32,6 @@ edit_init home_dir=$(pwd) echo "[home_dir: $home_dir]" -if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - echo "[install ccache]" - time brew install ccache -fi - # install miniconda MINICONDA_DIR="$HOME/miniconda3" @@ -86,6 +81,14 @@ if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then ccache=$(which ccache) echo "[ccache: $ccache]" export CC='ccache gcc' +elif [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "[Using ccache]" + time brew install ccache + export PATH=/usr/local/opt/ccache/libexec:$PATH + gcc=$(which gcc) + echo "[gcc: $gcc]" + ccache=$(which ccache) + echo "[ccache: $ccache]" else echo "[Not using ccache]" fi From 1e753d7ce9dca129d7ec5383612f874ee1393788 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 10:49:46 -0400 Subject: [PATCH 252/353] CLN: replace _interleave_dtype with _find_common_type xref #15736 xref #12780 Author: Jeff Reback Closes #15765 from jreback/common_types and squashes the following commits: d472646 [Jeff Reback] try removing restriction on windows 8d07cae [Jeff Reback] CLN: replace _interleave_dtype with _find_common_type --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/internals.py | 59 ++++---------------------- pandas/tests/indexing/test_coercion.py | 14 +----- pandas/tests/series/test_replace.py | 4 +- pandas/tests/types/test_cast.py | 14 ++++++ pandas/types/cast.py | 28 +++++++++++- 6 files changed, 55 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d036049e3ffdb..e0d15c218ec85 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -826,7 +826,7 @@ Bug Fixes - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) -- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`) +- Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`, :issue:`15765`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 60684a929889b..6487c2108028e 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -9,7 +9,8 @@ from pandas.core.base import PandasObject -from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype +from pandas.types.dtypes import (ExtensionDtype, DatetimeTZDtype, + CategoricalDtype) from pandas.types.common import (_TD_DTYPE, _NS_DTYPE, _ensure_int64, _ensure_platform_int, is_integer, @@ -4496,55 +4497,13 @@ def _interleaved_dtype(blocks): if not len(blocks): return None - counts = defaultdict(list) - for x in blocks: - counts[type(x)].append(x) - - have_int = len(counts[IntBlock]) > 0 - have_bool = len(counts[BoolBlock]) > 0 - have_object = len(counts[ObjectBlock]) > 0 - have_float = len(counts[FloatBlock]) > 0 - have_complex = len(counts[ComplexBlock]) > 0 - have_dt64 = len(counts[DatetimeBlock]) > 0 - have_dt64_tz = len(counts[DatetimeTZBlock]) > 0 - have_td64 = len(counts[TimeDeltaBlock]) > 0 - have_cat = len(counts[CategoricalBlock]) > 0 - # TODO: have_sparse is not used - have_sparse = len(counts[SparseBlock]) > 0 # noqa - have_numeric = have_float or have_complex or have_int - has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat - - if (have_object or - (have_bool and - (have_numeric or have_dt64 or have_dt64_tz or have_td64)) or - (have_numeric and has_non_numeric) or have_cat or have_dt64 or - have_dt64_tz or have_td64): - return np.dtype(object) - elif have_bool: - return np.dtype(bool) - elif have_int and not have_float and not have_complex: - # if we are mixing unsigned and signed, then return - # the next biggest int type (if we can) - lcd = _find_common_type([b.dtype for b in counts[IntBlock]]) - kinds = set([i.dtype.kind for i in counts[IntBlock]]) - if len(kinds) == 1: - return lcd - - if lcd == 'uint64' or lcd == 'int64': - return np.dtype('int64') - - # return 1 bigger on the itemsize if unsinged - if lcd.kind == 'u': - return np.dtype('int%s' % (lcd.itemsize * 8 * 2)) - return lcd - - elif have_int and have_float and not have_complex: - return np.dtype('float64') - elif have_complex: - return np.dtype('c16') - else: - introspection_blks = counts[FloatBlock] + counts[SparseBlock] - return _find_common_type([b.dtype for b in introspection_blks]) + dtype = _find_common_type([b.dtype for b in blocks]) + + # only numpy compat + if isinstance(dtype, ExtensionDtype): + dtype = np.object + + return dtype def _consolidate(blocks): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index df95f563c0832..7216c05657102 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1183,19 +1183,9 @@ def _assert_replace_conversion(self, from_key, to_key, how): result = obj.replace(replacer) - # buggy on windows for bool/int64 - if (from_key == 'bool' and - to_key == 'int64' and - tm.is_platform_windows()): - pytest.skip("windows platform buggy: {0} -> {1}".format - (from_key, to_key)) - - if ((from_key == 'float64' and to_key in ('bool', 'int64')) or + if ((from_key == 'float64' and to_key in ('int64')) or (from_key == 'complex128' and - to_key in ('bool', 'int64', 'float64')) or - - # GH12747 The result must be int? 
- (from_key == 'int64' and to_key in ('bool'))): + to_key in ('int64', 'float64'))): # buggy on 32-bit if tm.is_platform_32bit(): diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index f5a25e93cc82d..0a53581e24ba5 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -152,8 +152,8 @@ def check_replace(to_rep, val, expected): tr, v = [3, 4], [3.5, pd.Timestamp('20130101')] check_replace(tr, v, e) - # casts to float - e = pd.Series([0, 1, 2, 3.5, 1]) + # casts to object + e = pd.Series([0, 1, 2, 3.5, True], dtype='object') tr, v = [3, 4], [3.5, True] check_replace(tr, v, e) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 70f69cc7d5701..d7b086daea1e3 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -238,6 +238,20 @@ def test_numpy_dtypes(self): ((np.object, np.float32), np.object), ((np.object, np.int16), np.object), + # bool with int + ((np.dtype('bool'), np.int64), np.object), + ((np.dtype('bool'), np.int32), np.object), + ((np.dtype('bool'), np.int16), np.object), + ((np.dtype('bool'), np.int8), np.object), + ((np.dtype('bool'), np.uint64), np.object), + ((np.dtype('bool'), np.uint32), np.object), + ((np.dtype('bool'), np.uint16), np.object), + ((np.dtype('bool'), np.uint8), np.object), + + # bool with float + ((np.dtype('bool'), np.float64), np.object), + ((np.dtype('bool'), np.float32), np.object), + ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')), np.dtype('datetime64[ns]')), ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')), diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 11a837dd21159..0e26cd085db5a 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -892,12 +892,28 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): def _find_common_type(types): - """Find a common data type among the given dtypes.""" + """ + Find a common data type among the given dtypes. 
+ + Parameters + ---------- + types : list of dtypes + + Returns + ------- + pandas extension or numpy dtype + + See Also + -------- + numpy.find_common_type + + """ if len(types) == 0: raise ValueError('no types given') first = types[0] + # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2) # => object if all(is_dtype_equal(first, t) for t in types[1:]): @@ -912,4 +928,14 @@ def _find_common_type(types): if all(is_timedelta64_dtype(t) for t in types): return np.dtype('timedelta64[ns]') + # don't mix bool / int or float or complex + # this is different from numpy, which casts bool with float/int as int + has_bools = any(is_bool_dtype(t) for t in types) + if has_bools: + has_ints = any(is_integer_dtype(t) for t in types) + has_floats = any(is_float_dtype(t) for t in types) + has_complex = any(is_complex_dtype(t) for t in types) + if has_ints or has_floats or has_complex: + return np.object + return np.find_common_type(types, []) From aa9d0cf7fa0061058125d79d22d86f82f69c9185 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 13:39:55 -0400 Subject: [PATCH 253/353] BUG: various 32bit compat issues closes #14866 xref #14183 Author: Jeff Reback Closes #15766 from jreback/32bit and squashes the following commits: 93c03e3 [Jeff Reback] BUG: 32bit compat for .get_indexer 4163918 [Jeff Reback] BUG: fix isin for 32bit platform issues 1bb2f60 [Jeff Reback] BUG: cut/qcut should always return int64 bins --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 44 +++++++++++++++++++----------- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/tools/test_tile.py | 4 +-- pandas/tools/tile.py | 4 +-- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index e0d15c218ec85..55e3d979b07dd 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -824,6 +824,7 @@ Bug Fixes - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. 
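A minimal sketch of the corrected behaviour (illustrative, not part of this patch; assumes pandas 0.20): replacing integers with a mixed float/bool list now upcasts the result to object instead of coercing the bool, since bool combined with numeric dtypes resolves to object under the new common-type rules.

    import pandas as pd

    s = pd.Series([0, 1, 2, 3, 4])
    out = s.replace([3, 4], [3.5, True])
    print(out.dtype)     # object
    print(out.tolist())  # [0, 1, 2, 3.5, True]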
(:issue:`12747`, :issue:`15765`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f9d4c9107d7cd..00a3264e6c74a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -169,33 +169,45 @@ def isin(comps, values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(comps).__name__)) - comps = np.asarray(comps) if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(values).__name__)) - if not isinstance(values, np.ndarray): - values = list(values) + + from pandas import DatetimeIndex, PeriodIndex + + if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): + values = np.array(list(values), dtype='object') + + if needs_i8_conversion(comps): + if is_period_dtype(values): + comps = PeriodIndex(comps) + values = PeriodIndex(values) + else: + comps = DatetimeIndex(comps) + values = DatetimeIndex(values) + + values = values.asi8 + comps = comps.asi8 + elif is_bool_dtype(comps): + + try: + comps = np.asarray(comps).view('uint8') + values = np.asarray(values).view('uint8') + except TypeError: + # object array conversion will fail + pass + else: + comps = np.asarray(comps) + values = np.asarray(values) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000: f = lambda x, y: np.in1d(x, np.asarray(list(y))) - else: - f = lambda x, y: lib.ismember_int64(x, set(y)) - - # may need i8 conversion for proper membership testing - if is_datetime64_dtype(comps): - from pandas.tseries.tools import to_datetime - values = to_datetime(values)._values.view('i8') - comps = comps.view('i8') - elif is_timedelta64_dtype(comps): - from pandas.tseries.timedeltas import to_timedelta - values = to_timedelta(values)._values.view('i8') - comps = comps.view('i8') elif is_int64_dtype(comps): - pass + f = lambda x, y: lib.ismember_int64(x, set(y)) else: f = lambda x, y: lib.ismember(x, set(values)) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index f67231e78983c..0c274b2f6c4ff 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1359,7 +1359,7 @@ def test_hash_collisions(self): names=['one', 'two']) result = index.get_indexer(index.values) self.assert_numpy_array_equal(result, - np.arange(len(index), dtype='int64')) + np.arange(len(index), dtype='intp')) for i in [0, 1, len(index) - 2, len(index) - 1]: result = index.get_loc(index[i]) diff --git a/pandas/tests/tools/test_tile.py b/pandas/tests/tools/test_tile.py index 11b242bc06e15..cc80c1ff5db29 100644 --- a/pandas/tests/tools/test_tile.py +++ b/pandas/tests/tools/test_tile.py @@ -19,8 +19,8 @@ class TestCut(tm.TestCase): def test_simple(self): data = np.ones(5) result = cut(data, 4, labels=False) - desired = np.array([1, 1, 1, 1, 1]) - tm.assert_numpy_array_equal(result, desired, + expected = np.array([1, 1, 1, 1, 1]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_bins(self): diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index ccd8c2478e8a5..4a3d452228e01 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -4,7 +4,7 @@ from pandas.types.missing import isnull from pandas.types.common import (is_float, is_integer, - is_scalar) + is_scalar, _ensure_int64) from pandas.core.api import Series from pandas.core.categorical import Categorical @@ -215,7 
+215,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, bins = unique_bins side = 'left' if right else 'right' - ids = bins.searchsorted(x, side=side) + ids = _ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: ids[x == bins[0]] = 1 From 19c8032bc4f05bf79ae927d5235ac63bf6b33ebe Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 21 Mar 2017 14:00:10 -0400 Subject: [PATCH 254/353] DOC: Ensure basic flake8 diff checks only Python (#15769) Follow-up to gh-15749 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/source/contributing.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 918d427ee4f4c..9281c51059087 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - [ ] closes #xxxx - [ ] tests added / passed - - [ ] passes ``git diff upstream/master | flake8 --diff`` + - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` - [ ] whatsnew entry diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 7ad5916a8809d..5e551a7fd5349 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -518,7 +518,7 @@ Travis-CI will run the `flake8 `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself on the diff:: - git diff master | flake8 --diff + git diff master --name-only -- '*.py' | flake8 --diff This command will catch any stylistic errors in your changes specifically, but be beware it may not catch all of them. For example, if you delete the only From 163d18ed0d46eeb375f8170f1044808ff40b2a65 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 21 Mar 2017 14:01:32 -0400 Subject: [PATCH 255/353] ENH: support "nrows" and "chunksize" together closes #15755 Author: Pietro Battiston Closes #15756 from toobaz/nrows_chunksize and squashes the following commits: d0288e3 [Pietro Battiston] ENH: support "nrows" and "chunksize" together --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 24 ++++++++----------- pandas/tests/io/parser/common.py | 27 ++++++++++++++++++++++ pandas/tests/io/parser/test_unsupported.py | 9 -------- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 55e3d979b07dd..44f0752fc3df4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -291,6 +291,7 @@ Other enhancements - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`) - The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`) - The ``skiprows`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`10882`) +- The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. 
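For the combined ``nrows``/``chunksize`` support listed above, a minimal usage sketch (illustrative; the file name is hypothetical and assumed to hold at least five data rows):

    import pandas as pd

    # read at most 5 rows in total, yielded in chunks of at most 2 rows
    reader = pd.read_csv('data.csv', chunksize=2, nrows=5)
    for chunk in reader:
        print(chunk.shape)   # (2, ...), (2, ...), (1, ...)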
See the :ref:`Timedelta docs ` (:issue:`15136`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9aedddc811830..18343670fb39e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -384,29 +384,18 @@ def _read(filepath_or_buffer, kwds): # Extract some of the arguments (pass chunksize on). iterator = kwds.get('iterator', False) chunksize = kwds.get('chunksize', None) - nrows = _validate_nrows(kwds.pop('nrows', None)) + nrows = _validate_nrows(kwds.get('nrows', None)) # Create the parser. parser = TextFileReader(filepath_or_buffer, **kwds) - if (nrows is not None) and (chunksize is not None): - raise NotImplementedError("'nrows' and 'chunksize' cannot be used" - " together yet.") - elif nrows is not None: - try: - data = parser.read(nrows) - finally: - parser.close() - return data - - elif chunksize or iterator: + if chunksize or iterator: return parser try: - data = parser.read() + data = parser.read(nrows) finally: parser.close() - return data @@ -445,7 +434,7 @@ def _read(filepath_or_buffer, kwds): 'usecols': None, - # 'nrows': None, + 'nrows': None, # 'iterator': False, 'chunksize': None, 'verbose': False, @@ -749,6 +738,7 @@ def __init__(self, f, engine=None, **kwds): options = self._get_options_with_defaults(engine) self.chunksize = options.pop('chunksize', None) + self.nrows = options.pop('nrows', None) self.squeeze = options.pop('squeeze', False) # might mutate self.engine @@ -1009,6 +999,10 @@ def _create_index(self, ret): def get_chunk(self, size=None): if size is None: size = self.chunksize + if self.nrows is not None: + if self._currow >= self.nrows: + raise StopIteration + size = min(size, self.nrows - self._currow) return self.read(nrows=size) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index df75d14e9702d..24d15dcb96fe7 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -402,6 +402,33 @@ def test_read_chunksize(self): tm.assert_frame_equal(chunks[1], df[2:4]) tm.assert_frame_equal(chunks[2], df[4:]) + def test_read_chunksize_and_nrows(self): + + # gh-15755 + # With nrows + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=2, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(pd.concat(reader), df) + + # chunksize > nrows + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=8, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(pd.concat(reader), df) + + # with changing "size": + reader = self.read_csv(StringIO(self.data1), index_col=0, + chunksize=8, nrows=5) + df = self.read_csv(StringIO(self.data1), index_col=0, nrows=5) + + tm.assert_frame_equal(reader.get_chunk(size=2), df.iloc[:2]) + tm.assert_frame_equal(reader.get_chunk(size=4), df.iloc[2:5]) + with tm.assertRaises(StopIteration): + reader.get_chunk(size=3) + def test_read_chunksize_named(self): reader = self.read_csv( StringIO(self.data1), index_col='index', chunksize=2) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 999db47cf2eaf..48dd5d4ba506b 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -29,15 +29,6 @@ def test_mangle_dupe_cols_false(self): read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) - def test_nrows_and_chunksize(self): - data = 'a b c' - msg = "cannot be used together yet" - - for engine in ('c', 'python'): - with 
tm.assertRaisesRegexp(NotImplementedError, msg): - read_csv(StringIO(data), engine=engine, - nrows=10, chunksize=5) - def test_c_engine(self): # see gh-6607 data = 'a b c\n1 2 3' From 1c9d46a3bb8737c877b0a15aaea15dfb0172ac1c Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 21 Mar 2017 17:52:18 -0400 Subject: [PATCH 256/353] BUG: Enforce correct encoding in stata Ensure StataReader and StataWriter have the correct encoding. Standardized default encoding to 'latin-1' closes #15723 Author: Kevin Sheppard Closes #15768 from bashtage/limit-stata-encoding and squashes the following commits: 8278be7 [Kevin Sheppard] BUG: Fix limited key range on 32-bit platofrms 2f02697 [Kevin Sheppard] BUG: Enforce correct encoding in stata --- doc/source/whatsnew/v0.20.0.txt | 3 +++ pandas/io/stata.py | 29 ++++++++++++++++++++++------- pandas/tests/io/test_stata.py | 7 +++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 44f0752fc3df4..eeb568c2e2558 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -919,6 +919,8 @@ Bug Fixes - Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) +- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) + - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) @@ -933,3 +935,4 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) + diff --git a/pandas/io/stata.py b/pandas/io/stata.py index af4bc6a6b7ddb..1d2951da68086 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -33,6 +33,9 @@ from pandas._libs.lib import max_len_string_array, infer_dtype from pandas._libs.tslib import NaT, Timestamp +VALID_ENCODINGS = ('ascii', 'us-ascii', 'latin-1', 'latin_1', 'iso-8859-1', + 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'L1') + _version_error = ("Version of given Stata file is not 104, 105, 108, " "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), " "115 (Stata 12), 117 (Stata 13), or 118 (Stata 14)") @@ -45,7 +48,7 @@ _encoding_params = """\ encoding : string, None or encoding - Encoding used to parse the files. None defaults to iso-8859-1.""" + Encoding used to parse the files. None defaults to latin-1.""" _statafile_processing_params2 = """\ index : identifier of index column @@ -816,9 +819,14 @@ def get_base_missing_value(cls, dtype): class StataParser(object): - _default_encoding = 'iso-8859-1' + _default_encoding = 'latin-1' def __init__(self, encoding): + if encoding is not None: + if encoding not in VALID_ENCODINGS: + raise ValueError('Unknown encoding. Only latin-1 and ascii ' + 'supported.') + self._encoding = encoding # type code. 
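# Illustrative sketch (not part of the patch; assumes pandas 0.20): an
# encoding outside VALID_ENCODINGS now fails fast instead of silently
# writing a potentially corrupt file.
import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3]})
try:
    df.to_stata('out.dta', encoding='utf-8')   # utf-8 is not supported
except ValueError as err:
    print(err)   # Unknown encoding. Only latin-1 and ascii supported.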
@@ -936,7 +944,7 @@ def __init__(self, path_or_buf, convert_dates=True, convert_categoricals=True, index=None, convert_missing=False, preserve_dtypes=True, columns=None, order_categoricals=True, - encoding='iso-8859-1', chunksize=None): + encoding='latin-1', chunksize=None): super(StataReader, self).__init__(encoding) self.col_sizes = () @@ -949,6 +957,10 @@ def __init__(self, path_or_buf, convert_dates=True, self._preserve_dtypes = preserve_dtypes self._columns = columns self._order_categoricals = order_categoricals + if encoding is not None: + if encoding not in VALID_ENCODINGS: + raise ValueError('Unknown encoding. Only latin-1 and ascii ' + 'supported.') self._encoding = encoding self._chunksize = chunksize @@ -1362,7 +1374,8 @@ def _read_value_labels(self): def _read_strls(self): self.path_or_buf.seek(self.seek_strls) - self.GSO = {0: ''} + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO = {'0': ''} while True: if self.path_or_buf.read(3) != b'GSO': break @@ -1387,7 +1400,8 @@ def _read_strls(self): if self.format_version == 117: encoding = self._encoding or self._default_encoding va = va[0:-1].decode(encoding) - self.GSO[v_o] = va + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO[str(v_o)] = va # legacy @Appender('DEPRECATED: ' + _data_method_doc) @@ -1623,7 +1637,8 @@ def _insert_strls(self, data): for i, typ in enumerate(self.typlist): if typ != 'Q': continue - data.iloc[:, i] = [self.GSO[k] for k in data.iloc[:, i]] + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] return data def _do_select_columns(self, data, columns): @@ -1855,7 +1870,7 @@ class StataWriter(StataParser): write_index : bool Write the index to Stata dataset. encoding : str - Default is latin-1. Unicode is not supported + Default is latin-1. Only latin-1 and ascii are supported. byteorder : str Can be ">", "<", "little", or "big". 
default is `sys.byteorder` time_stamp : datetime diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 5188adf54b887..db594889c91ee 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1276,3 +1276,10 @@ def test_out_of_range_float(self): original.to_stata(path) tm.assertTrue('ColumnTooBig' in cm.exception) tm.assertTrue('infinity' in cm.exception) + + def test_invalid_encoding(self): + # GH15723, validate encoding + original = self.read_csv(self.csv3) + with tm.assertRaises(ValueError): + with tm.ensure_clean() as path: + original.to_stata(path, encoding='utf-8') From 32dd92912f15a5c66035f5674c116d23f21bdbca Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 21 Mar 2017 19:19:25 -0400 Subject: [PATCH 257/353] CLN: relocate lib.ismember* to hashtable space - fixes .isin on 32-bit (hopefully) - perf about 30% better - releases GIL Author: Jeff Reback Closes #15773 from jreback/ismember and squashes the following commits: a7dfe51 [Jeff Reback] CLN: relocate lib.ismember* to hashtable space --- pandas/_libs/hashtable_func_helper.pxi.in | 98 ++++++++++++++++++++--- pandas/_libs/lib.pyx | 72 +---------------- pandas/core/algorithms.py | 32 ++++++-- pandas/core/frame.py | 4 +- pandas/indexes/multi.py | 5 +- pandas/indexes/numeric.py | 8 +- pandas/io/parsers.py | 6 +- pandas/tests/indexes/test_base.py | 5 +- pandas/tseries/tools.py | 3 +- 9 files changed, 131 insertions(+), 102 deletions(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index fa373905ef08a..0608af8f8504b 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -11,14 +11,14 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in {{py: # dtype, ttype -dtypes = [('float64', 'float64'), - ('uint64', 'uint64'), - ('object', 'pymap'), - ('int64', 'int64')] +dtypes = [('float64', 'float64', 'float64_t'), + ('uint64', 'uint64', 'uint64_t'), + ('object', 'pymap', 'object'), + ('int64', 'int64', 'int64_t')] }} -{{for dtype, ttype in dtypes}} +{{for dtype, ttype, scalar in dtypes}} @cython.wraparound(False) @@ -34,9 +34,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values, khiter_t k Py_ssize_t i, n = len(values) - {{if dtype != 'object'}} - {{dtype}}_t val - {{endif}} + {{scalar}} val int ret = 0 @@ -79,7 +77,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values, {{if dtype == 'object'}} cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna): {{else}} -cpdef value_count_{{dtype}}({{dtype}}_t[:] values, bint dropna): +cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna): {{endif}} cdef: Py_ssize_t i=0 @@ -130,12 +128,11 @@ cpdef value_count_{{dtype}}({{dtype}}_t[:] values, bint dropna): @cython.boundscheck(False) {{if dtype == 'object'}} - def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): {{else}} -def duplicated_{{dtype}}({{dtype}}_t[:] values, object keep='first'): +def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'): {{endif}} cdef: int ret = 0 @@ -203,8 +200,87 @@ def duplicated_{{dtype}}({{dtype}}_t[:] values, object keep='first'): kh_destroy_{{ttype}}(table) return out + +#---------------------------------------------------------------------- +# Membership +#---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} + +def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] 
values, bint hasnans=0): +{{else}} + +def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values, bint hasnans=0): +{{endif}} + + """ + Return boolean of values in arr on an + element by-element basis + + Parameters + ---------- + arr : {{dtype}} ndarray + values : {{dtype}} ndarray + hasnans : bint, optional + + Returns + ------- + boolean ndarry len of (arr) + """ + cdef: + Py_ssize_t i, n, k + int ret = 0 + ndarray[uint8_t] result + {{scalar}} val + kh_{{ttype}}_t * table = kh_init_{{ttype}}() + + + # construct the table + n = len(values) + kh_resize_{{ttype}}(table, min(n, len(values))) + + {{if dtype == 'object'}} + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + {{else}} + with nogil: + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + {{endif}} + + # test membership + n = len(arr) + result = np.empty(n, dtype=np.uint8) + + {{if dtype == 'object'}} + for i in range(n): + val = arr[i] + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + result[i] = 1 + else: + result[i] = hasnans and val != val + {{else}} + with nogil: + for i in range(n): + val = arr[i] + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + result[i] = 1 + else: + result[i] = hasnans and val != val + {{endif}} + + kh_destroy_{{ttype}}(table) + return result.view(np.bool_) + {{endfor}} + + #---------------------------------------------------------------------- # Mode Computations #---------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index b4724bc3dd59b..f78040e5a52f2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -13,6 +13,7 @@ cdef extern from "numpy/arrayobject.h": cdef enum NPY_TYPES: NPY_intp "NPY_INTP" +from libc.stdlib cimport malloc, free from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, PyDict_Contains, PyDict_Keys, @@ -111,77 +112,6 @@ cpdef map_indices_list(list index): return result -from libc.stdlib cimport malloc, free - - -def ismember_nans(float64_t[:] arr, set values, bint hasnans): - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - float64_t val - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - val = arr[i] - result[i] = val in values or hasnans and isnan(val) - - return result.view(np.bool_) - - -def ismember(ndarray arr, set values): - """ - Checks whether - - Parameters - ---------- - arr : ndarray - values : set - - Returns - ------- - ismember : ndarray (boolean dtype) - """ - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - object val - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - val = util.get_value_at(arr, i) - result[i] = val in values - - return result.view(np.bool_) - - -def ismember_int64(ndarray[int64_t] arr, set values): - """ - Checks whether - - Parameters - ---------- - arr : ndarray of int64 - values : set - - Returns - ------- - ismember : ndarray (boolean dtype) - """ - cdef: - Py_ssize_t i, n - ndarray[uint8_t] result - int64_t v - - n = len(arr) - result = np.empty(n, dtype=np.uint8) - for i in range(n): - result[i] = arr[i] in values - - return result.view(np.bool_) - - @cython.wraparound(False) @cython.boundscheck(False) def memory_usage_of_objects(ndarray[object, ndim=1] arr): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 00a3264e6c74a..9a8d0a779105e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -12,12 +12,12 @@ from pandas.types.common import (is_unsigned_integer_dtype, is_signed_integer_dtype, 
is_integer_dtype, - is_int64_dtype, is_categorical_dtype, is_extension_type, is_datetimetz, is_period_dtype, is_period_arraylike, + is_numeric_dtype, is_float_dtype, is_bool_dtype, needs_i8_conversion, @@ -197,19 +197,37 @@ def isin(comps, values): except TypeError: # object array conversion will fail pass - else: + elif is_numeric_dtype(comps): comps = np.asarray(comps) values = np.asarray(values) + else: + comps = np.asarray(comps).astype(object) + values = np.asarray(values).astype(object) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d + f = lambda x, y: htable.ismember_object(x, values) if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000: - f = lambda x, y: np.in1d(x, np.asarray(list(y))) - elif is_int64_dtype(comps): - f = lambda x, y: lib.ismember_int64(x, set(y)) - else: - f = lambda x, y: lib.ismember(x, set(values)) + f = lambda x, y: np.in1d(x, y) + elif is_integer_dtype(comps): + try: + values = values.astype('int64', copy=False) + comps = comps.astype('int64', copy=False) + f = lambda x, y: htable.ismember_int64(x, y) + except (TypeError, ValueError): + values = values.astype(object) + comps = comps.astype(object) + + elif is_float_dtype(comps): + try: + values = values.astype('float64', copy=False) + comps = comps.astype('float64', copy=False) + checknull = isnull(values).any() + f = lambda x, y: htable.ismember_float64(x, y, checknull) + except (TypeError, ValueError): + values = values.astype(object) + comps = comps.astype(object) return f(comps, values) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 732d88b47ae2a..b49aa926d1923 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5358,8 +5358,8 @@ def isin(self, values): "you passed a " "{0!r}".format(type(values).__name__)) return DataFrame( - lib.ismember(self.values.ravel(), - set(values)).reshape(self.shape), self.index, + algorithms.isin(self.values.ravel(), + values).reshape(self.shape), self.index, self.columns) # ---------------------------------------------------------------------- diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 978492131ca89..e6ae0605d4758 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1392,7 +1392,7 @@ def _drop_from_level(self, labels, level): index = self.levels[i] values = index.get_indexer(labels) - mask = ~lib.ismember(self.labels[i], set(values)) + mask = ~algos.isin(self.labels[i], values) return self[mask] @@ -2463,7 +2463,8 @@ def _wrap_joined_index(self, joined, other): @Appender(Index.isin.__doc__) def isin(self, values, level=None): if level is None: - return lib.ismember(np.array(self), set(values)) + return algos.isin(self.values, + MultiIndex.from_tuples(values).values) else: num = self._get_level_number(level) levs = self.levels[num] diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 2f897c81975c2..31258c785d9e8 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -1,13 +1,13 @@ import numpy as np -from pandas._libs import (lib, index as libindex, +from pandas._libs import (index as libindex, algos as libalgos, join as libjoin) from pandas.types.common import (is_dtype_equal, pandas_dtype, is_float_dtype, is_object_dtype, is_integer_dtype, is_scalar) -from pandas.types.missing import isnull from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat +from pandas.core import algorithms from pandas.indexes.base import Index, InvalidIndexError, _index_shared_docs 
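# Illustrative sketch (not part of the patch; assumes pandas 0.20): the
# hashtable-backed membership check keeps isin dtype-aware, so NaN in a
# float index still matches NaN but no longer matches NaT.
import numpy as np
import pandas as pd

idx = pd.Float64Index([1.0, np.nan])
print(idx.isin([np.nan]))  # [False  True]
print(idx.isin([pd.NaT]))  # [False False]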
from pandas.util.decorators import Appender, cache_readonly import pandas.indexes.base as ibase @@ -379,11 +379,9 @@ def is_unique(self): @Appender(Index.isin.__doc__) def isin(self, values, level=None): - value_set = set(values) if level is not None: self._validate_index_level(level) - return lib.ismember_nans(np.array(self), value_set, - isnull(list(value_set)).any()) + return algorithms.isin(np.array(self), values) Float64Index._add_numeric_methods() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 18343670fb39e..90d72c0bceeb7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -26,6 +26,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical +from pandas.core import algorithms from pandas.core.common import AbstractMethodError from pandas.io.date_converters import generic_parser from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, @@ -1388,7 +1389,8 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, try: values = lib.map_infer(values, conv_f) except ValueError: - mask = lib.ismember(values, na_values).view(np.uint8) + mask = algorithms.isin( + values, list(na_values)).view(np.uint8) values = lib.map_infer_mask(values, conv_f, mask) cvals, na_count = self._infer_types( @@ -1436,7 +1438,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): na_count = 0 if issubclass(values.dtype.type, (np.number, np.bool_)): - mask = lib.ismember(values, na_values) + mask = algorithms.isin(values, list(na_values)) na_count = mask.sum() if na_count > 0: if is_integer_dtype(values): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7199a38bb7a80..c4dc10d8174cc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1363,14 +1363,17 @@ def test_isin_nan(self): np.array([False, False])) tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([pd.NaT]), np.array([False, False])) + # Float64Index overrides isin, so must be checked separately tm.assert_numpy_array_equal(Float64Index([1.0, np.nan]).isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal( Float64Index([1.0, np.nan]).isin([float('nan')]), np.array([False, True])) + + # we cannot compare NaT with NaN tm.assert_numpy_array_equal(Float64Index([1.0, np.nan]).isin([pd.NaT]), - np.array([False, True])) + np.array([False, False])) def test_isin_level_kwarg(self): def check_idx(idx): diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 093331e861fa7..5dc9746c6d6f9 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -13,6 +13,7 @@ from pandas.types.generic import (ABCIndexClass, ABCSeries, ABCDataFrame) from pandas.types.missing import notnull +from pandas.core import algorithms import pandas.compat as compat @@ -577,7 +578,7 @@ def calc_with_mask(carg, mask): # string with NaN-like try: - mask = ~lib.ismember(arg, tslib._nat_strings) + mask = ~algorithms.isin(arg, list(tslib._nat_strings)) return calc_with_mask(arg, mask) except: pass From a20009f7fe93c17c13447ba0aff9756b2b5d4863 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 22 Mar 2017 09:03:32 +0100 Subject: [PATCH 258/353] BUG: Check that values for "nrows" and "chunksize" are valid (#15774) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 32 +++++++++++++++++++++----------- pandas/tests/io/parser/common.py | 17 ++++++++++++++++- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git 
a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index eeb568c2e2558..5ac7624856040 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -815,6 +815,7 @@ Bug Fixes - Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) +- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) - Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 90d72c0bceeb7..af57cc3ce7950 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -345,24 +345,34 @@ """ % (_parser_params % (_fwf_widths, '')) -def _validate_nrows(nrows): +def _validate_integer(name, val, min_val=0): """ - Checks whether the 'nrows' parameter for parsing is either + Checks whether the 'name' parameter for parsing is either an integer OR float that can SAFELY be cast to an integer without losing accuracy. Raises a ValueError if that is not the case. + + Parameters + ---------- + name : string + Parameter name (used for error reporting) + val : int or float + The value to check + min_val : int + Minimum allowed value (val < min_val will result in a ValueError) """ - msg = "'nrows' must be an integer" + msg = "'{name:s}' must be an integer >={min_val:d}".format(name=name, + min_val=min_val) - if nrows is not None: - if is_float(nrows): - if int(nrows) != nrows: + if val is not None: + if is_float(val): + if int(val) != val: raise ValueError(msg) - nrows = int(nrows) - elif not is_integer(nrows): + val = int(val) + elif not (is_integer(val) and val >= min_val): raise ValueError(msg) - return nrows + return val def _read(filepath_or_buffer, kwds): @@ -384,8 +394,8 @@ def _read(filepath_or_buffer, kwds): # Extract some of the arguments (pass chunksize on). iterator = kwds.get('iterator', False) - chunksize = kwds.get('chunksize', None) - nrows = _validate_nrows(kwds.get('nrows', None)) + chunksize = _validate_integer('chunksize', kwds.get('chunksize', None), 1) + nrows = _validate_integer('nrows', kwds.get('nrows', None)) # Create the parser. 
parser = TextFileReader(filepath_or_buffer, **kwds) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 24d15dcb96fe7..2c8bca490f274 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -384,7 +384,7 @@ def test_read_nrows(self): df = self.read_csv(StringIO(self.data1), nrows=3.0) tm.assert_frame_equal(df, expected) - msg = "must be an integer" + msg = r"'nrows' must be an integer >=0" with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(self.data1), nrows=1.2) @@ -392,6 +392,9 @@ def test_read_nrows(self): with tm.assertRaisesRegexp(ValueError, msg): self.read_csv(StringIO(self.data1), nrows='foo') + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), nrows=-1) + def test_read_chunksize(self): reader = self.read_csv(StringIO(self.data1), index_col=0, chunksize=2) df = self.read_csv(StringIO(self.data1), index_col=0) @@ -402,6 +405,18 @@ def test_read_chunksize(self): tm.assert_frame_equal(chunks[1], df[2:4]) tm.assert_frame_equal(chunks[2], df[4:]) + # with invalid chunksize value: + msg = r"'chunksize' must be an integer >=1" + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize=1.3) + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize='foo') + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(StringIO(self.data1), chunksize=0) + def test_read_chunksize_and_nrows(self): # gh-15755 From bc1235e8277568b4d095b5fd4a5d7990a7fafefd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Mar 2017 07:53:46 -0400 Subject: [PATCH 259/353] COMPAT: 32-bit skips (#15776) closes #14183 --- pandas/tests/indexes/common.py | 1 - pandas/tests/indexes/period/test_period.py | 9 ++++++++- pandas/tests/test_algos.py | 4 +++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index b1e6bd7520c69..e9122f7a17359 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -121,7 +121,6 @@ def test_reindex_base(self): idx.get_indexer(idx, method='invalid') def test_ndarray_compat_properties(self): - idx = self.create_index() self.assertTrue(idx.T.equals(idx)) self.assertTrue(idx.transpose().equals(idx)) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 1739211982b10..4fbadfca06ede 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -1,3 +1,5 @@ +import pytest + import numpy as np from numpy.random import randn from datetime import timedelta @@ -6,7 +8,7 @@ from pandas.util import testing as tm from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, Index, Period, Int64Index, Series, DataFrame, date_range, - offsets) + offsets, compat) from ..datetimelike import DatetimeLike @@ -626,6 +628,11 @@ def test_shift_nat(self): tm.assert_index_equal(result, expected) self.assertEqual(result.name, expected.name) + def test_ndarray_compat_properties(self): + if compat.is_platform_32bit(): + pytest.skip("skipping on 32bit") + super(TestPeriodIndex, self).test_ndarray_compat_properties() + def test_shift_ndarray(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7a3cc3e2c3cd7..ce925f756edb7 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -648,7 
+648,9 @@ def test_value_counts_uint64(self): expected = Series([1, 1], index=[-1, 2**63]) result = algos.value_counts(arr) - tm.assert_series_equal(result, expected) + # 32-bit linux has a different ordering + if not compat.is_platform_32bit(): + tm.assert_series_equal(result, expected) class TestDuplicated(tm.TestCase): From 2a3b05a3a7167c7b384375e9442c350f740e9629 Mon Sep 17 00:00:00 2001 From: Dominik Stanczak Date: Wed, 22 Mar 2017 07:55:29 -0400 Subject: [PATCH 260/353] CLN/INT: Rename _possibly to _maybe (GH15764) Also rename "private" functions in pandas.type.cast closes #15764 Author: Dominik Stanczak Closes #15771 from StanczakDominik/rename-possibly and squashes the following commits: 486b932 [Dominik Stanczak] Cleanup missed linting errors 188c48b [Dominik Stanczak] CLN/INT: Rename _possibly to _maybe --- pandas/computation/expr.py | 34 +++++----- pandas/core/algorithms.py | 6 +- pandas/core/categorical.py | 21 +++--- pandas/core/frame.py | 54 +++++++-------- pandas/core/generic.py | 6 +- pandas/core/groupby.py | 6 +- pandas/core/internals.py | 67 +++++++++--------- pandas/core/nanops.py | 4 +- pandas/core/ops.py | 10 +-- pandas/core/panel.py | 12 ++-- pandas/core/reshape.py | 4 +- pandas/core/series.py | 20 +++--- pandas/indexes/base.py | 6 +- pandas/indexes/frozen.py | 4 +- pandas/io/parsers.py | 6 +- pandas/sparse/array.py | 12 ++-- pandas/sparse/frame.py | 6 +- pandas/tests/types/test_cast.py | 116 ++++++++++++++++---------------- pandas/tools/util.py | 5 +- pandas/tseries/index.py | 2 +- pandas/tseries/tdi.py | 2 +- pandas/types/cast.py | 56 +++++++-------- 22 files changed, 228 insertions(+), 231 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index a782287175327..e78806b38c667 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -348,7 +348,7 @@ def _rewrite_membership_op(self, node, left, right): op = self.visit(op_instance) return op, op_instance, left, right - def _possibly_transform_eq_ne(self, node, left=None, right=None): + def _maybe_transform_eq_ne(self, node, left=None, right=None): if left is None: left = self.visit(node.left, side='left') if right is None: @@ -357,7 +357,7 @@ def _possibly_transform_eq_ne(self, node, left=None, right=None): right) return op, op_class, left, right - def _possibly_downcast_constants(self, left, right): + def _maybe_downcast_constants(self, left, right): f32 = np.dtype(np.float32) if left.isscalar and not right.isscalar and right.return_type == f32: # right is a float32 array, left is a scalar @@ -370,7 +370,7 @@ def _possibly_downcast_constants(self, left, right): return left, right - def _possibly_eval(self, binop, eval_in_python): + def _maybe_eval(self, binop, eval_in_python): # eval `in` and `not in` (for now) in "partial" python space # things that can be evaluated in "eval" space will be turned into # temporary variables. 
for example, @@ -380,10 +380,10 @@ def _possibly_eval(self, binop, eval_in_python): return binop.evaluate(self.env, self.engine, self.parser, self.term_type, eval_in_python) - def _possibly_evaluate_binop(self, op, op_class, lhs, rhs, - eval_in_python=('in', 'not in'), - maybe_eval_in_python=('==', '!=', '<', '>', - '<=', '>=')): + def _maybe_evaluate_binop(self, op, op_class, lhs, rhs, + eval_in_python=('in', 'not in'), + maybe_eval_in_python=('==', '!=', '<', '>', + '<=', '>=')): res = op(lhs, rhs) if res.has_invalid_return_type: @@ -397,24 +397,24 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs, getattr(rhs, 'is_datetime', False)): # all date ops must be done in python bc numexpr doesn't work # well with NaT - return self._possibly_eval(res, self.binary_ops) + return self._maybe_eval(res, self.binary_ops) if res.op in eval_in_python: # "in"/"not in" ops are always evaluated in python - return self._possibly_eval(res, eval_in_python) + return self._maybe_eval(res, eval_in_python) elif self.engine != 'pytables': if (getattr(lhs, 'return_type', None) == object or getattr(rhs, 'return_type', None) == object): # evaluate "==" and "!=" in python if either of our operands # has an object return type - return self._possibly_eval(res, eval_in_python + - maybe_eval_in_python) + return self._maybe_eval(res, eval_in_python + + maybe_eval_in_python) return res def visit_BinOp(self, node, **kwargs): - op, op_class, left, right = self._possibly_transform_eq_ne(node) - left, right = self._possibly_downcast_constants(left, right) - return self._possibly_evaluate_binop(op, op_class, left, right) + op, op_class, left, right = self._maybe_transform_eq_ne(node) + left, right = self._maybe_downcast_constants(left, right) + return self._maybe_evaluate_binop(op, op_class, left, right) def visit_Div(self, node, **kwargs): truediv = self.env.scope['truediv'] @@ -662,9 +662,9 @@ def visitor(x, y): lhs = self._try_visit_binop(x) rhs = self._try_visit_binop(y) - op, op_class, lhs, rhs = self._possibly_transform_eq_ne(node, lhs, - rhs) - return self._possibly_evaluate_binop(op, node.op, lhs, rhs) + op, op_class, lhs, rhs = self._maybe_transform_eq_ne( + node, lhs, rhs) + return self._maybe_evaluate_binop(op, node.op, lhs, rhs) operands = node.values return reduce(visitor, operands) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9a8d0a779105e..3b77bda6f69f0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -7,7 +7,7 @@ import numpy as np from pandas import compat, _np_version_under1p8 -from pandas.types.cast import _maybe_promote +from pandas.types.cast import maybe_promote from pandas.types.generic import ABCSeries, ABCIndex from pandas.types.common import (is_unsigned_integer_dtype, is_signed_integer_dtype, @@ -1297,7 +1297,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = _maybe_promote(arr.dtype, fill_value) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: @@ -1380,7 +1380,7 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = _maybe_promote(arr.dtype, fill_value) + dtype, 
fill_value = maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index af51c7f2e2dc1..0e58c18631588 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,8 +10,8 @@ from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex from pandas.types.missing import isnull, notnull -from pandas.types.cast import (_possibly_infer_to_datetimelike, - _coerce_indexer_dtype) +from pandas.types.cast import (maybe_infer_to_datetimelike, + coerce_indexer_dtype) from pandas.types.dtypes import CategoricalDtype from pandas.types.common import (_ensure_int64, _ensure_object, @@ -237,7 +237,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): if fastpath: # fast path - self._codes = _coerce_indexer_dtype(values, categories) + self._codes = coerce_indexer_dtype(values, categories) self._categories = self._validate_categories( categories, fastpath=isinstance(categories, ABCIndexClass)) self._ordered = ordered @@ -266,8 +266,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): # correctly no need here this is an issue because _sanitize_array # also coerces np.nan to a string under certain versions of numpy # as well - values = _possibly_infer_to_datetimelike(values, - convert_dates=True) + values = maybe_infer_to_datetimelike(values, convert_dates=True) if not isinstance(values, np.ndarray): values = _convert_to_list_like(values) from pandas.core.series import _sanitize_array @@ -324,7 +323,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): self.set_ordered(ordered or False, inplace=True) self._categories = categories - self._codes = _coerce_indexer_dtype(codes, categories) + self._codes = coerce_indexer_dtype(codes, categories) @property def _constructor(self): @@ -877,7 +876,7 @@ def add_categories(self, new_categories, inplace=False): new_categories = list(self._categories) + list(new_categories) cat = self if inplace else self.copy() cat._categories = self._validate_categories(new_categories) - cat._codes = _coerce_indexer_dtype(cat._codes, new_categories) + cat._codes = coerce_indexer_dtype(cat._codes, new_categories) if not inplace: return cat @@ -961,7 +960,7 @@ def remove_unused_categories(self, inplace=False): idx, inv = idx[1:], inv - 1 cat._categories = cat.categories.take(idx) - cat._codes = _coerce_indexer_dtype(inv, self._categories) + cat._codes = coerce_indexer_dtype(inv, self._categories) if not inplace: return cat @@ -1065,8 +1064,8 @@ def __setstate__(self, state): state['_categories'] = self._validate_categories(state.pop( '_levels')) if '_codes' not in state and 'labels' in state: - state['_codes'] = _coerce_indexer_dtype(state.pop('labels'), - state['_categories']) + state['_codes'] = coerce_indexer_dtype( + state.pop('labels'), state['_categories']) # 0.16.0 ordered change if '_ordered' not in state: @@ -2062,7 +2061,7 @@ def _get_codes_for_values(values, categories): (_, _), cats = _get_data_algo(categories, _hashtables) t = hash_klass(len(cats)) t.map_locations(cats) - return _coerce_indexer_dtype(t.lookup(vals), cats) + return coerce_indexer_dtype(t.lookup(vals), cats) def _convert_to_list_like(list_like): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b49aa926d1923..6b5e8e0799421 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ 
-23,15 +23,15 @@ import numpy as np import numpy.ma as ma -from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar, - _possibly_cast_to_datetime, - _possibly_infer_to_datetimelike, - _possibly_convert_platform, - _possibly_downcast_to_dtype, - _invalidate_string_dtypes, - _coerce_to_dtypes, - _maybe_upcast_putmask, - _find_common_type) +from pandas.types.cast import (maybe_upcast, infer_dtype_from_scalar, + maybe_cast_to_datetime, + maybe_infer_to_datetimelike, + maybe_convert_platform, + maybe_downcast_to_dtype, + invalidate_string_dtypes, + coerce_to_dtypes, + maybe_upcast_putmask, + find_common_type) from pandas.types.common import (is_categorical_dtype, is_object_dtype, is_extension_type, @@ -275,7 +275,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, else: mask = ma.getmaskarray(data) if mask.any(): - data, fill_value = _maybe_upcast(data, copy=True) + data, fill_value = maybe_upcast(data, copy=True) data[mask] = fill_value else: data = data.copy() @@ -335,7 +335,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if isinstance(data, compat.string_types) and dtype is None: dtype = np.object_ if dtype is None: - dtype, data = _infer_dtype_from_scalar(data) + dtype, data = infer_dtype_from_scalar(data) values = np.empty((len(index), len(columns)), dtype=dtype) values.fill(data) @@ -469,7 +469,7 @@ def _get_axes(N, K, index=index, columns=columns): # on the entire block; this is to convert if we have datetimelike's # embedded in an object type if dtype is None and is_object_dtype(values): - values = _possibly_infer_to_datetimelike(values) + values = maybe_infer_to_datetimelike(values) return create_block_manager_from_blocks([values], [columns, index]) @@ -2359,7 +2359,7 @@ def select_dtypes(self, include=None, exclude=None): include, exclude = map( lambda x: frozenset(map(_get_dtype_from_object, x)), selection) for dtypes in (include, exclude): - _invalidate_string_dtypes(dtypes) + invalidate_string_dtypes(dtypes) # can't both include AND exclude! 
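Only the leading underscore changes for these helpers; their behaviour is unchanged. A small illustrative sketch of two of the renamed names (infer_dtype_from_scalar, used in the hunk above, and maybe_promote from pandas.types.cast), with results as pinned down by the test changes further below:

import numpy as np
from pandas.types.cast import infer_dtype_from_scalar, maybe_promote

dtype, value = infer_dtype_from_scalar(3.14)    # -> np.float64, 3.14
dtype, value = infer_dtype_from_scalar(True)    # -> np.bool_, True

# promoting int64 to hold a NaN fill value upcasts to float64
dtype, fill_value = maybe_promote(np.dtype('int64'), fill_value=np.nan)
# -> dtype('float64'), nan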
if not include.isdisjoint(exclude): @@ -2659,7 +2659,7 @@ def reindexer(value): value = _sanitize_index(value, self.index, copy=False) if not isinstance(value, (np.ndarray, Index)): if isinstance(value, list) and len(value) > 0: - value = _possibly_convert_platform(value) + value = maybe_convert_platform(value) else: value = com._asarray_tuplesafe(value) elif value.ndim == 2: @@ -2671,13 +2671,13 @@ def reindexer(value): # possibly infer to datetimelike if is_object_dtype(value.dtype): - value = _possibly_infer_to_datetimelike(value) + value = maybe_infer_to_datetimelike(value) else: # upcast the scalar - dtype, value = _infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) value = np.repeat(value, len(self.index)).astype(dtype) - value = _possibly_cast_to_datetime(value, dtype) + value = maybe_cast_to_datetime(value, dtype) # return internal types directly if is_extension_type(value): @@ -3000,8 +3000,8 @@ def _maybe_casted_values(index, labels=None): else: values = values.take(labels) if mask.any(): - values, changed = _maybe_upcast_putmask(values, mask, - np.nan) + values, changed = maybe_upcast_putmask( + values, mask, np.nan) return values new_index = _default_index(len(new_obj)) @@ -3722,7 +3722,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): # if we have different dtypes, possibily promote new_dtype = this_dtype if not is_dtype_equal(this_dtype, other_dtype): - new_dtype = _find_common_type([this_dtype, other_dtype]) + new_dtype = find_common_type([this_dtype, other_dtype]) if not is_dtype_equal(this_dtype, new_dtype): series = series.astype(new_dtype) if not is_dtype_equal(other_dtype, new_dtype): @@ -3743,13 +3743,13 @@ def combine(self, other, func, fill_value=None, overwrite=True): # try to downcast back to the original dtype if needs_i8_conversion_i: # ToDo: This conversion should be handled in - # _possibly_cast_to_datetime but the change affects lot... + # _maybe_cast_to_datetime but the change affects lot... 
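maybe_downcast_to_dtype with 'infer' (the mode exercised by the test_cast.py changes below) only downcasts when every value round-trips to an integer within a small tolerance; a rough sketch of that behaviour:

import numpy as np
from pandas.types.cast import maybe_downcast_to_dtype

# all values are integers up to rounding noise, so the array is downcast
maybe_downcast_to_dtype(np.array([8., 8., 8., 8., 8.9999999999995]), 'infer')
# -> array([8, 8, 8, 8, 9])

# here downcasting would lose information, so the input comes back unchanged
maybe_downcast_to_dtype(np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), 'infer')
# -> array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])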
if is_datetime64tz_dtype(new_dtype): arr = DatetimeIndex._simple_new(arr, tz=new_dtype.tz) else: - arr = _possibly_cast_to_datetime(arr, new_dtype) + arr = maybe_cast_to_datetime(arr, new_dtype) else: - arr = _possibly_downcast_to_dtype(arr, this_dtype) + arr = maybe_downcast_to_dtype(arr, this_dtype) result[col] = arr @@ -5003,7 +5003,7 @@ def f(x): # try to coerce to the original dtypes item by item if we can if axis == 0: - result = _coerce_to_dtypes(result, self.dtypes) + result = coerce_to_dtypes(result, self.dtypes) return Series(result, index=labels) @@ -5505,7 +5505,7 @@ def _prep_ndarray(values, copy=True): return np.empty((0, 0), dtype=object) def convert(v): - return _possibly_convert_platform(v) + return maybe_convert_platform(v) # we could have a 1-dim or 2-dim list here # this is equiv of np.asarray, but does object conversion @@ -5601,7 +5601,7 @@ def _masked_rec_array_to_mgr(data, index, columns, dtype, copy): for fv, arr, col in zip(fill_value, arrays, arr_columns): mask = ma.getmaskarray(data[col]) if mask.any(): - arr, fv = _maybe_upcast(arr, fill_value=fv, copy=True) + arr, fv = maybe_upcast(arr, fill_value=fv, copy=True) arr[mask] = fv new_arrays.append(arr) @@ -5699,7 +5699,7 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def convert(arr): if dtype != object and dtype != np.object: arr = lib.maybe_convert_objects(arr, try_float=coerce_float) - arr = _possibly_cast_to_datetime(arr, dtype) + arr = maybe_cast_to_datetime(arr, dtype) return arr arrays = [convert(arr) for arr in content] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1db9677659ca3..87052800b8fb5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -23,7 +23,7 @@ is_list_like, is_dict_like, is_re_compilable) -from pandas.types.cast import _maybe_promote, _maybe_upcast_putmask +from pandas.types.cast import maybe_promote, maybe_upcast_putmask from pandas.types.missing import isnull, notnull from pandas.types.generic import ABCSeries, ABCPanel @@ -4956,10 +4956,10 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # or not try_quick if not try_quick: - dtype, fill_value = _maybe_promote(other.dtype) + dtype, fill_value = maybe_promote(other.dtype) new_other = np.empty(len(icond), dtype=dtype) new_other.fill(fill_value) - _maybe_upcast_putmask(new_other, icond, other) + maybe_upcast_putmask(new_other, icond, other) other = new_other else: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 4095a14aa5970..0a63981290df3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -32,7 +32,7 @@ _ensure_object, _ensure_categorical, _ensure_float) -from pandas.types.cast import _possibly_downcast_to_dtype +from pandas.types.cast import maybe_downcast_to_dtype from pandas.types.missing import isnull, notnull, _maybe_fill from pandas.core.common import (_values_from_object, AbstractMethodError, @@ -783,7 +783,7 @@ def _try_cast(self, result, obj, numeric_only=False): if not is_scalar(result): if numeric_only and is_numeric_dtype(dtype) or not numeric_only: - result = _possibly_downcast_to_dtype(result, dtype) + result = maybe_downcast_to_dtype(result, dtype) return result @@ -2914,7 +2914,7 @@ def transform(self, func, *args, **kwargs): # the cython take a different path (and casting) dtype = self._selected_obj.dtype if is_numeric_dtype(dtype): - result = _possibly_downcast_to_dtype(result, dtype) + result = maybe_downcast_to_dtype(result, dtype) result.name = self._selected_obj.name result.index = 
self._selected_obj.index diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6487c2108028e..8db801f8e7212 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -29,15 +29,15 @@ is_re_compilable, is_scalar, _get_dtype) -from pandas.types.cast import (_possibly_downcast_to_dtype, - _maybe_convert_string_to_object, - _maybe_upcast, - _maybe_convert_scalar, _maybe_promote, - _infer_dtype_from_scalar, - _soft_convert_objects, - _possibly_convert_objects, - _astype_nansafe, - _find_common_type) +from pandas.types.cast import (maybe_downcast_to_dtype, + maybe_convert_string_to_object, + maybe_upcast, + maybe_convert_scalar, maybe_promote, + infer_dtype_from_scalar, + soft_convert_objects, + maybe_convert_objects, + astype_nansafe, + find_common_type) from pandas.types.missing import (isnull, array_equivalent, _is_na_compat, is_null_datelike_scalar) @@ -429,7 +429,7 @@ def downcast(self, dtypes=None, mgr=None): if dtypes is None: dtypes = 'infer' - nv = _possibly_downcast_to_dtype(values, dtypes) + nv = maybe_downcast_to_dtype(values, dtypes) return self.make_block(nv, fastpath=True) # ndim > 1 @@ -455,7 +455,7 @@ def downcast(self, dtypes=None, mgr=None): if dtype is None: nv = _block_shape(values[i], ndim=self.ndim) else: - nv = _possibly_downcast_to_dtype(values[i], dtype) + nv = maybe_downcast_to_dtype(values[i], dtype) nv = _block_shape(nv, ndim=self.ndim) blocks.append(self.make_block(nv, fastpath=True, placement=[rl])) @@ -514,7 +514,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, values = self.get_values(dtype=dtype) # _astype_nansafe works fine with 1-d only - values = _astype_nansafe(values.ravel(), dtype, copy=True) + values = astype_nansafe(values.ravel(), dtype, copy=True) values = values.reshape(self.shape) newb = make_block(values, placement=self.mgr_locs, dtype=dtype, @@ -578,7 +578,7 @@ def _try_cast_result(self, result, dtype=None): return result # may need to change the dtype here - return _possibly_downcast_to_dtype(result, dtype) + return maybe_downcast_to_dtype(result, dtype) def _try_operate(self, values): """ return a version to operate on as the input """ @@ -684,7 +684,7 @@ def setitem(self, indexer, value, mgr=None): # cast the values to a type that can hold nan (if necessary) if not self._can_hold_element(value): - dtype, _ = _maybe_promote(arr_value.dtype) + dtype, _ = maybe_promote(arr_value.dtype) values = values.astype(dtype) transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) @@ -758,7 +758,7 @@ def _is_empty_indexer(indexer): value.dtype): dtype = value.dtype elif is_scalar(value): - dtype, _ = _infer_dtype_from_scalar(value) + dtype, _ = infer_dtype_from_scalar(value) else: dtype = 'infer' values = self._try_coerce_and_cast_result(values, dtype) @@ -871,7 +871,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, n = np.array(new) # type of the new block - dtype, _ = _maybe_promote(n.dtype) + dtype, _ = maybe_promote(n.dtype) # we need to explicitly astype here to make a copy n = n.astype(dtype) @@ -1066,7 +1066,7 @@ def shift(self, periods, axis=0, mgr=None): # convert integer to float if necessary. 
need to do a lot more than # that, handle boolean etc also - new_values, fill_value = _maybe_upcast(self.values) + new_values, fill_value = maybe_upcast(self.values) # make sure array sent to np.roll is c_contiguous f_ordered = new_values.flags.f_contiguous @@ -1250,8 +1250,8 @@ def where(self, other, cond, align=True, raise_on_error=True, raise ValueError("where must have a condition that is ndarray " "like") - other = _maybe_convert_string_to_object(other) - other = _maybe_convert_scalar(other) + other = maybe_convert_string_to_object(other) + other = maybe_convert_scalar(other) # our where function def func(cond, values, other): @@ -1864,10 +1864,10 @@ def convert(self, *args, **kwargs): new_style |= kw in kwargs if new_style: - fn = _soft_convert_objects + fn = soft_convert_objects fn_inputs = new_inputs else: - fn = _possibly_convert_objects + fn = maybe_convert_objects fn_inputs = ['convert_dates', 'convert_numeric', 'convert_timedeltas'] fn_inputs += ['copy'] @@ -2643,7 +2643,7 @@ def shift(self, periods, axis=0, mgr=None): new_values = self.values.to_dense().take(indexer) # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also - new_values, fill_value = _maybe_upcast(new_values) + new_values, fill_value = maybe_upcast(new_values) if periods > 0: new_values[:periods] = fill_value else: @@ -3239,13 +3239,12 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False, def comp(s): if isnull(s): return isnull(values) - return _possibly_compare(values, getattr(s, 'asm8', s), - operator.eq) + return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) def _cast_scalar(block, scalar): - dtype, val = _infer_dtype_from_scalar(scalar, pandas_dtype=True) + dtype, val = infer_dtype_from_scalar(scalar, pandas_dtype=True) if not is_dtype_equal(block.dtype, dtype): - dtype = _find_common_type([block.dtype, dtype]) + dtype = find_common_type([block.dtype, dtype]) block = block.astype(dtype) # use original value val = scalar @@ -3920,7 +3919,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_tuple[0] is None: - _, fill_value = _maybe_promote(blk.dtype) + _, fill_value = maybe_promote(blk.dtype) fill_tuple = (fill_value, ) return [blk.take_nd(slobj, axis=0, @@ -3978,7 +3977,7 @@ def _make_na_block(self, placement, fill_value=None): block_shape = list(self.shape) block_shape[0] = len(placement) - dtype, fill_value = _infer_dtype_from_scalar(fill_value) + dtype, fill_value = infer_dtype_from_scalar(fill_value) block_values = np.empty(block_shape, dtype=dtype) block_values.fill(fill_value) return make_block(block_values, placement=placement) @@ -4497,7 +4496,7 @@ def _interleaved_dtype(blocks): if not len(blocks): return None - dtype = _find_common_type([b.dtype for b in blocks]) + dtype = find_common_type([b.dtype for b in blocks]) # only numpy compat if isinstance(dtype, ExtensionDtype): @@ -4587,7 +4586,7 @@ def _vstack(to_stack, dtype): return np.vstack(to_stack) -def _possibly_compare(a, b, op): +def _maybe_compare(a, b, op): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) @@ -4637,7 +4636,7 @@ def _block2d_to_blocknd(values, placement, shape, labels, ref_items): if mask.all(): pvalues = np.empty(panel_shape, dtype=values.dtype) else: - dtype, fill_value = _maybe_promote(values.dtype) + dtype, fill_value = maybe_promote(values.dtype) pvalues = 
np.empty(panel_shape, dtype=dtype) pvalues.fill(fill_value) @@ -4786,7 +4785,7 @@ def _putmask_smart(v, m, n): pass # change the dtype - dtype, _ = _maybe_promote(n.dtype) + dtype, _ = maybe_promote(n.dtype) if is_extension_type(v.dtype) and is_object_dtype(dtype): nv = v.get_values(dtype) @@ -5142,8 +5141,8 @@ def dtype(self): if not self.needs_filling: return self.block.dtype else: - return _get_dtype(_maybe_promote(self.block.dtype, - self.block.fill_value)[0]) + return _get_dtype(maybe_promote(self.block.dtype, + self.block.fill_value)[0]) return self._dtype diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index bb6c9b4546d0f..6ec94e69740a2 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -20,7 +20,7 @@ is_datetime64_dtype, is_timedelta64_dtype, is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) -from pandas.types.cast import _int64_max, _maybe_upcast_putmask +from pandas.types.cast import _int64_max, maybe_upcast_putmask from pandas.types.missing import isnull, notnull from pandas.core.common import _values_from_object @@ -200,7 +200,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, # promote if needed else: - values, changed = _maybe_upcast_putmask(values, mask, fill_value) + values, changed = maybe_upcast_putmask(values, mask, fill_value) elif copy: values = values.copy() diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fe83f8a352851..5dac8a7e4d2da 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -33,7 +33,7 @@ is_list_like, is_scalar, _ensure_object) -from pandas.types.cast import _maybe_upcast_putmask, _find_common_type +from pandas.types.cast import maybe_upcast_putmask, find_common_type from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex # ----------------------------------------------------------------------------- @@ -657,7 +657,7 @@ def na_op(x, y): raise_on_error=True, **eval_kwargs) except TypeError: if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): - dtype = _find_common_type([x.dtype, y.dtype]) + dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) mask = notnull(x) & notnull(y) result[mask] = op(x[mask], _values_from_object(y[mask])) @@ -670,7 +670,7 @@ def na_op(x, y): "{op}".format(typ=type(x).__name__, op=str_rep)) - result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result @@ -1204,7 +1204,7 @@ def na_op(x, y): "objects of type {x} and {y}".format( op=name, x=type(x), y=type(y))) - result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) result = missing.fill_zeros(result, x, y, name, fill_zeros) @@ -1329,7 +1329,7 @@ def na_op(x, y): result = np.empty(len(x), dtype=x.dtype) mask = notnull(x) result[mask] = op(x[mask], y) - result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 5c7b66a2d1356..50ddc24ac9656 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -6,8 +6,8 @@ import numpy as np -from pandas.types.cast import (_infer_dtype_from_scalar, - _possibly_cast_item) +from pandas.types.cast import (infer_dtype_from_scalar, + maybe_cast_item) from 
pandas.types.common import (is_integer, is_list_like, is_string_like, is_scalar) from pandas.types.missing import notnull @@ -165,7 +165,7 @@ def _init_data(self, data, copy, dtype, **kwargs): dtype = None elif is_scalar(data) and all(x is not None for x in passed_axes): if dtype is None: - dtype, data = _infer_dtype_from_scalar(data) + dtype, data = infer_dtype_from_scalar(data) values = np.empty([len(x) for x in passed_axes], dtype=dtype) values.fill(data) mgr = self._init_matrix(values, passed_axes, dtype=dtype, @@ -533,11 +533,11 @@ def set_value(self, *args, **kwargs): d = self._construct_axes_dict_from(self, axes, copy=False) result = self.reindex(**d) args = list(args) - likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1]) + likely_dtype, args[-1] = infer_dtype_from_scalar(args[-1]) made_bigger = not np.array_equal(axes[0], self._info_axis) # how to make this logic simpler? if made_bigger: - _possibly_cast_item(result, args[0], likely_dtype) + maybe_cast_item(result, args[0], likely_dtype) return result.set_value(*args) @@ -568,7 +568,7 @@ def __setitem__(self, key, value): shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif is_scalar(value): - dtype, value = _infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) mat = np.empty(shape[1:], dtype=dtype) mat.fill(value) else: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 1e685ae6895ad..2822d98b7c906 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -10,7 +10,7 @@ from pandas.types.common import (_ensure_platform_int, is_list_like, is_bool_dtype, needs_i8_conversion) -from pandas.types.cast import _maybe_promote +from pandas.types.cast import maybe_promote from pandas.types.missing import notnull import pandas.types.concat as _concat @@ -202,7 +202,7 @@ def get_new_values(self): dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: - dtype, fill_value = _maybe_promote(values.dtype, self.fill_value) + dtype, fill_value = maybe_promote(values.dtype, self.fill_value) new_values = np.empty(result_shape, dtype=dtype) new_values.fill(fill_value) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4c51ced1845fe..0913592e055cd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -29,9 +29,9 @@ _is_unorderable_exception, _ensure_platform_int) from pandas.types.generic import ABCSparseArray, ABCDataFrame -from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar, - _possibly_convert_platform, - _possibly_cast_to_datetime, _possibly_castable) +from pandas.types.cast import (maybe_upcast, infer_dtype_from_scalar, + maybe_convert_platform, + maybe_cast_to_datetime, maybe_castable) from pandas.types.missing import isnull, notnull from pandas.core.common import (is_bool_indexer, @@ -2794,7 +2794,7 @@ def _sanitize_array(data, index, dtype=None, copy=False, if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): - data, fill_value = _maybe_upcast(data, copy=True) + data, fill_value = maybe_upcast(data, copy=True) data[mask] = fill_value else: data = data.copy() @@ -2803,11 +2803,11 @@ def _try_cast(arr, take_fast_path): # perf shortcut as this is the most common case if take_fast_path: - if _possibly_castable(arr) and not copy and dtype is None: + if maybe_castable(arr) and not copy and dtype is None: return arr try: - subarr = _possibly_cast_to_datetime(arr, dtype) + subarr = maybe_cast_to_datetime(arr, dtype) if not is_extension_type(subarr): subarr = np.array(subarr, 
dtype=dtype, copy=copy) except (ValueError, TypeError): @@ -2863,9 +2863,9 @@ def _try_cast(arr, take_fast_path): subarr = lib.maybe_convert_objects(subarr) else: - subarr = _possibly_convert_platform(data) + subarr = maybe_convert_platform(data) - subarr = _possibly_cast_to_datetime(subarr, dtype) + subarr = maybe_cast_to_datetime(subarr, dtype) else: subarr = _try_cast(data, False) @@ -2894,10 +2894,10 @@ def create_from_value(value, index, dtype): # figure out the dtype from the value (upcast if necessary) if dtype is None: - dtype, value = _infer_dtype_from_scalar(value) + dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here - value = _possibly_cast_to_datetime(value, dtype) + value = maybe_cast_to_datetime(value, dtype) subarr = create_from_value(value, index, dtype) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index d262ecd818f1d..54f73a2466286 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2445,7 +2445,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: tolerance = self._convert_tolerance(tolerance) - pself, ptarget = self._possibly_promote(target) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer(ptarget, method=method, limit=limit, tolerance=tolerance) @@ -2572,7 +2572,7 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance): @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = _ensure_index(target) - pself, ptarget = self._possibly_promote(target) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer_non_unique(ptarget) @@ -2595,7 +2595,7 @@ def get_indexer_for(self, target, **kwargs): indexer, _ = self.get_indexer_non_unique(target, **kwargs) return indexer - def _possibly_promote(self, other): + def _maybe_promote(self, other): # A hack, but it works from pandas.tseries.index import DatetimeIndex if self.inferred_type == 'date' and isinstance(other, DatetimeIndex): diff --git a/pandas/indexes/frozen.py b/pandas/indexes/frozen.py index e043ba64bbad7..97a1a3ea99e65 100644 --- a/pandas/indexes/frozen.py +++ b/pandas/indexes/frozen.py @@ -10,7 +10,7 @@ import numpy as np from pandas.core.base import PandasObject -from pandas.types.cast import _coerce_indexer_dtype +from pandas.types.cast import coerce_indexer_dtype from pandas.formats.printing import pprint_thing @@ -119,7 +119,7 @@ def __unicode__(self): def _ensure_frozen(array_like, categories, copy=False): - array_like = _coerce_indexer_dtype(array_like, categories) + array_like = coerce_indexer_dtype(array_like, categories) array_like = array_like.view(FrozenNDArray) if copy: array_like = array_like.copy() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index af57cc3ce7950..f7b2d75c19304 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -21,7 +21,7 @@ is_object_dtype, is_string_dtype, is_scalar, is_categorical_dtype) from pandas.types.missing import isnull -from pandas.types.cast import _astype_nansafe +from pandas.types.cast import astype_nansafe from pandas.core.index import Index, MultiIndex, RangeIndex from pandas.core.series import Series from pandas.core.frame import DataFrame @@ -1498,11 +1498,11 @@ def _cast_types(self, values, cast_type, column): # c-parser which parses all categories # as strings if not is_object_dtype(values): - values = 
_astype_nansafe(values, str) + values = astype_nansafe(values, str) values = Categorical(values) else: try: - values = _astype_nansafe(values, cast_type, copy=True) + values = astype_nansafe(values, cast_type, copy=True) except ValueError: raise ValueError("Unable to convert column %s to " "type %s" % (column, cast_type)) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 5f4c07971d37e..f149e724c19c3 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -22,8 +22,8 @@ is_list_like, is_string_dtype, is_scalar, is_dtype_equal) -from pandas.types.cast import (_possibly_convert_platform, _maybe_promote, - _astype_nansafe, _find_common_type) +from pandas.types.cast import (maybe_convert_platform, maybe_promote, + astype_nansafe, find_common_type) from pandas.types.missing import isnull, notnull, na_value_for_dtype from pandas.sparse import libsparse as splib @@ -93,7 +93,7 @@ def _sparse_array_op(left, right, op, name, series=False): # dtype used to find corresponding sparse method if not is_dtype_equal(left.dtype, right.dtype): - dtype = _find_common_type([left.dtype, right.dtype]) + dtype = find_common_type([left.dtype, right.dtype]) left = left.astype(dtype) right = right.astype(dtype) else: @@ -370,7 +370,7 @@ def fill_value(self, value): if not is_scalar(value): raise ValueError('fill_value must be a scalar') # if the specified value triggers type promotion, raise ValueError - new_dtype, fill_value = _maybe_promote(self.dtype, value) + new_dtype, fill_value = maybe_promote(self.dtype, value) if is_dtype_equal(self.dtype, new_dtype): self._fill_value = fill_value else: @@ -532,7 +532,7 @@ def __setslice__(self, i, j, value): def astype(self, dtype=None, copy=True): dtype = np.dtype(dtype) - sp_values = _astype_nansafe(self.sp_values, dtype, copy=copy) + sp_values = astype_nansafe(self.sp_values, dtype, copy=copy) try: if is_bool_dtype(dtype): # to avoid np.bool_ dtype @@ -736,7 +736,7 @@ def _sanitize_values(arr): pass elif is_list_like(arr) and len(arr) > 0: - arr = _possibly_convert_platform(arr) + arr = maybe_convert_platform(arr) else: arr = np.asarray(arr) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index a21f64f524a0a..41f301f263374 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -11,7 +11,7 @@ import numpy as np from pandas.types.missing import isnull, notnull -from pandas.types.cast import _maybe_upcast, _find_common_type +from pandas.types.cast import maybe_upcast, find_common_type from pandas.types.common import _ensure_platform_int, is_scipy_sparse from pandas.core.common import _try_sort @@ -250,7 +250,7 @@ def to_coo(self): except ImportError: raise ImportError('Scipy is not installed') - dtype = _find_common_type(self.dtypes) + dtype = find_common_type(self.dtypes) cols, rows, datas = [], [], [] for col, name in enumerate(self): s = self[name] @@ -635,7 +635,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, new = new.values # convert integer to float if necessary. 
need to do a lot # more than that, handle boolean etc also - new, fill_value = _maybe_upcast(new, fill_value=fill_value) + new, fill_value = maybe_upcast(new, fill_value=fill_value) np.putmask(new, mask, fill_value) new_series[col] = new diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index d7b086daea1e3..dd4ea3bb02be9 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -9,33 +9,33 @@ import numpy as np from pandas import Timedelta, Timestamp, DatetimeIndex -from pandas.types.cast import (_possibly_downcast_to_dtype, - _possibly_convert_objects, - _infer_dtype_from_scalar, - _maybe_convert_string_to_object, - _maybe_convert_scalar, - _find_common_type) +from pandas.types.cast import (maybe_downcast_to_dtype, + maybe_convert_objects, + infer_dtype_from_scalar, + maybe_convert_string_to_object, + maybe_convert_scalar, + find_common_type) from pandas.types.dtypes import (CategoricalDtype, DatetimeTZDtype, PeriodDtype) from pandas.util import testing as tm -class TestPossiblyDowncast(tm.TestCase): +class TestMaybeDowncast(tm.TestCase): def test_downcast_conv(self): # test downcasting arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') assert (np.array_equal(result, arr)) arr = np.array([8., 8., 8., 8., 8.9999999999995]) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) arr = np.array([8., 8., 8., 8., 9.0000000000005]) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') expected = np.array([8, 8, 8, 8, 9]) assert (np.array_equal(result, expected)) @@ -44,41 +44,41 @@ def test_downcast_conv(self): expected = np.array([1, 2]) for dtype in [np.float64, object, np.int64]: arr = np.array([1.0, 2.0], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected, check_dtype=False) for dtype in [np.float64, object]: expected = np.array([1.0, 2.0, np.nan], dtype=dtype) arr = np.array([1.0, 2.0, np.nan], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'infer') + result = maybe_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected) # empties for dtype in [np.int32, np.float64, np.float32, np.bool_, np.int64, object]: arr = np.array([], dtype=dtype) - result = _possibly_downcast_to_dtype(arr, 'int64') + result = maybe_downcast_to_dtype(arr, 'int64') tm.assert_almost_equal(result, np.array([], dtype=np.int64)) assert result.dtype == np.int64 def test_datetimelikes_nan(self): arr = np.array([1, 2, np.nan]) exp = np.array([1, 2, np.datetime64('NaT')], dtype='datetime64[ns]') - res = _possibly_downcast_to_dtype(arr, 'datetime64[ns]') + res = maybe_downcast_to_dtype(arr, 'datetime64[ns]') tm.assert_numpy_array_equal(res, exp) exp = np.array([1, 2, np.timedelta64('NaT')], dtype='timedelta64[ns]') - res = _possibly_downcast_to_dtype(arr, 'timedelta64[ns]') + res = maybe_downcast_to_dtype(arr, 'timedelta64[ns]') tm.assert_numpy_array_equal(res, exp) def test_datetime_with_timezone(self): # GH 15426 ts = Timestamp("2016-01-01 12:00:00", tz='US/Pacific') exp = DatetimeIndex([ts, ts]) - res = _possibly_downcast_to_dtype(exp, exp.dtype) + res = maybe_downcast_to_dtype(exp, exp.dtype) tm.assert_index_equal(res, exp) - res = 
_possibly_downcast_to_dtype(exp.asi8, exp.dtype) + res = maybe_downcast_to_dtype(exp.asi8, exp.dtype) tm.assert_index_equal(res, exp) @@ -91,121 +91,121 @@ def test_infer_dtype_from_scalar(self): for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64]: data = dtypec(12) - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, type(data)) data = 12 - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.int64) for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, dtypec) data = np.float(12) - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.float64) for data in [True, False]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.bool_) for data in [np.complex64(1), np.complex128(1)]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.complex_) import datetime for data in [np.datetime64(1, 'ns'), Timestamp(1), datetime.datetime(2000, 1, 1, 0, 0)]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, 'M8[ns]') for data in [np.timedelta64(1, 'ns'), Timedelta(1), datetime.timedelta(1)]: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, 'm8[ns]') for data in [datetime.date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: - dtype, val = _infer_dtype_from_scalar(data) + dtype, val = infer_dtype_from_scalar(data) self.assertEqual(dtype, np.object_) class TestMaybe(tm.TestCase): def test_maybe_convert_string_to_array(self): - result = _maybe_convert_string_to_object('x') + result = maybe_convert_string_to_object('x') tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object)) self.assertTrue(result.dtype == object) - result = _maybe_convert_string_to_object(1) + result = maybe_convert_string_to_object(1) self.assertEqual(result, 1) arr = np.array(['x', 'y'], dtype=str) - result = _maybe_convert_string_to_object(arr) + result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) self.assertTrue(result.dtype == object) # unicode arr = np.array(['x', 'y']).astype('U') - result = _maybe_convert_string_to_object(arr) + result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) self.assertTrue(result.dtype == object) # object arr = np.array(['x', 2], dtype=object) - result = _maybe_convert_string_to_object(arr) + result = maybe_convert_string_to_object(arr) tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) self.assertTrue(result.dtype == object) def test_maybe_convert_scalar(self): # pass thru - result = _maybe_convert_scalar('x') + result = maybe_convert_scalar('x') self.assertEqual(result, 'x') - result = _maybe_convert_scalar(np.array([1])) + result = maybe_convert_scalar(np.array([1])) self.assertEqual(result, np.array([1])) # leave scalar dtype - result = _maybe_convert_scalar(np.int64(1)) + result = maybe_convert_scalar(np.int64(1)) self.assertEqual(result, np.int64(1)) - result = _maybe_convert_scalar(np.int32(1)) + result = 
maybe_convert_scalar(np.int32(1)) self.assertEqual(result, np.int32(1)) - result = _maybe_convert_scalar(np.float32(1)) + result = maybe_convert_scalar(np.float32(1)) self.assertEqual(result, np.float32(1)) - result = _maybe_convert_scalar(np.int64(1)) + result = maybe_convert_scalar(np.int64(1)) self.assertEqual(result, np.float64(1)) # coerce - result = _maybe_convert_scalar(1) + result = maybe_convert_scalar(1) self.assertEqual(result, np.int64(1)) - result = _maybe_convert_scalar(1.0) + result = maybe_convert_scalar(1.0) self.assertEqual(result, np.float64(1)) - result = _maybe_convert_scalar(Timestamp('20130101')) + result = maybe_convert_scalar(Timestamp('20130101')) self.assertEqual(result, Timestamp('20130101').value) - result = _maybe_convert_scalar(datetime(2013, 1, 1)) + result = maybe_convert_scalar(datetime(2013, 1, 1)) self.assertEqual(result, Timestamp('20130101').value) - result = _maybe_convert_scalar(Timedelta('1 day 1 min')) + result = maybe_convert_scalar(Timedelta('1 day 1 min')) self.assertEqual(result, Timedelta('1 day 1 min').value) class TestConvert(tm.TestCase): - def test_possibly_convert_objects_copy(self): + def test_maybe_convert_objects_copy(self): values = np.array([1, 2]) - out = _possibly_convert_objects(values, copy=False) + out = maybe_convert_objects(values, copy=False) self.assertTrue(values is out) - out = _possibly_convert_objects(values, copy=True) + out = maybe_convert_objects(values, copy=True) self.assertTrue(values is not out) values = np.array(['apply', 'banana']) - out = _possibly_convert_objects(values, copy=False) + out = maybe_convert_objects(values, copy=False) self.assertTrue(values is out) - out = _possibly_convert_objects(values, copy=True) + out = maybe_convert_objects(values, copy=True) self.assertTrue(values is not out) @@ -267,34 +267,34 @@ def test_numpy_dtypes(self): ((np.dtype('datetime64[ns]'), np.int64), np.object) ) for src, common in testcases: - self.assertEqual(_find_common_type(src), common) + self.assertEqual(find_common_type(src), common) with tm.assertRaises(ValueError): # empty - _find_common_type([]) + find_common_type([]) def test_categorical_dtype(self): dtype = CategoricalDtype() - self.assertEqual(_find_common_type([dtype]), 'category') - self.assertEqual(_find_common_type([dtype, dtype]), 'category') - self.assertEqual(_find_common_type([np.object, dtype]), np.object) + self.assertEqual(find_common_type([dtype]), 'category') + self.assertEqual(find_common_type([dtype, dtype]), 'category') + self.assertEqual(find_common_type([np.object, dtype]), np.object) def test_datetimetz_dtype(self): dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern') - self.assertEqual(_find_common_type([dtype, dtype]), + self.assertEqual(find_common_type([dtype, dtype]), 'datetime64[ns, US/Eastern]') for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), np.dtype('datetime64[ns]'), np.object, np.int64]: - self.assertEqual(_find_common_type([dtype, dtype2]), np.object) - self.assertEqual(_find_common_type([dtype2, dtype]), np.object) + self.assertEqual(find_common_type([dtype, dtype2]), np.object) + self.assertEqual(find_common_type([dtype2, dtype]), np.object) def test_period_dtype(self): dtype = PeriodDtype(freq='D') - self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]') + self.assertEqual(find_common_type([dtype, dtype]), 'period[D]') for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), PeriodDtype(freq='2D'), PeriodDtype(freq='H'), np.dtype('datetime64[ns]'), np.object, np.int64]: - 
self.assertEqual(_find_common_type([dtype, dtype2]), np.object) - self.assertEqual(_find_common_type([dtype2, dtype]), np.object) + self.assertEqual(find_common_type([dtype, dtype2]), np.object) + self.assertEqual(find_common_type([dtype2, dtype]), np.object) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index bf78a9dfb65cc..263d2f16a4216 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -9,7 +9,7 @@ is_decimal, is_scalar as isscalar) -from pandas.types.cast import _possibly_downcast_to_dtype +from pandas.types.cast import maybe_downcast_to_dtype import pandas as pd from pandas.compat import reduce @@ -226,8 +226,7 @@ def to_numeric(arg, errors='raise', downcast=None): # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize <= values.dtype.itemsize: - values = _possibly_downcast_to_dtype( - values, dtype) + values = maybe_downcast_to_dtype(values, dtype) # successful conversion if values.dtype == dtype: diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index f80618ef34373..983c1a4cd9de9 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1329,7 +1329,7 @@ def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): # try to find a the dates return (lhs_mask & rhs_mask).nonzero()[0] - def _possibly_promote(self, other): + def _maybe_promote(self, other): if other.inferred_type == 'date': other = DatetimeIndex(other) return self, other diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index f47d80a31b174..13d844bb6a399 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -623,7 +623,7 @@ def intersection(self, other): left_chunk = left.values[lslice] return self._shallow_copy(left_chunk) - def _possibly_promote(self, other): + def _maybe_promote(self, other): if other.inferred_type == 'timedelta': other = TimedeltaIndex(other) return self, other diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 0e26cd085db5a..91c7d287d6d46 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -32,7 +32,7 @@ _int64_max = np.iinfo(np.int64).max -def _possibly_convert_platform(values): +def maybe_convert_platform(values): """ try to do platform conversion, allow ndarray or list here """ if isinstance(values, (list, tuple)): @@ -45,7 +45,7 @@ def _possibly_convert_platform(values): return values -def _possibly_downcast_to_dtype(result, dtype): +def maybe_downcast_to_dtype(result, dtype): """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 """ @@ -142,7 +142,7 @@ def trans(x): # noqa return result -def _maybe_upcast_putmask(result, mask, other): +def maybe_upcast_putmask(result, mask, other): """ A safe version of putmask that potentially upcasts the result @@ -193,7 +193,7 @@ def changeit(): # we are forced to change the dtype of the result as the input # isn't compatible - r, _ = _maybe_upcast(result, fill_value=other, copy=True) + r, _ = maybe_upcast(result, fill_value=other, copy=True) np.place(r, mask, other) return r, True @@ -203,7 +203,7 @@ def changeit(): # upcast (possibly), otherwise we DON't want to upcast (e.g. 
if we # have values, say integers, in the success portion then it's ok to not # upcast) - new_dtype, _ = _maybe_promote(result.dtype, other) + new_dtype, _ = maybe_promote(result.dtype, other) if new_dtype != result.dtype: # we have a scalar or len 0 ndarray @@ -227,7 +227,7 @@ def changeit(): return result, False -def _maybe_promote(dtype, fill_value=np.nan): +def maybe_promote(dtype, fill_value=np.nan): # if we passed an array here, determine the fill value by dtype if isinstance(fill_value, np.ndarray): @@ -312,7 +312,7 @@ def _maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def _infer_dtype_from_scalar(val, pandas_dtype=False): +def infer_dtype_from_scalar(val, pandas_dtype=False): """ interpret the dtype from a scalar @@ -387,7 +387,7 @@ def _infer_dtype_from_scalar(val, pandas_dtype=False): return dtype, val -def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): +def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): """ provide explict type promotion and coercion Parameters @@ -404,7 +404,7 @@ def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): else: if dtype is None: dtype = values.dtype - new_dtype, fill_value = _maybe_promote(dtype, fill_value) + new_dtype, fill_value = maybe_promote(dtype, fill_value) if new_dtype != values.dtype: values = values.astype(new_dtype) elif copy: @@ -413,7 +413,7 @@ def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): return values, fill_value -def _possibly_cast_item(obj, item, dtype): +def maybe_cast_item(obj, item, dtype): chunk = obj[item] if chunk.values.dtype != dtype: @@ -423,7 +423,7 @@ def _possibly_cast_item(obj, item, dtype): raise ValueError("Unexpected dtype encountered: %s" % dtype) -def _invalidate_string_dtypes(dtype_set): +def invalidate_string_dtypes(dtype_set): """Change string like dtypes to object for ``DataFrame.select_dtypes()``. """ @@ -432,7 +432,7 @@ def _invalidate_string_dtypes(dtype_set): raise TypeError("string dtypes are not allowed, use 'object' instead") -def _maybe_convert_string_to_object(values): +def maybe_convert_string_to_object(values): """ Convert string-like and string-like array to convert object dtype. @@ -446,13 +446,13 @@ def _maybe_convert_string_to_object(values): return values -def _maybe_convert_scalar(values): +def maybe_convert_scalar(values): """ Convert a python scalar to the appropriate numpy dtype if possible This avoids numpy directly converting according to platform preferences """ if is_scalar(values): - dtype, values = _infer_dtype_from_scalar(values) + dtype, values = infer_dtype_from_scalar(values) try: values = dtype(values) except TypeError: @@ -460,7 +460,7 @@ def _maybe_convert_scalar(values): return values -def _coerce_indexer_dtype(indexer, categories): +def coerce_indexer_dtype(indexer, categories): """ coerce the indexer input array to the smallest dtype possible """ l = len(categories) if l < _int8_max: @@ -472,7 +472,7 @@ def _coerce_indexer_dtype(indexer, categories): return _ensure_int64(indexer) -def _coerce_to_dtypes(result, dtypes): +def coerce_to_dtypes(result, dtypes): """ given a dtypes and a result set, coerce the result elements to the dtypes @@ -507,7 +507,7 @@ def conv(r, dtype): return [conv(r, dtype) for r, dtype in zip(result, dtypes)] -def _astype_nansafe(arr, dtype, copy=True): +def astype_nansafe(arr, dtype, copy=True): """ return a view if copy is False, but need to be very careful as the result shape could change! 
""" if not isinstance(dtype, np.dtype): @@ -564,8 +564,8 @@ def _astype_nansafe(arr, dtype, copy=True): return arr.view(dtype) -def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, - convert_timedeltas=True, copy=True): +def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, + convert_timedeltas=True, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ # if we have passed in a list or scalar @@ -579,8 +579,8 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, # we take an aggressive stance and convert to datetime64[ns] if convert_dates == 'coerce': - new_values = _possibly_cast_to_datetime(values, 'M8[ns]', - errors='coerce') + new_values = maybe_cast_to_datetime( + values, 'M8[ns]', errors='coerce') # if we are all nans then leave me alone if not isnull(new_values).all(): @@ -627,8 +627,8 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, return values -def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, - coerce=False, copy=True): +def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, + coerce=False, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ conversion_count = sum((datetime, numeric, timedelta)) @@ -683,7 +683,7 @@ def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, return values -def _possibly_castable(arr): +def maybe_castable(arr): # return False to force a non-fastpath # check datetime64[ns]/timedelta64[ns] are valid @@ -695,7 +695,7 @@ def _possibly_castable(arr): return arr.dtype.name not in _POSSIBLY_CAST_DTYPES -def _possibly_infer_to_datetimelike(value, convert_dates=False): +def maybe_infer_to_datetimelike(value, convert_dates=False): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a @@ -788,7 +788,7 @@ def _try_timedelta(v): return value -def _possibly_cast_to_datetime(value, dtype, errors='raise'): +def maybe_cast_to_datetime(value, dtype, errors='raise'): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ @@ -886,12 +886,12 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'): # conversion elif not (is_array and not (issubclass(value.dtype.type, np.integer) or value.dtype == np.object_)): - value = _possibly_infer_to_datetimelike(value) + value = maybe_infer_to_datetimelike(value) return value -def _find_common_type(types): +def find_common_type(types): """ Find a common data type among the given dtypes. 
From fb7af6e257d5ca162487ea417eae675e3edbe271 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Mar 2017 07:58:28 -0400 Subject: [PATCH 261/353] CLN: move groupby algos separate cython lib - separate out groupby algorithms to separate lib - release GIL on median - release GIL on is_lexsorted / fix memory leak - release GIL on nancorr Author: Jeff Reback Closes #15775 from jreback/groupby and squashes the following commits: 4e2bfec [Jeff Reback] release GIL on median release GIL on is_lexsorted / fix memory leak release GIL on nancorr ce28bb5 [Jeff Reback] CLN: separate out groupby algorithms to separate lib --- pandas/_libs/algos.pxd | 13 + pandas/_libs/algos.pyx | 530 +++++------------- pandas/_libs/groupby.pyx | 291 ++++++++++ ...by_helper.pxi.in => groupby_helper.pxi.in} | 18 +- pandas/core/groupby.py | 10 +- pandas/tests/groupby/test_bin_groupby.py | 5 +- pandas/tests/groupby/test_transform.py | 14 +- pandas/tests/test_algos.py | 7 +- setup.py | 8 +- 9 files changed, 474 insertions(+), 422 deletions(-) create mode 100644 pandas/_libs/algos.pxd create mode 100644 pandas/_libs/groupby.pyx rename pandas/_libs/{algos_groupby_helper.pxi.in => groupby_helper.pxi.in} (98%) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd new file mode 100644 index 0000000000000..6d80e6f0073eb --- /dev/null +++ b/pandas/_libs/algos.pxd @@ -0,0 +1,13 @@ +from util cimport numeric +from numpy cimport float64_t, double_t + +cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil + +cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: + cdef numeric t + + # cython doesn't allow pointer dereference so use array syntax + t = a[0] + a[0] = b[0] + b[0] = t + return 0 diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7d3ce3280ec1e..897a60e0c2f21 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -96,22 +96,94 @@ class NegInfinity(object): __ge__ = lambda self, other: self is other -cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil except -1: - cdef numeric t +@cython.wraparound(False) +@cython.boundscheck(False) +def is_lexsorted(list list_of_arrays): + cdef: + int i + Py_ssize_t n, nlevels + int64_t k, cur, pre + ndarray arr + bint result = True + + nlevels = len(list_of_arrays) + n = len(list_of_arrays[0]) + + cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) + for i in range(nlevels): + arr = list_of_arrays[i] + vecs[i] = arr.data + + # Assume uniqueness?? + with nogil: + for i in range(n): + for k in range(nlevels): + cur = vecs[k][i] + pre = vecs[k][i -1] + if cur == pre: + continue + elif cur > pre: + break + else: + result = False + break + free(vecs) + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): + """ + compute a 1-d indexer that is an ordering of the passed index, + ordered by the groups. This is a reverse of the label + factorization process. 
+ + Parameters + ---------- + index: int64 ndarray + mappings from group -> position + ngroups: int64 + number of groups + + return a tuple of (1-d indexer ordered by groups, group counts) + """ + + cdef: + Py_ssize_t i, loc, label, n + ndarray[int64_t] counts, where, result + + counts = np.zeros(ngroups + 1, dtype=np.int64) + n = len(index) + result = np.zeros(n, dtype=np.int64) + where = np.zeros(ngroups + 1, dtype=np.int64) + + with nogil: + + # count group sizes, location 0 for NA + for i in range(n): + counts[index[i] + 1] += 1 - # cython doesn't allow pointer dereference so use array syntax - t = a[0] - a[0] = b[0] - b[0] = t - return 0 + # mark the start of each contiguous group of like-indexed data + for i in range(1, ngroups + 1): + where[i] = where[i - 1] + counts[i - 1] + + # this is our indexer + for i in range(n): + label = index[i] + 1 + result[where[label]] = i + where[label] += 1 + + return result, counts @cython.boundscheck(False) @cython.wraparound(False) -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k): +cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil: cdef: - Py_ssize_t i, j, l, m, n = a.size + Py_ssize_t i, j, l, m, n = a.shape[0] numeric x + with nogil: l = 0 m = n - 1 @@ -132,32 +204,6 @@ cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k): if j < k: l = i if k < i: m = j - return a[k] - - -cdef inline kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n): - cdef: - Py_ssize_t i, j, l, m - double_t x, t - - l = 0 - m = n -1 - while (l j: break - - if j < k: l = i - if k < i: m = j return a[k] @@ -181,6 +227,8 @@ cpdef numeric median(numeric[:] arr): # -------------- Min, Max subsequence +@cython.boundscheck(False) +@cython.wraparound(False) def max_subseq(ndarray[double_t] arr): cdef: Py_ssize_t i=0, s=0, e=0, T, n @@ -195,21 +243,24 @@ def max_subseq(ndarray[double_t] arr): S = m T = 0 - for i in range(1, n): - # S = max { S + A[i], A[i] ) - if (S > 0): - S = S + arr[i] - else: - S = arr[i] - T = i - if S > m: - s = T - e = i - m = S + with nogil: + for i in range(1, n): + # S = max { S + A[i], A[i] ) + if (S > 0): + S = S + arr[i] + else: + S = arr[i] + T = i + if S > m: + s = T + e = i + m = S return (s, e, m) +@cython.boundscheck(False) +@cython.wraparound(False) def min_subseq(ndarray[double_t] arr): cdef: Py_ssize_t s, e @@ -225,9 +276,10 @@ def min_subseq(ndarray[double_t] arr): @cython.boundscheck(False) @cython.wraparound(False) -def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None): +def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None): cdef: Py_ssize_t i, j, xi, yi, N, K + bint minpv ndarray[float64_t, ndim=2] result ndarray[uint8_t, ndim=2] mask int64_t nobs = 0 @@ -236,46 +288,49 @@ def nancorr(ndarray[float64_t, ndim=2] mat, cov=False, minp=None): N, K = ( mat).shape if minp is None: - minp = 1 + minpv = 1 + else: + minpv = minp result = np.empty((K, K), dtype=np.float64) mask = np.isfinite(mat).view(np.uint8) - for xi in range(K): - for yi in range(xi + 1): - nobs = sumxx = sumyy = sumx = sumy = 0 - for i in range(N): - if mask[i, xi] and mask[i, yi]: - vx = mat[i, xi] - vy = mat[i, yi] - nobs += 1 - sumx += vx - sumy += vy - - if nobs < minp: - result[xi, yi] = result[yi, xi] = np.NaN - else: - meanx = sumx / nobs - meany = sumy / nobs - - # now the cov numerator - sumx = 0 - + with nogil: + for xi in range(K): + for yi in range(xi + 1): + nobs = sumxx = sumyy = sumx = sumy = 0 for i in range(N): if mask[i, xi] and mask[i, yi]: - vx = mat[i, xi] - meanx - vy = mat[i, yi] - meany + vx = mat[i, xi] + 
vy = mat[i, yi] + nobs += 1 + sumx += vx + sumy += vy + + if nobs < minpv: + result[xi, yi] = result[yi, xi] = NaN + else: + meanx = sumx / nobs + meany = sumy / nobs - sumx += vx * vy - sumxx += vx * vx - sumyy += vy * vy + # now the cov numerator + sumx = 0 - divisor = (nobs - 1.0) if cov else sqrt(sumxx * sumyy) + for i in range(N): + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] - meanx + vy = mat[i, yi] - meany - if divisor != 0: - result[xi, yi] = result[yi, xi] = sumx / divisor - else: - result[xi, yi] = result[yi, xi] = np.NaN + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + + divisor = (nobs - 1.0) if cov else sqrt(sumxx * sumyy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = sumx / divisor + else: + result[xi, yi] = result[yi, xi] = NaN return result @@ -308,7 +363,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): nobs += 1 if nobs < minp: - result[xi, yi] = result[yi, xi] = np.NaN + result[xi, yi] = result[yi, xi] = NaN else: maskedx = np.empty(nobs, dtype=np.float64) maskedy = np.empty(nobs, dtype=np.float64) @@ -339,326 +394,11 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): if divisor != 0: result[xi, yi] = result[yi, xi] = sumx / divisor else: - result[xi, yi] = result[yi, xi] = np.NaN + result[xi, yi] = result[yi, xi] = NaN return result -#---------------------------------------------------------------------- -# group operations - - -@cython.wraparound(False) -@cython.boundscheck(False) -def is_lexsorted(list list_of_arrays): - cdef: - int i - Py_ssize_t n, nlevels - int64_t k, cur, pre - ndarray arr - - nlevels = len(list_of_arrays) - n = len(list_of_arrays[0]) - - cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) - for i from 0 <= i < nlevels: - arr = list_of_arrays[i] - vecs[i] = arr.data - - # Assume uniqueness?? - for i from 1 <= i < n: - for k from 0 <= k < nlevels: - cur = vecs[k][i] - pre = vecs[k][i -1] - if cur == pre: - continue - elif cur > pre: - break - else: - return False - free(vecs) - return True - - -@cython.boundscheck(False) -@cython.wraparound(False) -def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): - """ - compute a 1-d indexer that is an ordering of the passed index, - ordered by the groups. This is a reverse of the label - factorization process. 
- - Parameters - ---------- - index: int64 ndarray - mappings from group -> position - ngroups: int64 - number of groups - - return a tuple of (1-d indexer ordered by groups, group counts) - """ - - cdef: - Py_ssize_t i, loc, label, n - ndarray[int64_t] counts, where, result - - counts = np.zeros(ngroups + 1, dtype=np.int64) - n = len(index) - result = np.zeros(n, dtype=np.int64) - where = np.zeros(ngroups + 1, dtype=np.int64) - - with nogil: - - # count group sizes, location 0 for NA - for i from 0 <= i < n: - counts[index[i] + 1] += 1 - - # mark the start of each contiguous group of like-indexed data - for i from 1 <= i < ngroups + 1: - where[i] = where[i - 1] + counts[i - 1] - - # this is our indexer - for i from 0 <= i < n: - label = index[i] + 1 - result[where[label]] = i - where[label] += 1 - - return result, counts - -# TODO: aggregate multiple columns in single pass -#---------------------------------------------------------------------- -# first, nth, last - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_nth_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels, - int64_t rank): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[int64_t, ndim=2] nobs - ndarray[object, ndim=2] resx - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty(( out).shape, dtype=object) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - if nobs[lab, j] == rank: - resx[lab, j] = val - - for i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_nth_bin_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, ngroups, b - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[float64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.float64) - resx = np.empty(( out).shape, dtype=object) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_last_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] labels): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, lab - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[int64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.int64) - resx = np.empty(( out).shape, dtype=object) - - N, K = ( values).shape - - for i in range(N): - lab = labels[i] - if lab < 0: - continue - - counts[lab] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[lab, j] += 1 - resx[lab, j] = val - - for 
i in range(len(counts)): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def group_last_bin_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] bins): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, ngroups, b - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[float64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.float64) - resx = np.empty(( out).shape, dtype=object) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - -cdef inline float64_t _median_linear(float64_t* a, int n): - cdef int i, j, na_count = 0 - cdef float64_t result - cdef float64_t* tmp - - if n == 0: - return NaN - - # count NAs - for i in range(n): - if a[i] != a[i]: - na_count += 1 - - if na_count: - if na_count == n: - return NaN - - tmp = malloc((n - na_count) * sizeof(float64_t)) - - j = 0 - for i in range(n): - if a[i] == a[i]: - tmp[j] = a[i] - j += 1 - - a = tmp - n -= na_count - - if n % 2: - result = kth_smallest_c( a, n / 2, n) - else: - result = (kth_smallest_c(a, n / 2, n) + - kth_smallest_c(a, n / 2 - 1, n)) / 2 - - if na_count: - free(a) - - return result - - # generated from template include "algos_common_helper.pxi" -include "algos_groupby_helper.pxi" include "algos_rank_helper.pxi" include "algos_take_helper.pxi" diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx new file mode 100644 index 0000000000000..c6ff602cfef1c --- /dev/null +++ b/pandas/_libs/groupby.pyx @@ -0,0 +1,291 @@ +# cython: profile=False + +from numpy cimport * +cimport numpy as np +import numpy as np + +cimport cython + +import_array() + +cimport util + +from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float16_t, float32_t, float64_t) + +from libc.stdlib cimport malloc, free + +from util cimport numeric, get_nat +from algos cimport swap +from algos import take_2d_axis1_float64_float64, groupsort_indexer + +cdef int64_t iNaT = get_nat() + +cdef double NaN = np.NaN +cdef double nan = NaN + + +# TODO: aggregate multiple columns in single pass +#---------------------------------------------------------------------- +# first, nth, last + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_nth_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] labels, + int64_t rank): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + object val + float64_t count + ndarray[int64_t, ndim=2] nobs + ndarray[object, ndim=2] resx + + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty(( out).shape, dtype=object) + + N, K = ( values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] 
== 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_nth_bin_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] bins, int64_t rank): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, b + object val + float64_t count + ndarray[object, ndim=2] resx + ndarray[float64_t, ndim=2] nobs + + nobs = np.zeros(( out).shape, dtype=np.float64) + resx = np.empty(( out).shape, dtype=object) + + if len(bins) == 0: + return + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 + + N, K = ( values).shape + + b = 0 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[b, j] += 1 + if nobs[b, j] == rank: + resx[b, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_last_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] labels): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + object val + float64_t count + ndarray[object, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + + nobs = np.zeros(( out).shape, dtype=np.int64) + resx = np.empty(( out).shape, dtype=object) + + N, K = ( values).shape + + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(len(counts)): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_last_bin_object(ndarray[object, ndim=2] out, + ndarray[int64_t] counts, + ndarray[object, ndim=2] values, + ndarray[int64_t] bins): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, b + object val + float64_t count + ndarray[object, ndim=2] resx + ndarray[float64_t, ndim=2] nobs + + nobs = np.zeros(( out).shape, dtype=np.float64) + resx = np.empty(( out).shape, dtype=object) + + if len(bins) == 0: + return + if bins[len(bins) - 1] == len(values): + ngroups = len(bins) + else: + ngroups = len(bins) + 1 + + N, K = ( values).shape + + b = 0 + for i in range(N): + while b < ngroups - 1 and i >= bins[b]: + b += 1 + + counts[b] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[b, j] += 1 + resx[b, j] = val + + for i in range(ngroups): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = nan + else: + out[i, j] = resx[i, j] + + +cdef inline float64_t _median_linear(float64_t* a, int n) nogil: + cdef int i, j, na_count = 0 + cdef float64_t result + cdef float64_t* tmp + + if n == 0: + return NaN + + # count NAs + for i in range(n): + if a[i] != a[i]: + na_count += 1 + + if na_count: + if na_count == n: + return NaN + + tmp = malloc((n - na_count) * sizeof(float64_t)) + + j = 0 + for i in range(n): + if a[i] == a[i]: + tmp[j] = a[i] + j += 1 + + a = tmp + n -= na_count + + if n % 2: + result = kth_smallest_c( a, n / 2, n) + else: + result = (kth_smallest_c(a, n / 2, n) + + kth_smallest_c(a, n / 2 - 1, n)) / 2 + + if na_count: + free(a) + + return result + + +cdef inline float64_t kth_smallest_c(float64_t* a, + 
Py_ssize_t k, + Py_ssize_t n) nogil: + cdef: + Py_ssize_t i, j, l, m + double_t x, t + + l = 0 + m = n -1 + while (l j: break + + if j < k: l = i + if k < i: m = j + return a[k] + + +# generated from template +include "groupby_helper.pxi" diff --git a/pandas/_libs/algos_groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in similarity index 98% rename from pandas/_libs/algos_groupby_helper.pxi.in rename to pandas/_libs/groupby_helper.pxi.in index e2c263f49b110..d38b677df321c 100644 --- a/pandas/_libs/algos_groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -681,6 +681,8 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out, #---------------------------------------------------------------------- +@cython.boundscheck(False) +@cython.wraparound(False) def group_median_float64(ndarray[float64_t, ndim=2] out, ndarray[int64_t] counts, ndarray[float64_t, ndim=2] values, @@ -704,13 +706,15 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, take_2d_axis1_float64_float64(values.T, indexer, out=data) - for i in range(K): - # exclude NA group - ptr += _counts[0] - for j in range(ngroups): - size = _counts[j + 1] - out[j, i] = _median_linear(ptr, size) - ptr += size + with nogil: + + for i in range(K): + # exclude NA group + ptr += _counts[0] + for j in range(ngroups): + size = _counts[j + 1] + out[j, i] = _median_linear(ptr, size) + ptr += size @cython.boundscheck(False) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 0a63981290df3..727af8b8cd3eb 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -60,7 +60,7 @@ import pandas.core.common as com from pandas.core.config import option_context -from pandas._libs import lib, algos as libalgos, Timestamp, NaT, iNaT +from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT from pandas._libs.lib import count_level_2d _doc_template = """ @@ -1474,7 +1474,7 @@ def shift(self, periods=1, freq=None, axis=0): # filled in by Cython indexer = np.zeros_like(labels) - libalgos.group_shift_indexer(indexer, labels, ngroups, periods) + libgroupby.group_shift_indexer(indexer, labels, ngroups, periods) output = {} for name, obj in self._iterate_slices(): @@ -1815,13 +1815,13 @@ def _get_cython_function(self, kind, how, values, is_numeric): def get_func(fname): # see if there is a fused-type version of function # only valid for numeric - f = getattr(libalgos, fname, None) + f = getattr(libgroupby, fname, None) if f is not None and is_numeric: return f # otherwise find dtype-specific version, falling back to object for dt in [dtype_str, 'object']: - f = getattr(libalgos, "%s_%s" % (fname, dtype_str), None) + f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None) if f is not None: return f @@ -3118,7 +3118,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, out = _ensure_int64(out) return Series(out, index=mi, name=self.name) - # for compat. with libalgos.value_counts need to ensure every + # for compat. 
with libgroupby.value_counts need to ensure every # bin is present at every index level, null filled with zeros diff = np.zeros(len(out), dtype='bool') for lab in labels[:-1]: diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 77c5bde332cff..02c7933e020ea 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -7,8 +7,7 @@ from pandas import Index, isnull from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -import pandas._libs.lib as lib -import pandas._libs.algos as algos +from pandas._libs import lib, groupby def test_series_grouper(): @@ -92,7 +91,7 @@ def _check(dtype): labels = _ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) - func = getattr(algos, 'group_ohlc_%s' % dtype) + func = getattr(groupby, 'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) def _ohlc(group): diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 4acf9dd4755f4..3b85fadda6cfe 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -6,7 +6,7 @@ from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range from pandas.types.common import _ensure_platform_int, is_timedelta64_dtype from pandas.compat import StringIO -from pandas._libs import algos +from pandas._libs import groupby from .common import MixIn, assert_fp_equal from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -418,8 +418,8 @@ def test_cython_group_transform_algos(self): dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, np.uint64, np.float32, np.float64] - ops = [(algos.group_cumprod_float64, np.cumproduct, [np.float64]), - (algos.group_cumsum, np.cumsum, dtypes)] + ops = [(groupby.group_cumprod_float64, np.cumproduct, [np.float64]), + (groupby.group_cumsum, np.cumsum, dtypes)] is_datetimelike = False for pd_op, np_op, dtypes in ops: @@ -437,13 +437,13 @@ def test_cython_group_transform_algos(self): data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') actual = np.zeros_like(data) actual.fill(np.nan) - algos.group_cumprod_float64(actual, data, labels, is_datetimelike) + groupby.group_cumprod_float64(actual, data, labels, is_datetimelike) expected = np.array([1, 2, 6, np.nan, 24], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) actual = np.zeros_like(data) actual.fill(np.nan) - algos.group_cumsum(actual, data, labels, is_datetimelike) + groupby.group_cumsum(actual, data, labels, is_datetimelike) expected = np.array([1, 3, 6, np.nan, 10], dtype='float64') self.assert_numpy_array_equal(actual[:, 0], expected) @@ -451,8 +451,8 @@ def test_cython_group_transform_algos(self): is_datetimelike = True data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None] actual = np.zeros_like(data, dtype='int64') - algos.group_cumsum(actual, data.view('int64'), labels, - is_datetimelike) + groupby.group_cumsum(actual, data.view('int64'), labels, + is_datetimelike) expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), np.timedelta64(5, 'ns')]) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ce925f756edb7..f8eac7a8911ad 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -10,7 +10,8 @@ import pandas as pd from pandas import compat -from pandas._libs import algos as libalgos, hashtable +from pandas._libs 
import (groupby as libgroupby, algos as libalgos, + hashtable) from pandas._libs.hashtable import unique_label_indices from pandas.compat import lrange import pandas.core.algorithms as algos @@ -891,7 +892,7 @@ def test_group_var_constant(self): class TestGroupVarFloat64(tm.TestCase, GroupVarTestMixin): __test__ = True - algo = algos.algos.group_var_float64 + algo = libgroupby.group_var_float64 dtype = np.float64 rtol = 1e-5 @@ -914,7 +915,7 @@ def test_group_var_large_inputs(self): class TestGroupVarFloat32(tm.TestCase, GroupVarTestMixin): __test__ = True - algo = algos.algos.group_var_float32 + algo = libgroupby.group_var_float32 dtype = np.float32 rtol = 1e-2 diff --git a/setup.py b/setup.py index 3e0a6b41152dc..8e690f05b818c 100755 --- a/setup.py +++ b/setup.py @@ -110,8 +110,9 @@ def is_platform_mac(): _pxi_dep_template = { - 'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_groupby_helper.pxi.in', + 'algos': ['_libs/algos_common_helper.pxi.in', '_libs/algos_take_helper.pxi.in', '_libs/algos_rank_helper.pxi.in'], + 'groupby': ['_libs/groupby_helper.pxi.in'], 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], 'reshape': ['_libs/reshape_helper.pxi.in'], 'hashtable': ['_libs/hashtable_class_helper.pxi.in', @@ -496,8 +497,11 @@ def pxd(name): 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['index']}, '_libs.algos': {'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], 'depends': _pxi_dep['algos']}, + '_libs.groupby': {'pyxfile': '_libs/groupby', + 'pxdfiles': ['_libs/src/util', '_libs/algos'], + 'depends': _pxi_dep['groupby']}, '_libs.join': {'pyxfile': '_libs/join', 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, From 79581ffe6fb73089dfa8394c2f4e44677acfe1ce Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 22 Mar 2017 09:56:58 -0400 Subject: [PATCH 262/353] travis deduping on prs closes #12438 Author: Jeff Reback Closes #15780 from jreback/dedupe and squashes the following commits: 64f217e [Jeff Reback] replace . 
by space b6f2a62 [Jeff Reback] formatting 0c33d9b [Jeff Reback] tests commit 24f6ae6 [Jeff Reback] CI: fast finish travis builds for the same PR --- .travis.yml | 5 ++- ci/install_travis.sh | 41 +++++++++++---------- ci/travis_fast_finish.py | 77 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 21 deletions(-) create mode 100755 ci/travis_fast_finish.py diff --git a/.travis.yml b/.travis.yml index 67b37f1d58931..270f8c2fc76c3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -177,15 +177,14 @@ matrix: - USE_CACHE=true before_install: + - echo "Checking to see if this build is outdated" + - ci/travis_fast_finish.py || { echo "Failing outdated build to end it."; exit 1; } - echo "before_install" - source ci/travis_process_gbq_encryption.sh - - echo $VIRTUAL_ENV - export PATH="$HOME/miniconda3/bin:$PATH" - df -h - - date - pwd - uname -a - - python -V - git --version - git tag - ci/before_install_travis.sh diff --git a/ci/install_travis.sh b/ci/install_travis.sh index c940083f5ae9e..66633c0592748 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -1,18 +1,6 @@ #!/bin/bash -# There are 2 distinct pieces that get zipped and cached -# - The venv site-packages dir including the installed dependencies -# - The pandas build artifacts, using the build cache support via -# scripts/use_build_cache.py -# -# if the user opted in to use the cache and we're on a whitelisted fork -# - if the server doesn't hold a cached version of venv/pandas build, -# do things the slow way, and put the results on the cache server -# for the next time. -# - if the cache files are available, instal some necessaries via apt -# (no compiling needed), then directly goto script and collect 200$. -# - +# edit the locale file if needed function edit_init() { if [ -n "$LOCALE_OVERRIDE" ]; then @@ -26,15 +14,18 @@ function edit_init() fi } +echo echo "[install_travis]" edit_init home_dir=$(pwd) -echo "[home_dir: $home_dir]" +echo +echo "[home_dir]: $home_dir" # install miniconda MINICONDA_DIR="$HOME/miniconda3" +echo echo "[Using clean Miniconda install]" if [ -d "$MINICONDA_DIR" ]; then @@ -49,14 +40,17 @@ else fi time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 +echo echo "[show conda]" which conda +echo echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 conda update -q conda +echo echo "[add channels]" # add the pandas channel to take priority # to add extra packages @@ -73,26 +67,28 @@ fi conda info -a || exit 1 # set the compiler cache to work +echo if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[Using ccache]" export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH gcc=$(which gcc) - echo "[gcc: $gcc]" + echo "[gcc]: $gcc" ccache=$(which ccache) - echo "[ccache: $ccache]" + echo "[ccache]: $ccache" export CC='ccache gcc' elif [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then echo "[Using ccache]" time brew install ccache export PATH=/usr/local/opt/ccache/libexec:$PATH gcc=$(which gcc) - echo "[gcc: $gcc]" + echo "[gcc]: $gcc" ccache=$(which ccache) - echo "[ccache: $ccache]" + echo "[ccache]: $ccache" else echo "[Not using ccache]" fi +echo echo "[create env]" # may have installation instructions for this build @@ -106,6 +102,7 @@ else fi # build deps +echo echo "[build installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build" if [ -e ${REQ} ]; then @@ -113,6 +110,7 @@ if [ -e ${REQ} ]; then fi # may have addtl installation instructions for this build +echo echo 
"[build addtl installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build.sh" if [ -e ${REQ} ]; then @@ -132,6 +130,7 @@ if [ "$COVERAGE" ]; then pip install coverage pytest-cov fi +echo if [ "$BUILD_TEST" ]; then # build & install testing @@ -151,6 +150,7 @@ else fi # we may have run installations +echo echo "[conda installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" if [ -e ${REQ} ]; then @@ -158,6 +158,7 @@ if [ -e ${REQ} ]; then fi # we may have additional pip installs +echo echo "[pip installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" if [ -e ${REQ} ]; then @@ -165,6 +166,7 @@ if [ -e ${REQ} ]; then fi # may have addtl installation instructions for this build +echo echo "[addtl installs]" REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" if [ -e ${REQ} ]; then @@ -176,14 +178,17 @@ if [ -z "$BUILD_TEST" ]; then # remove any installed pandas package # w/o removing anything else + echo echo "[removing installed pandas]" conda remove pandas --force # install our pandas + echo echo "[running setup.py develop]" python setup.py develop || exit 1 fi +echo echo "[done]" exit 0 diff --git a/ci/travis_fast_finish.py b/ci/travis_fast_finish.py new file mode 100755 index 0000000000000..c2e2a9159918b --- /dev/null +++ b/ci/travis_fast_finish.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +# script to cancel previous travis builds for the same PR +# originally from +# https://github.com/conda-forge/staged-recipes/pull/2257 + +try: + from future_builtins import ( + map, + filter, + ) +except ImportError: + pass + +import codecs +import contextlib +import json +import os + +try: + from urllib.request import ( + Request, + urlopen, + ) +except ImportError: + from urllib2 import ( + Request, + urlopen, + ) + + +def check_latest_pr_build(repo, pr, build_num): + # Not a PR so it is latest. + if pr is None: + return True + + headers = { + "Accept": "application/vnd.travis-ci.2+json", + } + url = "https://api.travis-ci.org/repos/{repo}/builds?event_type=pull_request" + + request = Request(url.format(repo=repo), headers=headers) + with contextlib.closing(urlopen(request)) as response: + reader = codecs.getreader("utf-8") + data = json.load(reader(response)) + + # Parse the response to get a list of build numbers for this PR. + builds = data["builds"] + pr_builds = filter(lambda b: b["pull_request_number"] == pr, builds) + pr_build_nums = sorted(map(lambda b: int(b["number"]), pr_builds)) + + print("build_num: {}".format(build_num)) + print("pr_build_nums: {}".format(','.join([str(n) for n in pr_build_nums]))) + + # Check if our build number is the latest (largest) + # out of all of the builds for this PR. 
+ if build_num < max(pr_build_nums): + return False + else: + return True + + +def main(): + repo = os.environ["TRAVIS_REPO_SLUG"] + + pr = os.environ["TRAVIS_PULL_REQUEST"] + pr = None if pr == "false" else int(pr) + build_num = int(os.environ["TRAVIS_BUILD_NUMBER"]) + + print("checking for fast_finish: {}-{}-{}".format(repo, pr, build_num)) + + return int(check_latest_pr_build(repo, pr, build_num) is False) + + +if __name__ == "__main__": + import sys + sys.exit(main()) From 1a266ee5809990244f1fe6daeb717878d06cf783 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Mar 2017 14:42:52 -0400 Subject: [PATCH 263/353] API: return Index instead of array from DatetimeIndex field accessors (GH15022) closes #15022 Author: Joris Van den Bossche Closes #15589 from jorisvandenbossche/api-dt-fields-index and squashes the following commits: ffacd38 [Joris Van den Bossche] doc fixes 41728a9 [Joris Van den Bossche] FIX: boolean fields should still return array 6317b6b [Joris Van den Bossche] Add whatsnew 96ed069 [Joris Van den Bossche] Preserve name for PeriodIndex field accessors cdf6cae [Joris Van den Bossche] Preserve name for DatetimeIndex field accessors f2831e2 [Joris Van den Bossche] Update timedelta accessors 52f9008 [Joris Van den Bossche] Fix tests 41008c7 [Joris Van den Bossche] API: return Index instead of array from datetime field accessors (GH15022) --- doc/source/whatsnew/v0.20.0.txt | 33 ++++++++- pandas/tests/indexes/datetimes/test_misc.py | 33 ++++++++- .../tests/indexes/period/test_construction.py | 4 +- pandas/tests/indexes/period/test_period.py | 10 +-- .../indexes/timedeltas/test_timedelta.py | 24 ++++--- pandas/tests/scalar/test_timestamp.py | 13 +++- pandas/tests/tools/test_pivot.py | 2 +- pandas/tests/tools/test_util.py | 8 +-- pandas/tests/tseries/test_timezones.py | 70 +++++++++---------- pandas/tseries/common.py | 2 + pandas/tseries/converter.py | 2 +- pandas/tseries/index.py | 19 +++-- pandas/tseries/period.py | 5 +- pandas/tseries/tdi.py | 5 +- pandas/tseries/util.py | 4 +- 15 files changed, 156 insertions(+), 78 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 5ac7624856040..6d951af139b42 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -471,6 +471,38 @@ New Behavior: s.map(lambda x: x.hour) + +.. _whatsnew_0200.api_breaking.index_dt_field: + +Accessing datetime fields of Index now return Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The datetime-related attributes (see :ref:`here ` +for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously +returned numpy arrays. They will now return a new ``Index`` object, except +in the case of a boolean field, where the result will stil be a boolean ndarray. (:issue:`15022`) + +Previous behaviour: + +.. code-block:: ipython + + In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H') + + In [2]: idx.hour + Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32) + +New Behavior: + +.. ipython:: python + + idx = pd.date_range("2015-01-01", periods=5, freq='10H') + idx.hour + +This has the advantage that specific ``Index`` methods are still available on the +result. On the other hand, this might have backward incompatibilities: e.g. +compared to numpy arrays, ``Index`` objects are not mutable. To get the original +ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. + .. 
_whatsnew_0200.api_breaking.s3: S3 File Handling @@ -936,4 +968,3 @@ Bug Fixes - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) - diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index e99f1d46637c2..ef24c493f5090 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -172,6 +172,7 @@ def test_normalize(self): class TestDatetime64(tm.TestCase): def test_datetimeindex_accessors(self): + dti_naive = DatetimeIndex(freq='D', start=datetime(1998, 1, 1), periods=365) # GH 13303 @@ -255,6 +256,34 @@ def test_datetimeindex_accessors(self): self.assertEqual(len(dti.is_year_end), 365) self.assertEqual(len(dti.weekday_name), 365) + dti.name = 'name' + + # non boolean accessors -> return Index + for accessor in ['year', 'month', 'day', 'hour', 'minute', + 'second', 'microsecond', 'nanosecond', + 'dayofweek', 'dayofyear', 'weekofyear', + 'quarter', 'weekday_name']: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == 'name' + + # boolean accessors -> return array + for accessor in ['is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', + 'is_year_start', 'is_year_end']: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name') + tm.assert_index_equal(res, exp) + dti = DatetimeIndex(freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) @@ -313,5 +342,5 @@ def test_datetimeindex_accessors(self): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) - self.assert_numpy_array_equal(dti.nanosecond, - np.arange(10, dtype=np.int32)) + self.assert_index_equal(dti.nanosecond, + pd.Index(np.arange(10, dtype=np.int64))) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index f13a84f4f0e92..ab70ad59846e8 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -91,8 +91,8 @@ def test_constructor_arrays_negative_year(self): pindex = PeriodIndex(year=years, quarter=quarters) - self.assert_numpy_array_equal(pindex.year, years) - self.assert_numpy_array_equal(pindex.quarter, quarters) + self.assert_index_equal(pindex.year, pd.Index(years)) + self.assert_index_equal(pindex.quarter, pd.Index(quarters)) def test_constructor_invalid_quarters(self): self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 4fbadfca06ede..6a6c0ab49b15d 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -658,12 +658,12 @@ def test_negative_ordinals(self): def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', - '2012-03', '2012-04'], freq='D') + '2012-03', '2012-04'], freq='D', name='name') - exp = np.array([2011, 2011, -1, 2012, 2012], dtype=np.int64) - 
self.assert_numpy_array_equal(idx.year, exp) - exp = np.array([1, 2, -1, 3, 4], dtype=np.int64) - self.assert_numpy_array_equal(idx.month, exp) + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name') + self.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name') + self.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 4c8571e4f08f9..3abc2d8422fd3 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -424,7 +424,7 @@ def test_total_seconds(self): freq='s') expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. / 1e9] - tm.assert_almost_equal(rng.total_seconds(), np.array(expt)) + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) # test Series s = Series(rng) @@ -486,16 +486,16 @@ def test_append_numpy_bug_1681(self): def test_fields(self): rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, freq='s') - self.assert_numpy_array_equal(rng.days, np.array( - [1, 1], dtype='int64')) - self.assert_numpy_array_equal( + self.assert_index_equal(rng.days, Index([1, 1], dtype='int64')) + self.assert_index_equal( rng.seconds, - np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], - dtype='int64')) - self.assert_numpy_array_equal(rng.microseconds, np.array( - [100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) - self.assert_numpy_array_equal(rng.nanoseconds, np.array( - [456, 456], dtype='int64')) + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], + dtype='int64')) + self.assert_index_equal( + rng.microseconds, + Index([100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) + self.assert_index_equal(rng.nanoseconds, + Index([456, 456], dtype='int64')) self.assertRaises(AttributeError, lambda: rng.hours) self.assertRaises(AttributeError, lambda: rng.minutes) @@ -509,6 +509,10 @@ def test_fields(self): tm.assert_series_equal(s.dt.seconds, Series( [10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])) + # preserve name (GH15589) + rng.name = 'name' + assert rng.days.name == 'name' + def test_freq_conversion(self): # doc example diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 082f0fa9c40d5..bbf33c4db5ad7 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -597,9 +597,20 @@ def test_nat_fields(self): def test_nat_vector_field_access(self): idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + # non boolean fields fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'is_leap_year'] + 'days_in_month'] + + for field in fields: + result = getattr(idx, field) + expected = [getattr(x, field) for x in idx] + self.assert_index_equal(result, pd.Index(expected)) + + # boolean fields + fields = ['is_leap_year'] + # other boolean fields like 'is_month_start' and 'is_month_end' + # not yet supported by NaT for field in fields: result = getattr(idx, field) diff --git a/pandas/tests/tools/test_pivot.py b/pandas/tests/tools/test_pivot.py index 62863372dbd02..4502f232c6d9c 100644 --- a/pandas/tests/tools/test_pivot.py +++ b/pandas/tests/tools/test_pivot.py @@ -1367,7 +1367,7 @@ def test_daily(self): with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): annual = pivot_annual(ts, 'D') - doy = ts.index.dayofyear + doy = np.asarray(ts.index.dayofyear) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 diff --git a/pandas/tests/tools/test_util.py b/pandas/tests/tools/test_util.py index 2672db13a959f..ed64e8f42d84b 100644 --- a/pandas/tests/tools/test_util.py +++ b/pandas/tests/tools/test_util.py @@ -31,10 +31,10 @@ def test_datetimeindex(self): # make sure that the ordering on datetimeindex is consistent x = date_range('2000-01-01', periods=2) result1, result2 = [Index(y).day for y in cartesian_product([x, x])] - expected1 = np.array([1, 1, 2, 2], dtype=np.int32) - expected2 = np.array([1, 2, 1, 2], dtype=np.int32) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) + expected1 = Index([1, 1, 2, 2]) + expected2 = Index([1, 2, 1, 2]) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) def test_empty(self): # product of empty factors diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index 1ccc1652d2719..1fc0e1b73df6b 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -358,8 +358,8 @@ def test_field_access_localize(self): dr = date_range('2011-10-02 00:00', freq='h', periods=10, tz=self.tzstr('America/Atikokan')) - expected = np.arange(10, dtype=np.int32) - self.assert_numpy_array_equal(dr.hour, expected) + expected = Index(np.arange(10, dtype=np.int64)) + self.assert_index_equal(dr.hour, expected) def test_with_tz(self): tz = self.tz('US/Central') @@ -947,8 +947,8 @@ def test_tz_convert_hour_overflow_dst(self): '2009-05-12 09:50:32'] tt = to_datetime(ts).tz_localize('US/Eastern') ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = ['2008-05-12 13:50:00', @@ -956,8 +956,8 @@ def test_tz_convert_hour_overflow_dst(self): '2009-05-12 13:50:32'] tt = to_datetime(ts).tz_localize('UTC') ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = ['2008-05-12 09:50:00', @@ -965,8 +965,8 @@ def test_tz_convert_hour_overflow_dst(self): '2008-05-12 09:50:32'] tt = to_datetime(ts).tz_localize('US/Eastern') ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = ['2008-05-12 13:50:00', @@ -974,8 +974,8 @@ def test_tz_convert_hour_overflow_dst(self): '2008-05-12 13:50:32'] tt = to_datetime(ts).tz_localize('UTC') ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) def test_tz_convert_hour_overflow_dst_timestamps(self): # Regression test for: @@ -989,8 +989,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2009-05-12 09:50:32', tz=tz)] tt = to_datetime(ts) ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], 
dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), @@ -998,8 +998,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2009-05-12 13:50:32', tz='UTC')] tt = to_datetime(ts) ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = [Timestamp('2008-05-12 09:50:00', tz=tz), @@ -1007,8 +1007,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2008-05-12 09:50:32', tz=tz)] tt = to_datetime(ts) ut = tt.tz_convert('UTC') - expected = np.array([13, 14, 13], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([13, 14, 13]) + self.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), @@ -1016,8 +1016,8 @@ def test_tz_convert_hour_overflow_dst_timestamps(self): Timestamp('2008-05-12 13:50:32', tz='UTC')] tt = to_datetime(ts) ut = tt.tz_convert('US/Eastern') - expected = np.array([9, 9, 9], dtype=np.int32) - self.assert_numpy_array_equal(ut.hour, expected) + expected = Index([9, 9, 9]) + self.assert_index_equal(ut.hour, expected) def test_tslib_tz_convert_trans_pos_plus_1__bug(self): # Regression test for tslib.tz_convert(vals, tz1, tz2). @@ -1028,9 +1028,8 @@ def test_tslib_tz_convert_trans_pos_plus_1__bug(self): idx = idx.tz_localize('UTC') idx = idx.tz_convert('Europe/Moscow') - expected = np.repeat(np.array([3, 4, 5], dtype=np.int32), - np.array([n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + self.assert_index_equal(idx.hour, Index(expected)) def test_tslib_tz_convert_dst(self): for freq, n in [('H', 1), ('T', 60), ('S', 3600)]: @@ -1039,62 +1038,57 @@ def test_tslib_tz_convert_dst(self): tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, - 0, 1, 3, 4, 5], dtype=np.int32), + 0, 1, 3, 4, 5]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, tz='US/Eastern') idx = idx.tz_convert('UTC') - expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], - dtype=np.int32), + expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) # End DST idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, tz='UTC') idx = idx.tz_convert('US/Eastern') expected = np.repeat(np.array([19, 20, 21, 22, 23, - 0, 1, 1, 2, 3, 4], dtype=np.int32), + 0, 1, 1, 2, 3, 4]), np.array([n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + self.assert_index_equal(idx.hour, Index(expected)) idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, tz='US/Eastern') idx = idx.tz_convert('UTC') expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10], dtype=np.int32), + 7, 8, 9, 10]), np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1])) - self.assert_numpy_array_equal(idx.hour, expected) + 
self.assert_index_equal(idx.hour, Index(expected)) # daily # Start DST idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([19, 19], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([19, 19])) idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([5, 5], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([5, 5])) # End DST idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', tz='UTC') idx = idx.tz_convert('US/Eastern') - self.assert_numpy_array_equal(idx.hour, - np.array([20, 20], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([20, 20])) idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', tz='US/Eastern') idx = idx.tz_convert('UTC') - self.assert_numpy_array_equal(idx.hour, - np.array([4, 4], dtype=np.int32)) + self.assert_index_equal(idx.hour, Index([4, 4])) def test_tzlocal(self): # GH 13583 diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 82fcdbcd0d367..f9fd27176487c 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -105,6 +105,8 @@ def _delegate_property_get(self, name): elif not is_list_like(result): return result + result = np.asarray(result) + # blow up if we operate on categories if self.orig is not None: result = take_1d(result, self.orig.cat.codes) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 8aea14a2688d1..bc768a8bc5b58 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -455,7 +455,7 @@ def period_break(dates, period): """ current = getattr(dates, period) previous = getattr(dates - 1, period) - return (current - previous).nonzero()[0] + return np.nonzero(current - previous)[0] def has_level_label(label_flags, vmin): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 983c1a4cd9de9..11d2d29597fc0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -64,6 +64,7 @@ def f(self): if self.tz is not utc: values = self._local_timestamps() + # boolean accessors -> return array if field in ['is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end']: @@ -73,16 +74,20 @@ def f(self): result = libts.get_start_end_field(values, field, self.freqstr, month_kw) - elif field in ['weekday_name']: - result = libts.get_date_name_field(values, field) - return self._maybe_mask_results(result) + return self._maybe_mask_results(result, convert='float64') elif field in ['is_leap_year']: # no need to mask NaT return libts.get_date_field(values, field) + + # non-boolean accessors -> return Index + elif field in ['weekday_name']: + result = libts.get_date_name_field(values, field) + result = self._maybe_mask_results(result) else: result = libts.get_date_field(values, field) + result = self._maybe_mask_results(result, convert='float64') - return self._maybe_mask_results(result, convert='float64') + return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring @@ -1909,9 +1914,9 @@ def to_julian_date(self): """ # http://mysite.verizon.net/aesir_research/date/jdalg2.htm - year = self.year - month = self.month - day = self.day + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) testarr = month < 3 year[testarr] -= 1 month[testarr] += 12 diff --git a/pandas/tseries/period.py 
b/pandas/tseries/period.py index f7e9ba9eaa9b1..c279d5a9342e8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -52,7 +52,8 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - return get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._values, base) + return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring return property(f) @@ -585,7 +586,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(self.year) + return tslib._isleapyear_arr(np.asarray(self.year)) @property def start_time(self): diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 13d844bb6a399..55333890640c1 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -374,7 +374,7 @@ def _get_field(self, m): else: result = np.array([getattr(Timedelta(val), m) for val in values], dtype='int64') - return result + return Index(result, name=self.name) @property def days(self): @@ -437,7 +437,8 @@ def total_seconds(self): .. versionadded:: 0.17.0 """ - return self._maybe_mask_results(1e-9 * self.asi8) + return Index(self._maybe_mask_results(1e-9 * self.asi8), + name=self.name) def to_pytimedelta(self): """ diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index dc460dee8415b..da3bb075dd02c 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -54,7 +54,7 @@ def pivot_annual(series, freq=None): if freq == 'D': width = 366 - offset = index.dayofyear - 1 + offset = np.asarray(index.dayofyear) - 1 # adjust for leap year offset[(~isleapyear(year)) & (offset >= 59)] += 1 @@ -63,7 +63,7 @@ def pivot_annual(series, freq=None): # todo: strings like 1/1, 1/25, etc.? elif freq in ('M', 'BM'): width = 12 - offset = index.month - 1 + offset = np.asarray(index.month) - 1 columns = lrange(1, 13) elif freq == 'H': width = 8784 From 94720d951b4e804bab72abc33dffeb2186ecb310 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Mar 2017 14:48:38 -0400 Subject: [PATCH 264/353] API: change default behaviour of str.match from deprecated extract to match (GH5224) This PR changes the default behaviour of `str.match` from extracting groups to just a match (True/False). The previous default behaviour was deprecated since 0.13.0 (https://github.com/pandas-dev/pandas/pull/5224) Author: Joris Van den Bossche Closes #15257 from jorisvandenbossche/str-match and squashes the following commits: 0ab36b6 [Joris Van den Bossche] Raise FutureWarning instead of UserWarning for as_indexer a2bae51 [Joris Van den Bossche] raise error in case of regex with groups and as_indexer=False 87446c3 [Joris Van den Bossche] fix test 0788de2 [Joris Van den Bossche] API: change default behaviour of str.match from deprecated extract to match (GH5224) --- doc/source/text.rst | 12 ------- doc/source/whatsnew/v0.20.0.txt | 7 ++++ pandas/core/strings.py | 59 +++++++++--------------------- pandas/tests/test_strings.py | 63 ++++++++++++--------------------- 4 files changed, 46 insertions(+), 95 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 2b2520cb6100f..b110ef2167a03 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -385,18 +385,6 @@ or match a pattern: The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. -.. 
warning:: - - In previous versions, ``match`` was for *extracting* groups, - returning a not-so-convenient Series of tuples. The new method ``extract`` - (described in the previous section) is now preferred. - - This old, deprecated behavior of ``match`` is still the default. As - demonstrated above, use the new behavior by setting ``as_indexer=True``. - In this mode, ``match`` is analogous to ``contains``, returning a boolean - Series. The new behavior will become the default behavior in a future - release. - Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take an extra ``na`` argument so missing values can be considered True or False: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6d951af139b42..37a70435ed6ff 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -761,6 +761,12 @@ Other API Changes - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) - ``SparseDataFrame.default_fill_value`` will be 0, previously was ``nan`` in the return from ``pd.get_dummies(..., sparse=True)`` (:issue:`15594`) +- The default behaviour of ``Series.str.match`` has changed from extracting + groups to matching the pattern. The extracting behaviour was deprecated + since pandas version 0.13.0 and can be done with the ``Series.str.extract`` + method (:issue:`5224`). As a consequence, the ``as_indexer`` keyword is + ignored (no longer needed to specify the new behaviour) and is deprecated. + .. _whatsnew_0200.deprecations: @@ -777,6 +783,7 @@ Deprecations - ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) - importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explict imports (:issue:`15358`) - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) +- The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`). - The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) * ``pd.pnow()``, replaced by ``Period.now()`` * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b5b5d58235eaa..504d3dd47cc21 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -464,11 +464,9 @@ def rep(x, r): return result -def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False): +def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None): """ - Deprecated: Find groups in each string in the Series/Index - using passed regular expression. - If as_indexer=True, determine if each string matches a regular expression. + Determine if each string matches a regular expression. Parameters ---------- @@ -479,60 +477,37 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False): flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE na : default NaN, fill value for missing values. 
- as_indexer : False, by default, gives deprecated behavior better achieved - using str_extract. True return boolean indexer. + as_indexer : DEPRECATED Returns ------- Series/array of boolean values - if as_indexer=True - Series/Index of tuples - if as_indexer=False, default but deprecated See Also -------- contains : analogous, but less strict, relying on re.search instead of re.match - extract : now preferred to the deprecated usage of match (as_indexer=False) + extract : extract matched groups - Notes - ----- - To extract matched groups, which is the deprecated behavior of match, use - str.extract. """ - if not case: flags |= re.IGNORECASE regex = re.compile(pat, flags=flags) - if (not as_indexer) and regex.groups > 0: - # Do this first, to make sure it happens even if the re.compile - # raises below. - warnings.warn("In future versions of pandas, match will change to" - " always return a bool indexer.", FutureWarning, - stacklevel=3) - - if as_indexer and regex.groups > 0: - warnings.warn("This pattern has match groups. To actually get the" - " groups, use str.extract.", UserWarning, stacklevel=3) + if (as_indexer is False) and (regex.groups > 0): + raise ValueError("as_indexer=False with a pattern with groups is no " + "longer supported. Use '.str.extract(pat)' instead") + elif as_indexer is not None: + # Previously, this keyword was used for changing the default but + # deprecated behaviour. This keyword is now no longer needed. + warnings.warn("'as_indexer' keyword was specified but is ignored " + "(match now returns a boolean indexer by default), " + "and will be removed in a future version.", + FutureWarning, stacklevel=3) - # If not as_indexer and regex.groups == 0, this returns empty lists - # and is basically useless, so we will not warn. - - if (not as_indexer) and regex.groups > 0: - dtype = object - - def f(x): - m = regex.match(x) - if m: - return m.groups() - else: - return [] - else: - # This is the new behavior of str_match. 
- dtype = bool - f = lambda x: bool(regex.match(x)) + dtype = bool + f = lambda x: bool(regex.match(x)) return _na_map(f, arr, na, dtype=dtype) @@ -1587,7 +1562,7 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): return self._wrap_result(result) @copy(str_match) - def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False): + def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=None): result = str_match(self._data, pat, case=case, flags=flags, na=na, as_indexer=as_indexer) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f8ce0070b2c78..7a68ec8f368ae 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -559,64 +559,44 @@ def test_repeat(self): exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, u('dddddd')]) tm.assert_series_equal(result, exp) - def test_deprecated_match(self): - # Old match behavior, deprecated (but still default) in 0.13 + def test_match(self): + # New match behavior introduced in 0.13 values = Series(['fooBAD__barBAD', NA, 'foo']) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([('BAD__', 'BAD'), NA, []]) - tm.assert_series_equal(result, exp) - - # mixed - mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), - 'foo', None, 1, 2.]) - - with tm.assert_produces_warning(): - rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') - xp = Series([('BAD_', 'BAD'), NA, ('BAD_', 'BAD'), - NA, NA, [], NA, NA, NA]) - tm.assertIsInstance(rs, Series) - tm.assert_series_equal(rs, xp) - - # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([(u('BAD__'), u('BAD')), NA, []]) + result = values.str.match('.*(BAD[_]+).*(BAD)') + exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - def test_match(self): - # New match behavior introduced in 0.13 values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + result = values.str.match('.*BAD[_]+.*BAD') exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - # If no groups, use new behavior even when as_indexer is False. - # (Old behavior is pretty much useless in this case.) 
+ # test passing as_indexer still works but is ignored values = Series(['fooBAD__barBAD', NA, 'foo']) - result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) exp = Series([True, NA, False]) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) + tm.assert_series_equal(result, exp) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) tm.assert_series_equal(result, exp) + with tm.assert_produces_warning(FutureWarning): + result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + tm.assert_series_equal(result, exp) + self.assertRaises(ValueError, values.str.match, '.*(BAD[_]+).*(BAD)', + as_indexer=False) # mixed mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), 'foo', None, 1, 2.]) - - with tm.assert_produces_warning(): - rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') xp = Series([True, NA, True, NA, NA, False, NA, NA, NA]) tm.assertIsInstance(rs, Series) tm.assert_series_equal(rs, xp) # unicode values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - - with tm.assert_produces_warning(): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) + result = values.str.match('.*(BAD[_]+).*(BAD)') exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) @@ -2610,10 +2590,11 @@ def test_match_findall_flags(self): pat = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})' - with tm.assert_produces_warning(FutureWarning): - result = data.str.match(pat, flags=re.IGNORECASE) + result = data.str.extract(pat, flags=re.IGNORECASE, expand=True) + self.assertEqual(result.iloc[0].tolist(), ['dave', 'google', 'com']) - self.assertEqual(result[0], ('dave', 'google', 'com')) + result = data.str.match(pat, flags=re.IGNORECASE) + self.assertEqual(result[0], True) result = data.str.findall(pat, flags=re.IGNORECASE) self.assertEqual(result[0][0], ('dave', 'google', 'com')) From 7fa77527c747f2d91b6c16fe512cd05a7a072ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0ediv=C3=BD?= Date: Wed, 22 Mar 2017 22:36:53 +0100 Subject: [PATCH 265/353] Update testing.py (#15784) Docs typo fix --- pandas/util/testing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index cf76f4ead77e3..9a9f3c6c6b945 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1151,7 +1151,7 @@ def assert_series_equal(left, right, check_dtype=True, Whether to compare number exactly. check_names : bool, default True Whether to check the Series and Index names attribute. - check_dateteimelike_compat : bool, default False + check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. @@ -1264,7 +1264,7 @@ def assert_frame_equal(left, right, check_dtype=True, If True, compare by blocks. check_exact : bool, default False Whether to compare number exactly. - check_dateteimelike_compat : bool, default False + check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. 
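As an aside to the ``str.match`` change in PATCH 264/353 above: under the new default, ``match`` answers a True/False question per element, and group extraction is done explicitly with ``str.extract``. The snippet below is only an illustration of that split, based on the tests in that patch; it is not part of any patch in this series:

    import pandas as pd

    s = pd.Series(['fooBAD__barBAD', 'foo'])

    # new default behaviour: a boolean indexer, one value per element
    s.str.match('.*(BAD[_]+).*(BAD)')                    # [True, False]

    # to get the matched groups themselves, use extract instead
    s.str.extract('.*(BAD[_]+).*(BAD)', expand=True)     # first row -> 'BAD__', 'BAD'
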
From 1bcb671877287be731ce677aaf96686278b69f9a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Mar 2017 15:06:49 -0400 Subject: [PATCH 266/353] CI: remove travis dedupe as enabled auto-cancellation xref https://github.com/pandas- dev/pandas/commit/79581ffe6fb73089dfa8394c2f4e44677acfe1ce of course Travis just announced auto-cancellation / it looks good when I enabled it. so removing this :< Author: Jeff Reback Closes #15783 from jreback/cancel and squashes the following commits: 8286d70 [Jeff Reback] CI: remove travis dedupe as enabled auto-cancellation --- .travis.yml | 2 -- ci/travis_fast_finish.py | 77 ---------------------------------------- 2 files changed, 79 deletions(-) delete mode 100755 ci/travis_fast_finish.py diff --git a/.travis.yml b/.travis.yml index 270f8c2fc76c3..eb2a58b0616ef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -177,8 +177,6 @@ matrix: - USE_CACHE=true before_install: - - echo "Checking to see if this build is outdated" - - ci/travis_fast_finish.py || { echo "Failing outdated build to end it."; exit 1; } - echo "before_install" - source ci/travis_process_gbq_encryption.sh - export PATH="$HOME/miniconda3/bin:$PATH" diff --git a/ci/travis_fast_finish.py b/ci/travis_fast_finish.py deleted file mode 100755 index c2e2a9159918b..0000000000000 --- a/ci/travis_fast_finish.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -# script to cancel previous travis builds for the same PR -# originally from -# https://github.com/conda-forge/staged-recipes/pull/2257 - -try: - from future_builtins import ( - map, - filter, - ) -except ImportError: - pass - -import codecs -import contextlib -import json -import os - -try: - from urllib.request import ( - Request, - urlopen, - ) -except ImportError: - from urllib2 import ( - Request, - urlopen, - ) - - -def check_latest_pr_build(repo, pr, build_num): - # Not a PR so it is latest. - if pr is None: - return True - - headers = { - "Accept": "application/vnd.travis-ci.2+json", - } - url = "https://api.travis-ci.org/repos/{repo}/builds?event_type=pull_request" - - request = Request(url.format(repo=repo), headers=headers) - with contextlib.closing(urlopen(request)) as response: - reader = codecs.getreader("utf-8") - data = json.load(reader(response)) - - # Parse the response to get a list of build numbers for this PR. - builds = data["builds"] - pr_builds = filter(lambda b: b["pull_request_number"] == pr, builds) - pr_build_nums = sorted(map(lambda b: int(b["number"]), pr_builds)) - - print("build_num: {}".format(build_num)) - print("pr_build_nums: {}".format(','.join([str(n) for n in pr_build_nums]))) - - # Check if our build number is the latest (largest) - # out of all of the builds for this PR. 
- if build_num < max(pr_build_nums): - return False - else: - return True - - -def main(): - repo = os.environ["TRAVIS_REPO_SLUG"] - - pr = os.environ["TRAVIS_PULL_REQUEST"] - pr = None if pr == "false" else int(pr) - build_num = int(os.environ["TRAVIS_BUILD_NUMBER"]) - - print("checking for fast_finish: {}-{}-{}".format(repo, pr, build_num)) - - return int(check_latest_pr_build(repo, pr, build_num) is False) - - -if __name__ == "__main__": - import sys - sys.exit(main()) From 56ccad8229824584678e22815f4f180a91309c9d Mon Sep 17 00:00:00 2001 From: Kernc Date: Thu, 23 Mar 2017 18:42:20 +0100 Subject: [PATCH 267/353] DOC: .groupby() aligns Series, accepts ndarray closes #15789 closes #15244 --- pandas/core/generic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87052800b8fb5..134840728d931 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4129,11 +4129,14 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Parameters ---------- - by : mapping function / list of functions, dict, Series, or tuple / - list of column names or index level names. + by : mapping function / list of functions, dict, Series, ndarray, + or tuple / list of column names or index level names or + Series or ndarrays Called on each element of the object index to determine the groups. If a dict or Series is passed, the Series or dict VALUES will be - used to determine the groups + used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If ndarray is passed, the + values as-is determine the groups. axis : int, default 0 level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular From 39a46fff8d73751dab5f5abfd50cbe221a2f91d4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Mar 2017 15:11:07 -0400 Subject: [PATCH 268/353] COMPAT: 3.6.1 compat for change in PySlice_GetIndices_Ex This doesn't actually matter to any tests except for some internal consistency ones. Bonus is that it eliminates a warning :< note that we aren't actually testing this (yet) on Travis as our 3.6 build uses conda-forge and 3.6.1 is not there as of yet. Its in defaults though (and shows up on appveyor build). 
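For context, ``PySlice_GetIndicesEx`` is the C-level counterpart of Python's ``slice.indices``: it resolves a (start, stop, step) triple against a sequence length. The short sketch below only illustrates that resolution for the open-ended, negative-step slices exercised by the PY361-guarded tests in this patch; it is not part of the patch itself:

    # slice.indices mirrors what the C API computes for BlockPlacement
    s = slice(2, None, -1)              # open stop, negative step
    start, stop, step = s.indices(5)    # -> (2, -1, -1)
    list(range(start, stop, step))      # [2, 1, 0]
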
Author: Jeff Reback Closes #15790 from jreback/py361 and squashes the following commits: 42ddddc [Jeff Reback] change to version < 3 d36902c [Jeff Reback] COMPAT: 3.6.1 compat for change in PySlice_GetIndices_Ex --- pandas/_libs/lib.pyx | 19 ++++++++-------- pandas/_libs/src/compat_helper.h | 37 ++++++++++++++++++++++++++++++++ pandas/tests/test_internals.py | 36 ++++++++++++++++++++----------- setup.py | 3 ++- 4 files changed, 73 insertions(+), 22 deletions(-) create mode 100644 pandas/_libs/src/compat_helper.h diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f78040e5a52f2..f902422b0916d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -3,6 +3,7 @@ cimport numpy as np cimport cython import numpy as np import sys + cdef bint PY3 = (sys.version_info[0] >= 3) from numpy cimport * @@ -26,7 +27,8 @@ from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, PyObject_SetAttrString, PyObject_RichCompareBool, PyBytes_GET_SIZE, - PyUnicode_GET_SIZE) + PyUnicode_GET_SIZE, + PyObject) try: from cpython cimport PyString_GET_SIZE @@ -36,11 +38,10 @@ except ImportError: cdef extern from "Python.h": Py_ssize_t PY_SSIZE_T_MAX - ctypedef struct PySliceObject: - pass +cdef extern from "compat_helper.h": - cdef int PySlice_GetIndicesEx( - PySliceObject* s, Py_ssize_t length, + cdef int slice_get_indices( + PyObject* s, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, Py_ssize_t *slicelength) except -1 @@ -1658,8 +1659,8 @@ cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): if slc is None: raise TypeError("slc should be a slice") - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) return start, stop, step, length @@ -1683,8 +1684,8 @@ cpdef Py_ssize_t slice_len( if slc is None: raise TypeError("slc must be slice") - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) return length diff --git a/pandas/_libs/src/compat_helper.h b/pandas/_libs/src/compat_helper.h new file mode 100644 index 0000000000000..e3c40d2ca65f4 --- /dev/null +++ b/pandas/_libs/src/compat_helper.h @@ -0,0 +1,37 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#ifndef PANDAS__LIBS_SRC_COMPAT_HELPER_H_ +#define PANDAS__LIBS_SRC_COMPAT_HELPER_H_ + +#include "Python.h" +#include "numpy_helper.h" + +/* +PySlice_GetIndicesEx changes signature in PY3 +but 3.6.1 in particular changes the behavior of this function slightly +https://bugs.python.org/issue27867 +*/ + +PANDAS_INLINE int slice_get_indices(PyObject *s, + Py_ssize_t length, + Py_ssize_t *start, + Py_ssize_t *stop, + Py_ssize_t *step, + Py_ssize_t *slicelength) { +#if PY_VERSION_HEX >= 0x03000000 + return PySlice_GetIndicesEx(s, length, start, stop, + step, slicelength); +#else + return PySlice_GetIndicesEx((PySliceObject *)s, length, start, + stop, step, slicelength); +#endif +} + +#endif // PANDAS__LIBS_SRC_COMPAT_HELPER_H_ diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 29920b165d3f6..af7c584249416 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -2,11 +2,12 @@ # pylint: disable=W0102 from datetime import datetime, date - +import sys import pytest import numpy as np import re +from distutils.version import LooseVersion import itertools from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex, Series, Categorical) @@ -22,6 +23,9 @@ randn, assert_series_equal) from pandas.compat import zip, u +# in 3.6.1 a c-api slicing function changed, see src/compat_helper.h +PY361 = sys.version >= LooseVersion('3.6.1') + @pytest.fixture def mgr(): @@ -1128,8 +1132,10 @@ def assert_as_slice_equals(arr, slc): assert_as_slice_equals([0, 100], slice(0, 200, 100)) assert_as_slice_equals([2, 1], slice(2, 0, -1)) - assert_as_slice_equals([2, 1, 0], slice(2, None, -1)) - assert_as_slice_equals([100, 0], slice(100, None, -100)) + + if not PY361: + assert_as_slice_equals([2, 1, 0], slice(2, None, -1)) + assert_as_slice_equals([100, 0], slice(100, None, -100)) def test_not_slice_like_arrays(self): def assert_not_slice_like(arr): @@ -1150,8 +1156,9 @@ def test_slice_iter(self): assert list(BlockPlacement(slice(0, 0))) == [] assert list(BlockPlacement(slice(3, 0))) == [] - assert list(BlockPlacement(slice(3, 0, -1))) == [3, 2, 1] - assert list(BlockPlacement(slice(3, None, -1))) == [3, 2, 1, 0] + if not PY361: + assert list(BlockPlacement(slice(3, 0, -1))) == [3, 2, 1] + assert list(BlockPlacement(slice(3, None, -1))) == [3, 2, 1, 0] def test_slice_to_array_conversion(self): def assert_as_array_equals(slc, asarray): @@ -1164,8 +1171,10 @@ def assert_as_array_equals(slc, asarray): assert_as_array_equals(slice(3, 0), []) assert_as_array_equals(slice(3, 0, -1), [3, 2, 1]) - assert_as_array_equals(slice(3, None, -1), [3, 2, 1, 0]) - assert_as_array_equals(slice(31, None, -10), [31, 21, 11, 1]) + + if not PY361: + assert_as_array_equals(slice(3, None, -1), [3, 2, 1, 0]) + assert_as_array_equals(slice(31, None, -10), [31, 21, 11, 1]) def test_blockplacement_add(self): bpl = BlockPlacement(slice(0, 5)) @@ -1180,23 +1189,26 @@ def assert_add_equals(val, inc, result): assert_add_equals(slice(0, 0), 0, []) assert_add_equals(slice(1, 4), 0, [1, 2, 3]) assert_add_equals(slice(3, 0, -1), 0, [3, 2, 1]) - assert_add_equals(slice(2, None, -1), 0, [2, 1, 0]) assert_add_equals([1, 2, 4], 0, [1, 2, 4]) assert_add_equals(slice(0, 0), 10, []) assert_add_equals(slice(1, 4), 10, [11, 12, 13]) assert_add_equals(slice(3, 0, -1), 10, [13, 12, 11]) - assert_add_equals(slice(2, None, -1), 10, [12, 11, 10]) assert_add_equals([1, 2, 4], 10, [11, 12, 14]) assert_add_equals(slice(0, 0), -1, []) assert_add_equals(slice(1, 4), -1, [0, 1, 2]) - assert_add_equals(slice(3, 
0, -1), -1, [2, 1, 0]) assert_add_equals([1, 2, 4], -1, [0, 1, 3]) with pytest.raises(ValueError): BlockPlacement(slice(1, 4)).add(-10) with pytest.raises(ValueError): BlockPlacement([1, 2, 4]).add(-10) - with pytest.raises(ValueError): - BlockPlacement(slice(2, None, -1)).add(-1) + + if not PY361: + assert_add_equals(slice(3, 0, -1), -1, [2, 1, 0]) + assert_add_equals(slice(2, None, -1), 0, [2, 1, 0]) + assert_add_equals(slice(2, None, -1), 10, [12, 11, 10]) + + with pytest.raises(ValueError): + BlockPlacement(slice(2, None, -1)).add(-1) diff --git a/setup.py b/setup.py index 8e690f05b818c..1b471f76ac5e6 100755 --- a/setup.py +++ b/setup.py @@ -460,7 +460,8 @@ def pxd(name): extra_compile_args=['-Wno-unused-function'] lib_depends = lib_depends + ['pandas/_libs/src/numpy_helper.h', - 'pandas/_libs/src/parse_helper.h'] + 'pandas/_libs/src/parse_helper.h', + 'pandas/_libs/src/compat_helper.h'] tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', From 9d3554c26aa85d66cbfe6f481464f1a357af5e12 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 23 Mar 2017 15:32:31 -0400 Subject: [PATCH 269/353] CI: tweaks in codecov --- codecov.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/codecov.yml b/codecov.yml index 45a6040c6a50d..b4552563deeaa 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,3 +1,6 @@ +codecov: + branch: master + coverage: status: project: @@ -6,4 +9,3 @@ coverage: patch: default: target: '50' - branches: null From 5d28f26bb3b2a4fa7adc0808be54d49a70b1589b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 11:56:18 -0400 Subject: [PATCH 270/353] CI: fix coverage file location CI: clean up some unused env variables Author: Jeff Reback Closes #15792 from jreback/ci and squashes the following commits: 8100d6d [Jeff Reback] CI: fix coverage file location --- .travis.yml | 33 +++++++-------------------------- ci/script_multi.sh | 4 ++-- ci/script_single.sh | 4 ++-- 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/.travis.yml b/.travis.yml index eb2a58b0616ef..d78e4dab31fbe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,7 +39,6 @@ matrix: - TEST_ARGS="--skip-slow --skip-network" - JOB_TAG=_OSX - TRAVIS_PYTHON_VERSION=3.5 - - CACHE_NAME="35_osx" - USE_CACHE=true - python: 2.7 env: @@ -47,9 +46,7 @@ matrix: - JOB_NAME: "27_slow_nnet_LOCALE" - TEST_ARGS="--only-slow --skip-network" - LOCALE_OVERRIDE="zh_CN.UTF-8" - - FULL_DEPS=true - JOB_TAG=_LOCALE - - CACHE_NAME="27_slow_nnet_LOCALE" - USE_CACHE=true addons: apt: @@ -60,10 +57,8 @@ matrix: - PYTHON_VERSION=2.7 - JOB_NAME: "27_nslow" - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - CLIPBOARD_GUI=gtk2 - LINT=true - - CACHE_NAME="27_nslow" - USE_CACHE=true addons: apt: @@ -74,10 +69,8 @@ matrix: - PYTHON_VERSION=3.5 - JOB_NAME: "35_nslow" - TEST_ARGS="--skip-slow --skip-network" - - FULL_DEPS=true - CLIPBOARD=xsel - COVERAGE=true - - CACHE_NAME="35_nslow" - USE_CACHE=true addons: apt: @@ -96,28 +89,24 @@ matrix: packages: - libatlas-base-dev - gfortran -# In allow_failures + # In allow_failures - python: 2.7 env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CACHE_NAME="27_slow" - USE_CACHE=true -# In allow_failures + # In allow_failures - python: 2.7 env: - PYTHON_VERSION=2.7 - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test" - USE_CACHE=true -# In allow_failures + # In allow_failures - python: 3.5 env: - 
PYTHON_VERSION=3.5 @@ -125,17 +114,14 @@ matrix: - JOB_TAG=_NUMPY_DEV - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - USE_CACHE=true -# In allow_failures + # In allow_failures - python: 3.5 env: - PYTHON_VERSION=3.5 - JOB_NAME: "doc_build" - - FULL_DEPS=true - DOC_BUILD=true - JOB_TAG=_DOC_BUILD - - CACHE_NAME="doc_build" - USE_CACHE=true allow_failures: - python: 2.7 @@ -144,8 +130,6 @@ matrix: - JOB_NAME: "27_slow" - JOB_TAG=_SLOW - TEST_ARGS="--only-slow --skip-network" - - FULL_DEPS=true - - CACHE_NAME="27_slow" - USE_CACHE=true - python: 2.7 env: @@ -153,9 +137,7 @@ matrix: - JOB_NAME: "27_build_test" - JOB_TAG=_BUILD_TEST - TEST_ARGS="--skip-slow" - - FULL_DEPS=true - BUILD_TEST=true - - CACHE_NAME="27_build_test" - USE_CACHE=true - python: 3.5 env: @@ -164,16 +146,13 @@ matrix: - JOB_TAG=_NUMPY_DEV - TEST_ARGS="--skip-slow --skip-network" - PANDAS_TESTING_MODE="deprecate" - - CACHE_NAME="35_numpy_dev" - USE_CACHE=true - python: 3.5 env: - PYTHON_VERSION=3.5 - JOB_NAME: "doc_build" - - FULL_DEPS=true - DOC_BUILD=true - JOB_TAG=_DOC_BUILD - - CACHE_NAME="doc_build" - USE_CACHE=true before_install: @@ -209,7 +188,9 @@ script: - echo "script done" after_success: - - source activate pandas && codecov + - if [ "$COVERAGE" ]; then + source activate pandas && codecov --file /tmp/cov-single.xml /tmp/cov-multiple.xml; + fi after_script: - echo "after_script start" diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 2d1211b2f7b96..f0fbb8c54bf2a 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -27,8 +27,8 @@ if [ "$BUILD_TEST" ]; then cd /tmp python -c "import pandas; pandas.test(['-n 2'])" elif [ "$COVERAGE" ]; then - echo pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas - pytest -s -n 2 -m "not single" --cov=pandas --cov-append --cov-report xml:/tmp/cov.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas else echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest diff --git a/ci/script_single.sh b/ci/script_single.sh index 2d7962352842b..86e822cb57653 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -20,8 +20,8 @@ fi if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is simply a build test." 
elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas - pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest From 59f977f366d1560b3600d7fb1fdb36ffd189c151 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 25 Mar 2017 11:57:16 -0400 Subject: [PATCH 271/353] MAINT: Enforce string type for where parameter Deprecated in 0.11.0. xref #12027. Author: gfyoung Closes #15798 from gfyoung/where-string-enforce and squashes the following commits: 06adda1 [gfyoung] MAINT: Enforce string type for where parameter --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/computation/pytables.py | 89 +++++++++++--------------------- pandas/tests/io/test_pytables.py | 53 ------------------- 3 files changed, 30 insertions(+), 113 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 37a70435ed6ff..dee1a5750eeeb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -812,6 +812,7 @@ Removal of prior version deprecations/changes - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) +- Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) - The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) .. _whatsnew_0200.performance: diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 7c09ca8d38773..2a5056963fe8d 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -1,9 +1,7 @@ """ manage PyTables query interface via Expressions """ import ast -import warnings from functools import partial -from datetime import datetime, timedelta import numpy as np import pandas as pd @@ -452,6 +450,32 @@ def _rewrite_membership_op(self, node, left, right): return self.visit(node.op), node.op, left, right +def _validate_where(w): + """ + Validate that the where statement is of the right type. + + The type may either be String, Expr, or list-like of Exprs. + + Parameters + ---------- + w : String term expression, Expr, or list-like of Exprs. + + Returns + ------- + where : The original where clause if the check was successful. + + Raises + ------ + TypeError : An invalid data type was passed in for w (e.g. dict). 
+ """ + + if not (isinstance(w, (Expr, string_types)) or is_list_like(w)): + raise TypeError("where must be passed as a string, Expr, " + "or list-like of Exprs") + + return w + + class Expr(expr.Expr): """ hold a pytables like expression, comprised of possibly multiple 'terms' @@ -481,11 +505,9 @@ class Expr(expr.Expr): "major_axis>=20130101" """ - def __init__(self, where, op=None, value=None, queryables=None, - encoding=None, scope_level=0): + def __init__(self, where, queryables=None, encoding=None, scope_level=0): - # try to be back compat - where = self.parse_back_compat(where, op, value) + where = _validate_where(where) self.encoding = encoding self.condition = None @@ -505,7 +527,7 @@ def __init__(self, where, op=None, value=None, queryables=None, if isinstance(w, Expr): local_dict = w.env.scope else: - w = self.parse_back_compat(w) + w = _validate_where(w) where[idx] = w where = ' & ' .join(["(%s)" % w for w in where]) # noqa @@ -519,59 +541,6 @@ def __init__(self, where, op=None, value=None, queryables=None, encoding=encoding) self.terms = self.parse() - def parse_back_compat(self, w, op=None, value=None): - """ allow backward compatibility for passed arguments """ - - if isinstance(w, dict): - w, op, value = w.get('field'), w.get('op'), w.get('value') - if not isinstance(w, string_types): - raise TypeError( - "where must be passed as a string if op/value are passed") - warnings.warn("passing a dict to Expr is deprecated, " - "pass the where as a single string", - FutureWarning, stacklevel=10) - if isinstance(w, tuple): - if len(w) == 2: - w, value = w - op = '==' - elif len(w) == 3: - w, op, value = w - warnings.warn("passing a tuple into Expr is deprecated, " - "pass the where as a single string", - FutureWarning, stacklevel=10) - - if op is not None: - if not isinstance(w, string_types): - raise TypeError( - "where must be passed as a string if op/value are passed") - - if isinstance(op, Expr): - raise TypeError("invalid op passed, must be a string") - w = "{0}{1}".format(w, op) - if value is not None: - if isinstance(value, Expr): - raise TypeError("invalid value passed, must be a string") - - # stringify with quotes these values - def convert(v): - if isinstance(v, (datetime, np.datetime64, - timedelta, np.timedelta64)): - return "'{0}'".format(v) - return v - - if isinstance(value, (list, tuple)): - value = [convert(v) for v in value] - else: - value = convert(value) - - w = "{0}{1}".format(w, value) - - warnings.warn("passing multiple values to Expr is deprecated, " - "pass the where as a single string", - FutureWarning, stacklevel=10) - - return w - def __unicode__(self): if self.terms is not None: return pprint_thing(self.terms) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 324160d5b1ae6..2d62cb2d6944d 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2585,59 +2585,6 @@ def test_term_compat(self): expected = wp.loc[:, :, ['A', 'B']] assert_panel_equal(result, expected) - def test_backwards_compat_without_term_object(self): - with ensure_clean_store(self.path) as store: - - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - with catch_warnings(record=True): - result = store.select('wp', [('major_axis>20000102'), - ('minor_axis', '=', ['A', 'B'])]) - expected = wp.loc[:, - wp.major_axis > Timestamp('20000102'), - ['A', 'B']] - assert_panel_equal(result, expected) - - 
store.remove('wp', ('major_axis>20000103')) - result = store.select('wp') - expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] - assert_panel_equal(result, expected) - - with ensure_clean_store(self.path) as store: - - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - - # stringified datetimes - with catch_warnings(record=True): - result = store.select('wp', - [('major_axis', - '>', - datetime.datetime(2000, 1, 2))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - with catch_warnings(record=True): - result = store.select('wp', - [('major_axis', - '>', - datetime.datetime(2000, 1, 2, 0, 0))]) - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - with catch_warnings(record=True): - result = store.select('wp', - [('major_axis', - '=', - [datetime.datetime(2000, 1, 2, 0, 0), - datetime.datetime(2000, 1, 3, 0, 0)])] - ) - expected = wp.loc[:, [Timestamp('20000102'), - Timestamp('20000103')]] - assert_panel_equal(result, expected) - def test_same_name_scoping(self): with ensure_clean_store(self.path) as store: From 8c8dd8881107ba353c675ea65774ae409e6aea35 Mon Sep 17 00:00:00 2001 From: Joe Jevnik Date: Sat, 25 Mar 2017 12:07:09 -0400 Subject: [PATCH 272/353] PERF: add the 'name' attribute to dataframes that go through apply_frame_axis0 Previously, if you did `group.name` in the applied function, it would fail and fall back to the slower path because the attribute did not exist; `shape_before` was unused. Author: Joe Jevnik This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #15062 from llllllllll/add-name-in-apply-inference-call and squashes the following commits: 722a945 [Joe Jevnik] DOC: update whatsnew for groupby perf change 7e75635 [Joe Jevnik] DEV: add groupby asv benchmark 710528a [Joe Jevnik] BUG: add the 'name' attribute to dataframes that go through apply_frame_axis0 --- asv_bench/benchmarks/groupby.py | 32 ++++++++++++++++++++++------ doc/source/whatsnew/v0.20.0.txt | 3 +++ pandas/_libs/src/reduce.pyx | 2 +- pandas/tests/groupby/test_groupby.py | 16 ++++++++++++++ 4 files changed, 45 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 59f55914ea4d3..b8d8e8b7912d7 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -108,16 +108,34 @@ def setup(self): self.N = 10000 self.labels = np.random.randint(0, 2000, size=self.N) self.labels2 = np.random.randint(0, 3, size=self.N) - self.df = DataFrame({'key': self.labels, 'key2': self.labels2, 'value1': randn(self.N), 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N / 4)), }) - - def f(self, g): + self.df = DataFrame({ + 'key': self.labels, + 'key2': self.labels2, + 'value1': np.random.randn(self.N), + 'value2': (['foo', 'bar', 'baz', 'qux'] * (self.N // 4)), + }) + + @staticmethod + def scalar_function(g): return 1 - def time_groupby_frame_apply(self): - self.df.groupby(['key', 'key2']).apply(self.f) + def time_groupby_frame_apply_scalar_function(self): + self.df.groupby(['key', 'key2']).apply(self.scalar_function) + + def time_groupby_frame_apply_scalar_function_overhead(self): + self.df.groupby('key').apply(self.scalar_function) + + @staticmethod + def df_copy_function(g): + # ensure that the group name is available (see GH #15062) + g.name + return g.copy() + + def 
time_groupby_frame_df_copy_function(self): + self.df.groupby(['key', 'key2']).apply(self.df_copy_function) - def time_groupby_frame_apply_overhead(self): - self.df.groupby('key').apply(self.f) + def time_groupby_frame_apply_df_copy_overhead(self): + self.df.groupby('key').apply(self.df_copy_function) #---------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index dee1a5750eeeb..64bfeb3307e17 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -831,6 +831,9 @@ Performance Improvements - Improved performance when using ``.unstack()`` (:issue:`15503`) - Improved performance of merge/join on ``category`` columns (:issue:`10409`) - Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`) +- Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied + function used the ``.name`` attribute of the group DataFrame (:issue:`15062`). + .. _whatsnew_0200.bug_fixes: diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/src/reduce.pyx index 1cd3e53494a72..2bba07256305a 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/src/reduce.pyx @@ -497,7 +497,7 @@ def apply_frame_axis0(object frame, object f, object names, # Need to infer if our low-level mucking is going to cause a segfault if n > 0: chunk = frame.iloc[starts[0]:ends[0]] - shape_before = chunk.shape + object.__setattr__(chunk, 'name', names[0]) try: result = f(chunk) if result is chunk: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a355dca3029c7..9f5a7f404e2be 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3244,6 +3244,22 @@ def _check_all(grouped): _check_all(self.df.groupby('A')) _check_all(self.df.groupby(['A', 'B'])) + def test_group_name_available_in_inference_pass(self): + # gh-15062 + df = pd.DataFrame({'a': [0, 0, 1, 1, 2, 2], 'b': np.arange(6)}) + + names = [] + + def f(group): + names.append(group.name) + return group.copy() + + df.groupby('a', sort=False, group_keys=False).apply(f) + # we expect 2 zeros because we call ``f`` once to see if a faster route + # can be used. + expected_names = [0, 0, 1, 2] + tm.assert_equal(names, expected_names) + def test_no_dummy_key_names(self): # GH #1291 From 80f30b44e3c79f26b20fada91995c1874c2e5cdf Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sat, 25 Mar 2017 14:04:46 -0400 Subject: [PATCH 273/353] DOC: Add details to DataFrame groupby transform closes #13543 Author: Kevin Sheppard Closes #14388 from bashtage/groupby-transform-doc-string and squashes the following commits: ef1ff13 [Kevin Sheppard] DOC: Add details to DataFrame groupby transform --- doc/source/groupby.rst | 38 +++++++++++++++++++++++++++++++++----- pandas/core/groupby.py | 15 +++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 8484ccd69a983..cbe3588104439 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -580,9 +580,21 @@ Transformation -------------- The ``transform`` method returns an object that is indexed the same (same size) -as the one being grouped. Thus, the passed transform function should return a -result that is the same size as the group chunk. For example, suppose we wished -to standardize the data within each group: +as the one being grouped. 
The transform function must: + +* Return a result that is either the same size as the group chunk or + broadcastable to the size of the group chunk (e.g., a scalar, + ``grouped.transform(lambda x: x.iloc[-1])``). +* Operate column-by-column on the group chunk. The transform is applied to + the first group chunk using chunk.apply. +* Not perform in-place operations on the group chunk. Group chunks should + be treated as immutable, and changes to a group chunk may produce unexpected + results. For example, when using ``fillna``, ``inplace`` must be ``False`` + (``grouped.transform(lambda x: x.fillna(inplace=False))``). +* (Optionally) operates on the entire group chunk. If this is supported, a + fast path is used starting from the *second* chunk. + +For example, suppose we wished to standardize the data within each group: .. ipython:: python @@ -620,6 +632,21 @@ We can also visually compare the original and transformed data sets. @savefig groupby_transform_plot.png compare.plot() +Transformation functions that have lower dimension outputs are broadcast to +match the shape of the input array. + +.. ipython:: python + + data_range = lambda x: x.max() - x.min() + ts.groupby(key).transform(data_range) + +Alternatively the built-in methods can be could be used to produce the same +outputs + +.. ipython:: python + + ts.groupby(key).transform('max') - ts.groupby(key).transform('min') + Another common data transform is to replace missing data with the group mean. .. ipython:: python @@ -664,8 +691,9 @@ and that the transformed data contains no NAs. .. note:: - Some functions when applied to a groupby object will automatically transform the input, returning - an object of the same shape as the original. Passing ``as_index=False`` will not affect these transformation methods. + Some functions when applied to a groupby object will automatically transform + the input, returning an object of the same shape as the original. Passing + ``as_index=False`` will not affect these transformation methods. For example: ``fillna, ffill, bfill, shift``. diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 727af8b8cd3eb..64e116df88b88 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3649,10 +3649,25 @@ def transform(self, func, *args, **kwargs): Each subframe is endowed the attribute 'name' in case you need to know which group you are working on. + The current implementation imposes three requirements on f: + + * f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, f returns a scalar it will be broadcast to have the + same shape as the input subframe. + * f must support application column-by-column in the subframe. If f + also supports application to the entire subframe, then a fast path + is used starting from the second chunk. + * f must not mutate subframes. Mutation is not supported and may + produce unexpected results. 
+ Examples -------- >>> grouped = df.groupby(lambda x: mapping[x]) + # Same shape >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + # Broadcastable + >>> grouped.transform(lambda x: x.max() - x.min()) """ # optimized transforms From 83e24ca97b71e72a54ab360a44dc7a00f17ea429 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 14:15:51 -0400 Subject: [PATCH 274/353] DOC: template groupby.transform doc-string --- pandas/core/groupby.py | 109 +++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 64e116df88b88..dded55114ab6f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -72,6 +72,55 @@ pandas.Panel.%(name)s """ +_transform_template = """ +Call function producing a like-indexed %(klass)s on each group and +return a %(klass)s having the same indexes as the original object +filled with the transformed values + +Parameters +---------- +f : function + Function to apply to each group + +Notes +----- +Each group is endowed the attribute 'name' in case you need to know +which group you are working on. + +The current implementation imposes three requirements on f: + +* f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, f returns a scalar it will be broadcast to have the + same shape as the input subframe. +* if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. +* f must not mutate groups. Mutation is not supported and may + produce unexpected results. + +Returns +------- +%(klass)s + +See also +-------- +aggregate, transform + +Examples +-------- +>>> df = pd.DataFrame(np.repeat(np.arange(10), 3).reshape(-1, 3), + columns=list('ABC')) +>>> grouped = df.groupby(df.index // 3) + +# Same shape +>>> grouped.%(selected)stransform(lambda x: (x - x.mean()) / x.std()) + +# Broadcastable +>>> grouped.%(selected)stransform(lambda x: x.max() - x.min()) + +""" + # special case to prevent duplicate plots when catching exceptions when # forwarding methods from NDFrames _plotting_methods = frozenset(['plot', 'boxplot', 'hist']) @@ -2860,25 +2909,9 @@ def _aggregate_named(self, func, *args, **kwargs): return result + @Substitution(klass='Series', selected='A.') + @Appender(_transform_template) def transform(self, func, *args, **kwargs): - """ - Call function producing a like-indexed Series on each group and return - a Series with the transformed values - - Parameters - ---------- - func : function - To apply to each group. 
Should return a Series with the same index - - Examples - -------- - >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) - - Returns - ------- - transformed : Series - """ - func = self._is_cython_func(func) or func # if string function @@ -3633,42 +3666,9 @@ def _transform_general(self, func, *args, **kwargs): axis=self.axis, verify_integrity=False) return self._set_result_index_ordered(concatenated) + @Substitution(klass='DataFrame', selected='') + @Appender(_transform_template) def transform(self, func, *args, **kwargs): - """ - Call function producing a like-indexed DataFrame on each group and - return a DataFrame having the same indexes as the original object - filled with the transformed values - - Parameters - ---------- - f : function - Function to apply to each subframe - - Notes - ----- - Each subframe is endowed the attribute 'name' in case you need to know - which group you are working on. - - The current implementation imposes three requirements on f: - - * f must return a value that either has the same shape as the input - subframe or can be broadcast to the shape of the input subframe. - For example, f returns a scalar it will be broadcast to have the - same shape as the input subframe. - * f must support application column-by-column in the subframe. If f - also supports application to the entire subframe, then a fast path - is used starting from the second chunk. - * f must not mutate subframes. Mutation is not supported and may - produce unexpected results. - - Examples - -------- - >>> grouped = df.groupby(lambda x: mapping[x]) - # Same shape - >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) - # Broadcastable - >>> grouped.transform(lambda x: x.max() - x.min()) - """ # optimized transforms func = self._is_cython_func(func) or func @@ -3784,7 +3784,8 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa Examples -------- - >>> grouped = df.groupby(lambda x: mapping[x]) + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC')) + >>> grouped = df.groupby(df.index % 3) >>> grouped.filter(lambda x: x['A'].sum() + x['B'].sum() > 0) """ From 7a42240fd38404092049ea5006561b69fa5b0d88 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 15:18:00 -0400 Subject: [PATCH 275/353] DOC: whatsnew fixes --- doc/source/whatsnew/v0.20.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 64bfeb3307e17..c5bf943cebca7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -785,6 +785,7 @@ Deprecations - ``Series/DataFrame/Panel.consolidate()`` been deprecated as a public method. (:issue:`15483`) - The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`). - The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`) + * ``pd.pnow()``, replaced by ``Period.now()`` * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore * ``pd.Expr``, is removed, as it is not applicable to user code. 
From c577c19d22ac8ec7ea05630576c379e3108248af Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 15:41:58 -0400 Subject: [PATCH 276/353] CI: only print skipped if not on doc-build --- .travis.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index d78e4dab31fbe..ab83a37f25905 100644 --- a/.travis.yml +++ b/.travis.yml @@ -196,6 +196,10 @@ after_script: - echo "after_script start" - ci/install_test.sh - source activate pandas && python -c "import pandas; pandas.show_versions();" - - ci/print_skipped.py /tmp/single.xml - - ci/print_skipped.py /tmp/multiple.xml + - if [ "$DOC_BUILD"]; then + ci/print_skipped.py /tmp/single.xml; + fi + - if [ "$DOC_BUILD"]; then + ci/print_skipped.py /tmp/multiple.xml; + fi - echo "after_script done" From 156bfd2ed5db2837fe740ec2934a782f56e99864 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 18:42:14 -0400 Subject: [PATCH 277/353] CI: typo in .travis.yml for print_skipped CI: linted .travis.yml CI: removed CLIPBOARD env variables as not used closes #15803 --- .travis.yml | 95 +++++++++-------------------------------------------- 1 file changed, 16 insertions(+), 79 deletions(-) diff --git a/.travis.yml b/.travis.yml index ab83a37f25905..bb3388734229e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,63 +27,37 @@ matrix: - language: objective-c os: osx compiler: clang - osx_image: xcode6.4 cache: ccache: true directories: - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_osx" - - TEST_ARGS="--skip-slow --skip-network" - - JOB_TAG=_OSX - - TRAVIS_PYTHON_VERSION=3.5 - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_osx" TEST_ARGS="--skip-slow --skip-network" JOB_TAG="_OSX" TRAVIS_PYTHON_VERSION=3.5 USE_CACHE=true - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow_nnet_LOCALE" - - TEST_ARGS="--only-slow --skip-network" - - LOCALE_OVERRIDE="zh_CN.UTF-8" - - JOB_TAG=_LOCALE - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_slow_nnet_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" JOB_TAG="_LOCALE" USE_CACHE=true addons: apt: packages: - language-pack-zh-hans - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_nslow" - - TEST_ARGS="--skip-slow" - - CLIPBOARD_GUI=gtk2 - - LINT=true - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_nslow" TEST_ARGS="--skip-slow" LINT=true USE_CACHE=true addons: apt: packages: - python-gtk2 - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_nslow" - - TEST_ARGS="--skip-slow --skip-network" - - CLIPBOARD=xsel - - COVERAGE=true - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_nslow" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true USE_CACHE=true addons: apt: packages: - xsel - python: 3.6 env: - - PYTHON_VERSION=3.6 - - JOB_NAME: "36" - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - CONDA_FORGE=true - - USE_CACHE=true + - PYTHON_VERSION=3.6 JOB_NAME="36" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true USE_CACHE=true addons: apt: packages: @@ -92,68 +66,32 @@ matrix: # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test" - - 
JOB_TAG=_BUILD_TEST - - TEST_ARGS="--skip-slow" - - BUILD_TEST=true - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true # In allow_failures - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true # In allow_failures - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "doc_build" - - DOC_BUILD=true - - JOB_TAG=_DOC_BUILD - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true allow_failures: - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_slow" - - JOB_TAG=_SLOW - - TEST_ARGS="--only-slow --skip-network" - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true - python: 2.7 env: - - PYTHON_VERSION=2.7 - - JOB_NAME: "27_build_test" - - JOB_TAG=_BUILD_TEST - - TEST_ARGS="--skip-slow" - - BUILD_TEST=true - - USE_CACHE=true + - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "35_numpy_dev" - - JOB_TAG=_NUMPY_DEV - - TEST_ARGS="--skip-slow --skip-network" - - PANDAS_TESTING_MODE="deprecate" - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true - python: 3.5 env: - - PYTHON_VERSION=3.5 - - JOB_NAME: "doc_build" - - DOC_BUILD=true - - JOB_TAG=_DOC_BUILD - - USE_CACHE=true + - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true before_install: - echo "before_install" @@ -165,7 +103,7 @@ before_install: - git --version - git tag - ci/before_install_travis.sh - - export DISPLAY=:99.0 + - export DISPLAY=":99.0" install: - echo "install start" @@ -194,12 +132,11 @@ after_success: after_script: - echo "after_script start" - - ci/install_test.sh - source activate pandas && python -c "import pandas; pandas.show_versions();" - - if [ "$DOC_BUILD"]; then + - if [ -e /tmp/single.xml ]; then ci/print_skipped.py /tmp/single.xml; fi - - if [ "$DOC_BUILD"]; then + - if [ -e /tmp/multiple.xml ]; then ci/print_skipped.py /tmp/multiple.xml; fi - echo "after_script done" From 22f9d0ddbefccbf9a3e4000ad17dd12db9bddba9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 25 Mar 2017 22:16:31 -0400 Subject: [PATCH 278/353] Revert "MAINT: Remove Long and WidePanel (#15748)" (#15802) This reverts commit bff47f2302a0be4dcbf7e5055e525d5652e08fb5. 
--- asv_bench/benchmarks/pandas_vb_common.py | 5 ++++ bench/bench_join_panel.py | 4 +-- doc/source/whatsnew/v0.20.0.txt | 1 - pandas/core/api.py | 2 +- pandas/core/panel.py | 23 ++++++++++++++++ pandas/tests/api/test_api.py | 3 +- pandas/tests/io/test_pytables.py | 3 ++ pandas/tests/test_panel.py | 35 ++++++++++++++++-------- vb_suite/pandas_vb_common.py | 5 ++++ 9 files changed, 64 insertions(+), 17 deletions(-) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index a7e530e7f5ef1..56ccc94c414fb 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -25,6 +25,11 @@ except: pass +try: + Panel = Panel +except Exception: + Panel = WidePanel + # didn't add to namespace until later try: from pandas.core.index import MultiIndex diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py index 113b317dd8ff8..f3c3f8ba15f70 100644 --- a/bench/bench_join_panel.py +++ b/bench/bench_join_panel.py @@ -45,8 +45,8 @@ def reindex_on_axis(panels, axis, axis_reindex): return p -# Does the job but inefficient. It is better to handle -# this like you read a table in pytables. +# does the job but inefficient (better to handle like you read a table in +# pytables...e.g create a LongPanel then convert to Wide) def create_panels_join(cls, panels): """ given an array of panels's, create a single panel """ panels = [a for a in panels if a is not None] diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c5bf943cebca7..ca6541256f1d2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -814,7 +814,6 @@ Removal of prior version deprecations/changes - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) - Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) -- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) .. _whatsnew_0200.performance: diff --git a/pandas/core/api.py b/pandas/core/api.py index 5018de39ca907..65253dedb8b53 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -15,7 +15,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.panel import Panel +from pandas.core.panel import Panel, WidePanel from pandas.core.panel4d import Panel4D from pandas.core.reshape import (pivot_simple as pivot, get_dummies, lreshape, wide_to_long) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 50ddc24ac9656..5ab3c44b175fe 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -4,6 +4,8 @@ # pylint: disable=E1103,W0231,W0212,W0621 from __future__ import division +import warnings + import numpy as np from pandas.types.cast import (infer_dtype_from_scalar, @@ -1554,3 +1556,24 @@ def f(self, other, axis=0): ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() Panel._add_numeric_operations() + + +# legacy +class WidePanel(Panel): + + def __init__(self, *args, **kwargs): + # deprecation, #10892 + warnings.warn("WidePanel is deprecated. 
Please use Panel", + FutureWarning, stacklevel=2) + + super(WidePanel, self).__init__(*args, **kwargs) + + +class LongPanel(DataFrame): + + def __init__(self, *args, **kwargs): + # deprecation, #10892 + warnings.warn("LongPanel is deprecated. Please use DataFrame", + FutureWarning, stacklevel=2) + + super(LongPanel, self).__init__(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2c7dcf2501f32..73222c246fc70 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -54,7 +54,8 @@ class TestPDApi(Base, tm.TestCase): 'TimedeltaIndex', 'Timestamp'] # these are already deprecated; awaiting removal - deprecated_classes = ['Panel4D', 'SparseList', 'Expr', 'Term'] + deprecated_classes = ['WidePanel', 'Panel4D', + 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future deprecated_classes_in_future = ['Panel'] diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2d62cb2d6944d..82a98f5d08488 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2964,6 +2964,9 @@ def _check(left, right): # empty # self._check_roundtrip(wp.to_frame()[:0], _check) + def test_longpanel(self): + pass + def test_overwrite_node(self): with ensure_clean_store(self.path) as store: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 13e16f3b90730..ab0322abbcf06 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -178,6 +178,10 @@ def wrapper(x): class SafeForSparse(object): + @classmethod + def assert_panel_equal(cls, x, y): + assert_panel_equal(x, y) + def test_get_axis(self): assert (self.panel._get_axis(0) is self.panel.items) assert (self.panel._get_axis(1) is self.panel.major_axis) @@ -342,10 +346,10 @@ def check_op(op, name): def test_combinePanel(self): result = self.panel.add(self.panel) - assert_panel_equal(result, self.panel * 2) + self.assert_panel_equal(result, self.panel * 2) def test_neg(self): - assert_panel_equal(-self.panel, self.panel * -1) + self.assert_panel_equal(-self.panel, self.panel * -1) # issue 7692 def test_raise_when_not_implemented(self): @@ -365,22 +369,22 @@ def test_select(self): # select items result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') expected = p.reindex(items=['ItemA', 'ItemC']) - assert_panel_equal(result, expected) + self.assert_panel_equal(result, expected) # select major_axis result = p.select(lambda x: x >= datetime(2000, 1, 15), axis='major') new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] expected = p.reindex(major=new_major) - assert_panel_equal(result, expected) + self.assert_panel_equal(result, expected) # select minor_axis result = p.select(lambda x: x in ('D', 'A'), axis=2) expected = p.reindex(minor=['A', 'D']) - assert_panel_equal(result, expected) + self.assert_panel_equal(result, expected) # corner case, empty thing result = p.select(lambda x: x in ('foo', ), axis='items') - assert_panel_equal(result, p.reindex(items=[])) + self.assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): for item in self.panel.items: @@ -395,8 +399,8 @@ def test_abs(self): result = self.panel.abs() result2 = abs(self.panel) expected = np.abs(self.panel) - assert_panel_equal(result, expected) - assert_panel_equal(result2, expected) + self.assert_panel_equal(result, expected) + self.assert_panel_equal(result2, expected) df = self.panel['ItemA'] result = df.abs() @@ -863,6 +867,10 @@ def test_set_value(self): class TestPanel(tm.TestCase, 
PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): + @classmethod + def assert_panel_equal(cls, x, y): + assert_panel_equal(x, y) + def setUp(self): self.panel = _panel.copy() self.panel.major_axis.name = None @@ -1959,7 +1967,7 @@ def test_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = p.round() - assert_panel_equal(expected, result) + self.assert_panel_equal(expected, result) def test_numpy_round(self): values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], @@ -1975,7 +1983,7 @@ def test_numpy_round(self): major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['A', 'B']) result = np.round(p) - assert_panel_equal(expected, result) + self.assert_panel_equal(expected, result) msg = "the 'out' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.round, p, out=p) @@ -2262,12 +2270,15 @@ def test_all_any_unhandled(self): self.assertRaises(NotImplementedError, self.panel.any, bool_only=True) -class TestPanelFrame(tm.TestCase): +class TestLongPanel(tm.TestCase): """ - Check that conversions to and from Panel to DataFrame work. + LongPanel no longer exists, but... """ def setUp(self): + import warnings + warnings.filterwarnings(action='ignore', category=FutureWarning) + panel = tm.makePanel() tm.add_nans(panel) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index 41e43d6ab10e5..bd2e8a1c1d504 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -18,6 +18,11 @@ except: import pandas._libs.lib as lib +try: + Panel = WidePanel +except Exception: + pass + # didn't add to namespace until later try: from pandas.core.index import MultiIndex From d2f32a0362bbb90c4ab32a454962912901e32080 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 25 Mar 2017 22:19:58 -0400 Subject: [PATCH 279/353] BUG: Series.asof fails for all NaN Series (GH15713) closes bug #15713 Added the test if the series is all nans Added the code that check if that's the case: if yes, return the expected output Author: Carlos Souza Closes #15758 from ucals/bug-fix-15713 and squashes the following commits: 0765108 [Carlos Souza] First simplification, code-block in the same place bb63964 [Carlos Souza] Propagating Series name af9a29b [Carlos Souza] Setting name of asof result when scalar input and all nan b8f078a [Carlos Souza] Small code standard change 7448b96 [Carlos Souza] Fixing scalar input a080b9b [Carlos Souza] Making scalar input return in a Series 04b7306 [Carlos Souza] Removing .values and formating code PEP8 3f9c7fd [Carlos Souza] Minor comments 70c958f [Carlos Souza] Added tests for non-default indexes, scalar and multiple inputs, and results preserve columns 6b745af [Carlos Souza] Adding DataFrame tests & support, and optimizing the code 89fb6cf [Carlos Souza] BUG #15713 fixing failing tests 17d1d77 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans! 4e26ab8 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans. 
c78d687 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans 676a4e5 [Carlos Souza] Test --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 10 +++++++ pandas/tests/frame/test_asof.py | 47 ++++++++++++++++++++++++-------- pandas/tests/series/test_asof.py | 26 ++++++++++++++++++ 4 files changed, 73 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ca6541256f1d2..f96fc41c73f15 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -868,6 +868,7 @@ Bug Fixes - Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 134840728d931..ad56ea44a0dc6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3972,6 +3972,16 @@ def asof(self, where, subset=None): where = Index(where) if is_list else Index([where]) nulls = self.isnull() if is_series else self[subset].isnull().any(1) + if nulls.all(): + if is_series: + return self._constructor(np.nan, index=where, name=self.name) + elif is_list: + from pandas import DataFrame + return DataFrame(np.nan, index=where, columns=self.columns) + else: + from pandas import Series + return Series(np.nan, index=self.columns, name=where[0]) + locs = self.index.asof_locs(where, ~(nulls.values)) # mask the missing diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 8bb26d3d7474c..dd03f8f7cb7a9 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -4,22 +4,19 @@ from pandas import (DataFrame, date_range, Timestamp, Series, to_datetime) -from pandas.util.testing import assert_frame_equal, assert_series_equal import pandas.util.testing as tm from .common import TestData class TestFrameAsof(TestData, tm.TestCase): - def setUp(self): self.N = N = 50 - rng = date_range('1/1/1990', periods=N, freq='53s') + self.rng = date_range('1/1/1990', periods=N, freq='53s') self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, - index=rng) + index=self.rng) def test_basic(self): - df = self.df.copy() df.loc[15:30, 'A'] = np.nan dates = date_range('1/1/1990', periods=self.N * 3, @@ -39,7 +36,6 @@ def test_basic(self): self.assertTrue((rs == 14).all(1).all()) def test_subset(self): - N = 10 rng = date_range('1/1/1990', periods=N, freq='53s') df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, @@ -51,19 +47,19 @@ def test_subset(self): # with a subset of A should be the same result = df.asof(dates, subset='A') expected = df.asof(dates) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # same with A/B result = df.asof(dates, subset=['A', 'B']) expected = df.asof(dates) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # B gives self.df.asof result = df.asof(dates, subset='B') expected = df.resample('25s', closed='right').ffill().reindex(dates) expected.iloc[20:] = 9 - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_missing(self): # GH 15118 @@ -75,9 +71,38 @@ def test_missing(self): result = df.asof('1989-12-31') expected = 
Series(index=['A', 'B'], name=Timestamp('1989-12-31')) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = df.asof(to_datetime(['1989-12-31'])) expected = DataFrame(index=to_datetime(['1989-12-31']), columns=['A', 'B'], dtype='float64') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) + + def test_all_nans(self): + # GH 15713 + # DataFrame is all nans + result = DataFrame([np.nan]).asof([0]) + expected = DataFrame([np.nan]) + tm.assert_frame_equal(result, expected) + + # testing non-default indexes, multiple inputs + dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') + result = DataFrame(np.nan, index=self.rng, columns=['A']).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=['A']) + tm.assert_frame_equal(result, expected) + + # testing multiple columns + dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') + result = DataFrame(np.nan, index=self.rng, + columns=['A', 'B', 'C']).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=['A', 'B', 'C']) + tm.assert_frame_equal(result, expected) + + # testing scalar input + result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof([3]) + expected = DataFrame(np.nan, index=[3], columns=['A', 'B']) + tm.assert_frame_equal(result, expected) + + result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof(3) + expected = Series(np.nan, index=['A', 'B'], name=3) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index d2fd8858e7647..82914a99e2f6c 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -148,3 +148,29 @@ def test_errors(self): s = Series(np.random.randn(N), index=rng) with self.assertRaises(ValueError): s.asof(s.index[0], subset='foo') + + def test_all_nans(self): + # GH 15713 + # series is all nans + result = Series([np.nan]).asof([0]) + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + # testing non-default indexes + N = 50 + rng = date_range('1/1/1990', periods=N, freq='53s') + + dates = date_range('1/1/1990', periods=N * 3, freq='25s') + result = Series(np.nan, index=rng).asof(dates) + expected = Series(np.nan, index=dates) + tm.assert_series_equal(result, expected) + + # testing scalar input + date = date_range('1/1/1990', periods=N * 3, freq='25s')[0] + result = Series(np.nan, index=rng).asof(date) + assert isnull(result) + + # test name is propagated + result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5]) + expected = Series(np.nan, index=[4, 5], name='test') + tm.assert_series_equal(result, expected) From 10589887016f4c9280fdeec01f9fcdbe9cea4dfa Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 09:41:19 -0400 Subject: [PATCH 280/353] CI: simplify ci setup a bit closes #15807 --- .travis.yml | 37 +++++++++---------- appveyor.yml | 6 +-- ci/check_cache.sh | 4 ++ ci/install_circle.sh | 19 ++++------ ci/install_travis.sh | 37 ++++++------------- ci/prep_cython_cache.sh | 8 ++-- ci/requirements-2.7.build | 2 + ci/requirements-2.7_BUILD_TEST.build | 2 + ci/requirements-2.7_COMPAT.build | 1 + ci/requirements-2.7_LOCALE.build | 1 + ci/requirements-2.7_SLOW.build | 1 + ...ts-2.7-64.run => requirements-2.7_WIN.run} | 0 ci/requirements-3.4-64.run | 12 ------ ci/requirements-3.4.build | 1 + ci/requirements-3.4_SLOW.build | 2 + ci/requirements-3.5.build | 2 + ci/requirements-3.5_ASCII.build | 2 + ci/requirements-3.5_DOC_BUILD.build | 2 + 
ci/requirements-3.5_OSX.build | 2 + ci/requirements-3.6.build | 2 + ...build => requirements-3.6_NUMPY_DEV.build} | 1 + ...sh => requirements-3.6_NUMPY_DEV.build.sh} | 0 ...DEV.run => requirements-3.6_NUMPY_DEV.run} | 0 ...ts-3.6-64.run => requirements-3.6_WIN.run} | 0 circle.yml | 8 ++-- 25 files changed, 74 insertions(+), 78 deletions(-) rename ci/{requirements-2.7-64.run => requirements-2.7_WIN.run} (100%) delete mode 100644 ci/requirements-3.4-64.run rename ci/{requirements-3.5_NUMPY_DEV.build => requirements-3.6_NUMPY_DEV.build} (70%) rename ci/{requirements-3.5_NUMPY_DEV.build.sh => requirements-3.6_NUMPY_DEV.build.sh} (100%) rename ci/{requirements-3.5_NUMPY_DEV.run => requirements-3.6_NUMPY_DEV.run} (100%) rename ci/{requirements-3.6-64.run => requirements-3.6_WIN.run} (100%) diff --git a/.travis.yml b/.travis.yml index bb3388734229e..d9dbdf96ff976 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,9 @@ sudo: false language: python -# To turn off cached miniconda, cython files and compiler cache comment out the -# USE_CACHE=true line for the build in the matrix below. To delete caches go to -# https://travis-ci.org/OWNER/REPOSITORY/caches or run +# To turn off cached cython files and compiler cache +# set NOCACHE-true +# To delete caches go to https://travis-ci.org/OWNER/REPOSITORY/caches or run # travis cache --delete inside the project directory from the travis command line client # The cash directories will be deleted if anything in ci/ changes in a commit cache: @@ -33,31 +33,31 @@ matrix: - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache env: - - PYTHON_VERSION=3.5 JOB_NAME="35_osx" TEST_ARGS="--skip-slow --skip-network" JOB_TAG="_OSX" TRAVIS_PYTHON_VERSION=3.5 USE_CACHE=true + - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" TRAVIS_PYTHON_VERSION=3.5 - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_slow_nnet_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" JOB_TAG="_LOCALE" USE_CACHE=true + - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" addons: apt: packages: - language-pack-zh-hans - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_nslow" TEST_ARGS="--skip-slow" LINT=true USE_CACHE=true + - JOB="2.7" TEST_ARGS="--skip-slow" LINT=true addons: apt: packages: - python-gtk2 - python: 3.5 env: - - PYTHON_VERSION=3.5 JOB_NAME="35_nslow" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true USE_CACHE=true + - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true addons: apt: packages: - xsel - python: 3.6 env: - - PYTHON_VERSION=3.6 JOB_NAME="36" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true USE_CACHE=true + - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true addons: apt: packages: @@ -66,32 +66,32 @@ matrix: # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true + - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" # In allow_failures - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true + - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true # In allow_failures - - python: 3.5 + - python: 3.6 env: - - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true + - JOB="3.6_NUMPY_DEV" 
TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures - python: 3.5 env: - - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true + - JOB="3.5_DOC_BUILD" DOC_BUILD=true allow_failures: - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_slow" JOB_TAG="_SLOW" TEST_ARGS="--only-slow --skip-network" USE_CACHE=true + - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" - python: 2.7 env: - - PYTHON_VERSION=2.7 JOB_NAME="27_build_test" JOB_TAG="_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true USE_CACHE=true - - python: 3.5 + - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true + - python: 3.6 env: - - PYTHON_VERSION=3.5 JOB_NAME="35_numpy_dev" JOB_TAG="_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" USE_CACHE=true + - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - python: 3.5 env: - - PYTHON_VERSION=3.5 JOB_NAME="doc_build" DOC_BUILD=true JOB_TAG="_DOC_BUILD" USE_CACHE=true + - JOB="3.5_DOC_BUILD" DOC_BUILD=true before_install: - echo "before_install" @@ -107,7 +107,6 @@ before_install: install: - echo "install start" - - ci/check_cache.sh - ci/prep_cython_cache.sh - ci/install_travis.sh - ci/submit_cython_cache.sh diff --git a/appveyor.yml b/appveyor.yml index 5d748ddf1a108..db729b3005be6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -72,11 +72,11 @@ install: - cmd: conda info -a # create our env - - cmd: conda create -q -n pandas python=%PYTHON_VERSION% cython pytest + - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest - cmd: activate pandas - - SET REQ=ci\requirements-%PYTHON_VERSION%-%PYTHON_ARCH%.run + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" - - cmd: conda install -n pandas -q --file=%REQ% + - cmd: conda install -n pandas --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" diff --git a/ci/check_cache.sh b/ci/check_cache.sh index 1c9de7b017569..b83144fc45ef4 100755 --- a/ci/check_cache.sh +++ b/ci/check_cache.sh @@ -1,5 +1,9 @@ #!/bin/bash +# currently not used +# script to make sure that cache is clean +# Travis CI now handles this + if [ "$TRAVIS_PULL_REQUEST" == "false" ] then echo "Not a PR: checking for changes in ci/ from last 2 commits" diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 485586e9d4f49..00e14b10ebbd6 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -46,9 +46,9 @@ echo "[environmental variable file]" cat $ENVS_FILE source $ENVS_FILE -export REQ_BUILD=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build -export REQ_RUN=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run -export REQ_PIP=ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip +export REQ_BUILD=ci/requirements-${JOB}.build +export REQ_RUN=ci/requirements-${JOB}.run +export REQ_PIP=ci/requirements-${JOB}.pip # edit the locale override if needed if [ -n "$LOCALE_OVERRIDE" ]; then @@ -61,16 +61,13 @@ if [ -n "$LOCALE_OVERRIDE" ]; then echo fi -# create new env -echo "[create env]" -time conda create -q -n pandas python=${PYTHON_VERSION} pytest || exit 1 +# create envbuild deps +echo "[create env: ${REQ_BUILD}]" +time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 +time conda install -n pandas pytest || exit 1 source activate pandas -# build deps -echo "[build installs: ${REQ_BUILD}]" -time conda install -q --file=${REQ_BUILD} || exit 1 - # build but don't install echo "[build em]" time python 
setup.py build_ext --inplace || exit 1 @@ -84,5 +81,5 @@ fi # we may have additional pip installs echo "[pip installs: ${REQ_PIP}]" if [ -e ${REQ_PIP} ]; then - pip install -q -r $REQ_PIP + pip install -r $REQ_PIP fi diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 66633c0592748..ac7bb2c2f3764 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -68,7 +68,7 @@ conda info -a || exit 1 # set the compiler cache to work echo -if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then +if [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[Using ccache]" export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH gcc=$(which gcc) @@ -76,7 +76,7 @@ if [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then ccache=$(which ccache) echo "[ccache]: $ccache" export CC='ccache gcc' -elif [ "$USE_CACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then +elif [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then echo "[Using ccache]" time brew install ccache export PATH=/usr/local/opt/ccache/libexec:$PATH @@ -91,35 +91,22 @@ fi echo echo "[create env]" -# may have installation instructions for this build -INSTALL="ci/install-${PYTHON_VERSION}${JOB_TAG}.sh" -if [ -e ${INSTALL} ]; then - time bash $INSTALL || exit 1 -else - # create new env - # this may already exists, in which case our caching worked - time conda create -n pandas python=$PYTHON_VERSION pytest nomkl -fi +# create our environment +REQ="ci/requirements-${JOB}.build" +time conda create -n pandas --file=${REQ} || exit 1 -# build deps -echo -echo "[build installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build" -if [ -e ${REQ} ]; then - time conda install -n pandas --file=${REQ} || exit 1 -fi +source activate pandas # may have addtl installation instructions for this build echo echo "[build addtl installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.build.sh" +REQ="ci/requirements-${JOB}.build.sh" if [ -e ${REQ} ]; then time bash $REQ || exit 1 fi -source activate pandas - -pip install pytest-xdist +time conda install -n pandas pytest +time pip install pytest-xdist if [ "$LINT" ]; then conda install flake8 @@ -152,7 +139,7 @@ fi # we may have run installations echo echo "[conda installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.run" +REQ="ci/requirements-${JOB}.run" if [ -e ${REQ} ]; then time conda install -n pandas --file=${REQ} || exit 1 fi @@ -160,7 +147,7 @@ fi # we may have additional pip installs echo echo "[pip installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.pip" +REQ="ci/requirements-${JOB}.pip" if [ -e ${REQ} ]; then pip install -r $REQ fi @@ -168,7 +155,7 @@ fi # may have addtl installation instructions for this build echo echo "[addtl installs]" -REQ="ci/requirements-${PYTHON_VERSION}${JOB_TAG}.sh" +REQ="ci/requirements-${JOB}.sh" if [ -e ${REQ} ]; then time bash $REQ || exit 1 fi diff --git a/ci/prep_cython_cache.sh b/ci/prep_cython_cache.sh index e091bb00ccedc..18d9388327ddc 100755 --- a/ci/prep_cython_cache.sh +++ b/ci/prep_cython_cache.sh @@ -22,7 +22,7 @@ fi home_dir=$(pwd) -if [ -f "$CACHE_File" ] && [ "$USE_CACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then +if [ -f "$CACHE_File" ] && [ -z "$NOCACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then echo "Cache available - checking pyx diff" @@ -57,16 +57,16 @@ if [ -f "$CACHE_File" ] && [ "$USE_CACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then fi -if [ $clear_cache -eq 0 ] && [ "$USE_CACHE" ] +if [ $clear_cache -eq 0 ] && [ -z "$NOCACHE" ] then - # No and use_cache is set + # No and nocache is not set echo "Will reuse 
cached cython file" cd / tar xvmf $CACHE_File cd $home_dir else echo "Rebuilding cythonized files" - echo "Use cache (Blank if not set) = $USE_CACHE" + echo "No cache = $NOCACHE" echo "Clear cache (1=YES) = $clear_cache" fi diff --git a/ci/requirements-2.7.build b/ci/requirements-2.7.build index 836385671d603..415df13179fcf 100644 --- a/ci/requirements-2.7.build +++ b/ci/requirements-2.7.build @@ -1,4 +1,6 @@ +python=2.7* python-dateutil=2.4.1 pytz=2013b +nomkl numpy cython=0.23 diff --git a/ci/requirements-2.7_BUILD_TEST.build b/ci/requirements-2.7_BUILD_TEST.build index faf1e3559f7f1..aadec00cb7ebf 100644 --- a/ci/requirements-2.7_BUILD_TEST.build +++ b/ci/requirements-2.7_BUILD_TEST.build @@ -1,4 +1,6 @@ +python=2.7* dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-2.7_COMPAT.build b/ci/requirements-2.7_COMPAT.build index 95e3da03f161b..0e1ccf9eac9bf 100644 --- a/ci/requirements-2.7_COMPAT.build +++ b/ci/requirements-2.7_COMPAT.build @@ -1,3 +1,4 @@ +python=2.7* numpy=1.7.1 cython=0.23 dateutil=1.5 diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build index 28e2b96851eff..4a37ce8fbe161 100644 --- a/ci/requirements-2.7_LOCALE.build +++ b/ci/requirements-2.7_LOCALE.build @@ -1,3 +1,4 @@ +python=2.7* python-dateutil pytz=2013b numpy=1.8.2 diff --git a/ci/requirements-2.7_SLOW.build b/ci/requirements-2.7_SLOW.build index 664e8b418def7..0f4a2c6792e6b 100644 --- a/ci/requirements-2.7_SLOW.build +++ b/ci/requirements-2.7_SLOW.build @@ -1,3 +1,4 @@ +python=2.7* python-dateutil pytz numpy=1.8.2 diff --git a/ci/requirements-2.7-64.run b/ci/requirements-2.7_WIN.run similarity index 100% rename from ci/requirements-2.7-64.run rename to ci/requirements-2.7_WIN.run diff --git a/ci/requirements-3.4-64.run b/ci/requirements-3.4-64.run deleted file mode 100644 index 106cc5b7168ba..0000000000000 --- a/ci/requirements-3.4-64.run +++ /dev/null @@ -1,12 +0,0 @@ -python-dateutil -pytz -numpy=1.9* -openpyxl -xlsxwriter -xlrd -xlwt -scipy -numexpr -pytables -bottleneck -jinja2=2.8 diff --git a/ci/requirements-3.4.build b/ci/requirements-3.4.build index e6e59dcba63fe..e8a957f70d40e 100644 --- a/ci/requirements-3.4.build +++ b/ci/requirements-3.4.build @@ -1,3 +1,4 @@ +python=3.4* numpy=1.8.1 cython=0.24.1 libgfortran=1.0 diff --git a/ci/requirements-3.4_SLOW.build b/ci/requirements-3.4_SLOW.build index c05a68a14b402..88212053af472 100644 --- a/ci/requirements-3.4_SLOW.build +++ b/ci/requirements-3.4_SLOW.build @@ -1,4 +1,6 @@ +python=3.4* python-dateutil pytz +nomkl numpy=1.10* cython diff --git a/ci/requirements-3.5.build b/ci/requirements-3.5.build index 2fc2053e64fe9..76227e106e1fd 100644 --- a/ci/requirements-3.5.build +++ b/ci/requirements-3.5.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz +nomkl numpy=1.11.3 cython diff --git a/ci/requirements-3.5_ASCII.build b/ci/requirements-3.5_ASCII.build index 9558cf00ddf5c..f7befe3b31865 100644 --- a/ci/requirements-3.5_ASCII.build +++ b/ci/requirements-3.5_ASCII.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.5_DOC_BUILD.build b/ci/requirements-3.5_DOC_BUILD.build index 9558cf00ddf5c..f7befe3b31865 100644 --- a/ci/requirements-3.5_DOC_BUILD.build +++ b/ci/requirements-3.5_DOC_BUILD.build @@ -1,4 +1,6 @@ +python=3.5* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.5_OSX.build b/ci/requirements-3.5_OSX.build index a201be352b8e4..f5bc01b67a20a 100644 --- a/ci/requirements-3.5_OSX.build +++ b/ci/requirements-3.5_OSX.build @@ -1,2 +1,4 @@ 
+python=3.5* +nomkl numpy=1.10.4 cython diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build index 9558cf00ddf5c..1c4b46aea3865 100644 --- a/ci/requirements-3.6.build +++ b/ci/requirements-3.6.build @@ -1,4 +1,6 @@ +python=3.6* python-dateutil pytz +nomkl numpy cython diff --git a/ci/requirements-3.5_NUMPY_DEV.build b/ci/requirements-3.6_NUMPY_DEV.build similarity index 70% rename from ci/requirements-3.5_NUMPY_DEV.build rename to ci/requirements-3.6_NUMPY_DEV.build index d15edbfa3d2c1..738366867a217 100644 --- a/ci/requirements-3.5_NUMPY_DEV.build +++ b/ci/requirements-3.6_NUMPY_DEV.build @@ -1,3 +1,4 @@ +python=3.6* python-dateutil pytz cython diff --git a/ci/requirements-3.5_NUMPY_DEV.build.sh b/ci/requirements-3.6_NUMPY_DEV.build.sh similarity index 100% rename from ci/requirements-3.5_NUMPY_DEV.build.sh rename to ci/requirements-3.6_NUMPY_DEV.build.sh diff --git a/ci/requirements-3.5_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run similarity index 100% rename from ci/requirements-3.5_NUMPY_DEV.run rename to ci/requirements-3.6_NUMPY_DEV.run diff --git a/ci/requirements-3.6-64.run b/ci/requirements-3.6_WIN.run similarity index 100% rename from ci/requirements-3.6-64.run rename to ci/requirements-3.6_WIN.run diff --git a/circle.yml b/circle.yml index 046af6e9e1389..fa2da0680f388 100644 --- a/circle.yml +++ b/circle.yml @@ -21,13 +21,13 @@ dependencies: - > case $CIRCLE_NODE_INDEX in 0) - sudo apt-get install language-pack-it && ./ci/install_circle.sh PYTHON_VERSION=2.7 JOB_TAG="_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; + sudo apt-get install language-pack-it && ./ci/install_circle.sh JOB="2.7_COMPAT" LOCALE_OVERRIDE="it_IT.UTF-8" ;; 1) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 JOB_TAG="_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.4_SLOW" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; 2) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh PYTHON_VERSION=3.4 JOB_TAG="" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; + sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.4" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; 3) - ./ci/install_circle.sh PYTHON_VERSION=3.5 JOB_TAG="_ASCII" LOCALE_OVERRIDE="C" ;; + ./ci/install_circle.sh JOB="3.5_ASCII" LOCALE_OVERRIDE="C" ;; esac - ./ci/show_circle.sh From c80bd19e7c866762c8cfdbb11d2608e4acd6c2f8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 11:12:18 -0400 Subject: [PATCH 281/353] DOC: remove warnings for .sort / .order deprecation removals (#15808) --- doc/source/whatsnew/v0.13.1.txt | 2 +- doc/source/whatsnew/v0.15.0.txt | 2 +- doc/source/whatsnew/v0.20.0.txt | 5 +++++ doc/source/whatsnew/v0.7.3.txt | 2 +- doc/source/whatsnew/v0.9.1.txt | 15 +++++++++++---- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.13.1.txt b/doc/source/whatsnew/v0.13.1.txt index d5d54ba43b622..5e5653945fefa 100644 --- a/doc/source/whatsnew/v0.13.1.txt +++ b/doc/source/whatsnew/v0.13.1.txt @@ -125,7 +125,7 @@ API changes df = DataFrame({'col':['foo', 0, np.nan]}) df2 = DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) df.equals(df2) - df.equals(df2.sort()) + df.equals(df2.sort_index()) import pandas.core.common as com com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan])) diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index aff8ec9092cdc..6282f15b6faeb 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt 
@@ -80,7 +80,7 @@ For full docs, see the :ref:`categorical introduction ` and the # Reorder the categories and simultaneously add the missing categories df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) df["grade"] - df.sort("grade") + df.sort_values("grade") df.groupby("grade").size() - ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f96fc41c73f15..38109d5442751 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -35,6 +35,11 @@ New features The ``dtype`` keyword argument in the :func:`read_csv` function for specifying the types of parsed columns is now supported with the ``'python'`` engine (:issue:`14295`). See the :ref:`io docs ` for more information. +.. ipython:: python + :suppress: + + from pandas.compat import StringIO + .. ipython:: python data = "a,b\n1,2\n3,4" diff --git a/doc/source/whatsnew/v0.7.3.txt b/doc/source/whatsnew/v0.7.3.txt index 21aa16e5fcb06..6b5199c55cbf5 100644 --- a/doc/source/whatsnew/v0.7.3.txt +++ b/doc/source/whatsnew/v0.7.3.txt @@ -93,4 +93,4 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: df grouped = df.groupby('A')['C'] grouped.describe() - grouped.apply(lambda x: x.order()[-2:]) # top 2 values + grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values diff --git a/doc/source/whatsnew/v0.9.1.txt b/doc/source/whatsnew/v0.9.1.txt index 9dd29a5fe7bf7..4faf38219ebee 100644 --- a/doc/source/whatsnew/v0.9.1.txt +++ b/doc/source/whatsnew/v0.9.1.txt @@ -20,13 +20,20 @@ New features - `Series.sort`, `DataFrame.sort`, and `DataFrame.sort_index` can now be specified in a per-column manner to support multiple sort orders (:issue:`928`) - .. ipython:: python - :okwarning: + .. code-block:: ipython - df = DataFrame(np.random.randint(0, 2, (6, 3)), columns=['A', 'B', 'C']) + In [2]: df = DataFrame(np.random.randint(0, 2, (6, 3)), columns=['A', 'B', 'C']) - df.sort(['A', 'B'], ascending=[1, 0]) + In [3]: df.sort(['A', 'B'], ascending=[1, 0]) + Out[3]: + A B C + 3 0 1 1 + 4 0 1 1 + 2 0 0 1 + 0 1 0 0 + 1 1 0 0 + 5 1 0 0 - `DataFrame.rank` now supports additional argument values for the `na_option` parameter so missing values can be assigned either the largest From 18ac0b7752b531daa105a73ef3c211f83bc5c8f7 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 26 Mar 2017 12:58:58 -0400 Subject: [PATCH 282/353] MAINT: Remove combineAdd and combineMult (#15805) Deprecated in 0.17.0. xref gh-10735 --- doc/source/10min.rst | 43 +++++++++--------- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 56 ----------------------- pandas/tests/frame/test_operators.py | 68 ---------------------------- 4 files changed, 22 insertions(+), 146 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 0612e86134cf2..8482eef552c17 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -84,29 +84,28 @@ will be completed: @verbatim In [1]: df2. 
- df2.A df2.boxplot - df2.abs df2.C - df2.add df2.clip - df2.add_prefix df2.clip_lower - df2.add_suffix df2.clip_upper - df2.align df2.columns - df2.all df2.combine - df2.any df2.combineAdd + df2.A df2.bool + df2.abs df2.boxplot + df2.add df2.C + df2.add_prefix df2.clip + df2.add_suffix df2.clip_lower + df2.align df2.clip_upper + df2.all df2.columns + df2.any df2.combine df2.append df2.combine_first - df2.apply df2.combineMult - df2.applymap df2.compound - df2.as_blocks df2.consolidate - df2.asfreq df2.convert_objects - df2.as_matrix df2.copy - df2.astype df2.corr - df2.at df2.corrwith - df2.at_time df2.count - df2.axes df2.cov - df2.B df2.cummax - df2.between_time df2.cummin - df2.bfill df2.cumprod - df2.blocks df2.cumsum - df2.bool df2.D + df2.apply df2.compound + df2.applymap df2.consolidate + df2.as_blocks df2.convert_objects + df2.asfreq df2.copy + df2.as_matrix df2.corr + df2.astype df2.corrwith + df2.at df2.count + df2.at_time df2.cov + df2.axes df2.cummax + df2.B df2.cummin + df2.between_time df2.cumprod + df2.bfill df2.cumsum + df2.blocks df2.D As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically tab completed. ``E`` is there as well; the rest of the attributes have been diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 38109d5442751..358d66653fb9c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -819,6 +819,7 @@ Removal of prior version deprecations/changes - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) - Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) +- ``DataFrame`` has dropped the ``combineAdd`` and ``combineMult`` methods in favor of ``add`` and ``mul`` respectively (:issue:`10735`) .. _whatsnew_0200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b5e8e0799421..90c49a9c85133 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5362,62 +5362,6 @@ def isin(self, values): values).reshape(self.shape), self.index, self.columns) - # ---------------------------------------------------------------------- - # Deprecated stuff - - def combineAdd(self, other): - """ - DEPRECATED. Use ``DataFrame.add(other, fill_value=0.)`` instead. - - Add two DataFrame objects and do not propagate - NaN values, so if for a (column, time) one frame is missing a - value, it will default to the other frame's value (which might - be NaN as well) - - Parameters - ---------- - other : DataFrame - - Returns - ------- - DataFrame - - See also - -------- - DataFrame.add - - """ - warnings.warn("'combineAdd' is deprecated. Use " - "'DataFrame.add(other, fill_value=0.)' instead", - FutureWarning, stacklevel=2) - return self.add(other, fill_value=0.) - - def combineMult(self, other): - """ - DEPRECATED. Use ``DataFrame.mul(other, fill_value=1.)`` instead. - - Multiply two DataFrame objects and do not propagate NaN values, so if - for a (column, time) one frame is missing a value, it will default to - the other frame's value (which might be NaN as well) - - Parameters - ---------- - other : DataFrame - - Returns - ------- - DataFrame - - See also - -------- - DataFrame.mul - - """ - warnings.warn("'combineMult' is deprecated. 
Use " - "'DataFrame.mul(other, fill_value=1.)' instead", - FutureWarning, stacklevel=2) - return self.mul(other, fill_value=1.) - DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True, aliases={'rows': 0}) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index d6a3592446fd5..268854fe6b62d 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1038,74 +1038,6 @@ def test_boolean_comparison(self): self.assertRaises(ValueError, lambda: df == (2, 2)) self.assertRaises(ValueError, lambda: df == [2, 2]) - def test_combineAdd(self): - - with tm.assert_produces_warning(FutureWarning): - # trivial - comb = self.frame.combineAdd(self.frame) - assert_frame_equal(comb, self.frame * 2) - - # more rigorous - a = DataFrame([[1., nan, nan, 2., nan]], - columns=np.arange(5)) - b = DataFrame([[2., 3., nan, 2., 6., nan]], - columns=np.arange(6)) - expected = DataFrame([[3., 3., nan, 4., 6., nan]], - columns=np.arange(6)) - - with tm.assert_produces_warning(FutureWarning): - result = a.combineAdd(b) - assert_frame_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result2 = a.T.combineAdd(b.T) - assert_frame_equal(result2, expected.T) - - expected2 = a.combine(b, operator.add, fill_value=0.) - assert_frame_equal(expected, expected2) - - # corner cases - with tm.assert_produces_warning(FutureWarning): - comb = self.frame.combineAdd(self.empty) - assert_frame_equal(comb, self.frame) - - with tm.assert_produces_warning(FutureWarning): - comb = self.empty.combineAdd(self.frame) - assert_frame_equal(comb, self.frame) - - # integer corner case - df1 = DataFrame({'x': [5]}) - df2 = DataFrame({'x': [1]}) - df3 = DataFrame({'x': [6]}) - - with tm.assert_produces_warning(FutureWarning): - comb = df1.combineAdd(df2) - assert_frame_equal(comb, df3) - - # mixed type GH2191 - df1 = DataFrame({'A': [1, 2], 'B': [3, 4]}) - df2 = DataFrame({'A': [1, 2], 'C': [5, 6]}) - with tm.assert_produces_warning(FutureWarning): - rs = df1.combineAdd(df2) - xp = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]}) - assert_frame_equal(xp, rs) - - # TODO: test integer fill corner? - - def test_combineMult(self): - with tm.assert_produces_warning(FutureWarning): - # trivial - comb = self.frame.combineMult(self.frame) - - assert_frame_equal(comb, self.frame ** 2) - - # corner cases - comb = self.frame.combineMult(self.empty) - assert_frame_equal(comb, self.frame) - - comb = self.empty.combineMult(self.frame) - assert_frame_equal(comb, self.frame) - def test_combine_generic(self): df1 = self.frame df2 = self.frame.loc[self.frame.index[:-5], ['A', 'B', 'C']] From 0caf685d1d419782b513efdab399c18dadc3a9da Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 26 Mar 2017 13:02:45 -0400 Subject: [PATCH 283/353] DOC: Explain differences further for sep parameter (#15804) [ci skip] --- doc/source/io.rst | 11 ++++++----- pandas/io/parsers.py | 10 ++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index a702efdc6aaf9..faeea9d448cf2 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -91,11 +91,12 @@ filepath_or_buffer : various locations), or any object with a ``read()`` method (such as an open file or :class:`~python:io.StringIO`). sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table` - Delimiter to use. If sep is ``None``, - will try to automatically determine this. 
Separators longer than 1 character - and different from ``'\s+'`` will be interpreted as regular expressions, will - force use of the python parsing engine and will ignore quotes in the data. - Regex example: ``'\\r\\t'``. + Delimiter to use. If sep is ``None``, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will be + used automatically. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``. delimiter : str, default ``None`` Alternative argument name for sep. delim_whitespace : boolean, default False diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f7b2d75c19304..45c62b224ef4e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -305,10 +305,12 @@ currently more feature-complete.""" _sep_doc = r"""sep : str, default {default} - Delimiter to use. If sep is None, will try to automatically determine - this. Separators longer than 1 character and different from ``'\s+'`` will - be interpreted as regular expressions, will force use of the python parsing - engine and will ignore quotes in the data. Regex example: ``'\r\t'``""" + Delimiter to use. If sep is None, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will + be used automatically. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``""" _read_csv_doc = """ Read CSV (comma-separated) file into DataFrame From 179363765110611ad10883bab55d79785369da9b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 14:26:49 -0400 Subject: [PATCH 284/353] TST: suppress some numpy warnings (#15811) * BUG: incorrect conversion on isin algos with m8 * TST: suppress some warnings --- pandas/core/algorithms.py | 5 ++++- pandas/tests/test_algos.py | 2 ++ pandas/tests/test_categorical.py | 6 ++++-- pandas/tests/test_nanops.py | 19 ++++++++++++------- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3b77bda6f69f0..a62d290277443 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -174,7 +174,7 @@ def isin(comps, values): " to isin(), you passed a " "[{0}]".format(type(values).__name__)) - from pandas import DatetimeIndex, PeriodIndex + from pandas import DatetimeIndex, TimedeltaIndex, PeriodIndex if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = np.array(list(values), dtype='object') @@ -183,6 +183,9 @@ def isin(comps, values): if is_period_dtype(values): comps = PeriodIndex(comps) values = PeriodIndex(values) + elif is_timedelta64_dtype(comps): + comps = TimedeltaIndex(comps) + values = TimedeltaIndex(values) else: comps = DatetimeIndex(comps) values = DatetimeIndex(values) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index f8eac7a8911ad..5d69746034346 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -431,6 +431,8 @@ def test_basic(self): expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) + def test_i8(self): + arr = pd.date_range('20130101', periods=3).values result = 
algos.isin(arr, [arr[0]]) expected = np.array([True, False, False]) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 6c8aeba704c7b..479f0e4566b8d 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -2913,10 +2913,12 @@ def test_info(self): df['category'] = Series(np.array(list('abcdefghij')).take( np.random.randint(0, 10, size=n))).astype('category') df.isnull() - df.info() + buf = compat.StringIO() + df.info(buf=buf) df2 = df[df['category'] == 'd'] - df2.info() + buf = compat.StringIO() + df2.info(buf=buf) def test_groupby_sort(self): diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 75a7555d58ca5..54de8c1e34031 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -389,9 +389,10 @@ def test_nanstd(self): def test_nansem(self): tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import sem - self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, - allow_str=False, allow_date=False, - allow_tdelta=True, allow_obj='convert') + with np.errstate(invalid='ignore'): + self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False, allow_obj='convert') def _minmax_wrap(self, value, axis=None, func=None): res = func(value, axis) @@ -449,16 +450,20 @@ def test_nanskew(self): tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import skew func = partial(self._skew_kurt_wrap, func=skew) - self.check_funs(nanops.nanskew, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False) + with np.errstate(invalid='ignore'): + self.check_funs(nanops.nanskew, func, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False) def test_nankurt(self): tm.skip_if_no_package('scipy', min_version='0.17.0') from scipy.stats import kurtosis func1 = partial(kurtosis, fisher=True) func = partial(self._skew_kurt_wrap, func=func1) - self.check_funs(nanops.nankurt, func, allow_complex=False, - allow_str=False, allow_date=False, allow_tdelta=False) + with np.errstate(invalid='ignore'): + self.check_funs(nanops.nankurt, func, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=False) def test_nanprod(self): self.check_funs(nanops.nanprod, np.prod, allow_str=False, From da92411485d7fbe766d12e5a78910ff7aaa45c12 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 14:53:16 -0400 Subject: [PATCH 285/353] DOC: remove as_indexer from text.rst example --- doc/source/text.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index b110ef2167a03..4992f132ce815 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -146,8 +146,8 @@ following code will cause trouble because of the regular expression meaning of # We need to escape the special character (for >1 len patterns) dollars.str.replace(r'-\$', '-') -The ``replace`` method can also take a callable as replacement. It is called -on every ``pat`` using :func:`re.sub`. The callable should expect one +The ``replace`` method can also take a callable as replacement. It is called +on every ``pat`` using :func:`re.sub`. The callable should expect one positional argument (a regex object) and return a string. .. versionadded:: 0.20.0 @@ -380,7 +380,7 @@ or match a pattern: .. 
ipython:: python - pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern, as_indexer=True) + pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern) The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. From 7e3dd90d0e4744c29da08cb158a10c37b6610ef0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 07:56:56 -0400 Subject: [PATCH 286/353] DOC: small fixes in text.rst --- doc/source/text.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 4992f132ce815..e3e4b24d17f44 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -372,12 +372,11 @@ You can check whether elements contain a pattern: .. ipython:: python - pattern = r'[a-z][0-9]' + pattern = r'[0-9][a-z]' pd.Series(['1', '2', '3a', '3b', '03c']).str.contains(pattern) or match a pattern: - .. ipython:: python pd.Series(['1', '2', '3a', '3b', '03c']).str.match(pattern) @@ -386,7 +385,7 @@ The distinction between ``match`` and ``contains`` is strictness: ``match`` relies on strict ``re.match``, while ``contains`` relies on ``re.search``. Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take - an extra ``na`` argument so missing values can be considered True or False: +an extra ``na`` argument so missing values can be considered True or False: .. ipython:: python From aff78d91f5895f5645cf254f03a3538829e73687 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 26 Mar 2017 15:21:05 -0400 Subject: [PATCH 287/353] CI: remove more cruft --- .travis.yml | 6 ------ ci/install_travis.sh | 3 ++- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index d9dbdf96ff976..777280e3c4a25 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,6 @@ matrix: include: - language: objective-c os: osx - compiler: clang cache: ccache: true directories: @@ -58,11 +57,6 @@ matrix: - python: 3.6 env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true - addons: - apt: - packages: - - libatlas-base-dev - - gfortran # In allow_failures - python: 2.7 env: diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ac7bb2c2f3764..f71df979c9df0 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -77,8 +77,9 @@ if [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo "[ccache]: $ccache" export CC='ccache gcc' elif [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "[Install ccache]" + brew install ccache > /dev/null 2>&1 echo "[Using ccache]" - time brew install ccache export PATH=/usr/local/opt/ccache/libexec:$PATH gcc=$(which gcc) echo "[gcc]: $gcc" From 056c0a666f0cb83ee15e793376361b916e7b364c Mon Sep 17 00:00:00 2001 From: Wes Turner Date: Mon, 27 Mar 2017 09:06:53 -0400 Subject: [PATCH 288/353] DOC: ecosystem.rst: QtPandas xref draperjames/qtpandas#36 Author: Wes Turner Closes #15813 from westurner/patch-5 and squashes the following commits: a97084e [Wes Turner] DOC: ecosystem.rst: QtPandas --- doc/source/ecosystem.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 5a7d6a11d293d..ee0ea60c6f220 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -93,8 +93,8 @@ targets the IPython Notebook environment. `Plotly’s `__ `Python API `__ enables interactive figures and web shareability. 
Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. -Visualizing Data in Qt applications -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`QtPandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Spun off from the main pandas library, the `qtpandas `__ library enables DataFrame visualization and manipulation in PyQt4 and PySide applications. From 80280ec576ab8077ba0cc6664c6a358f0b1e671e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 10:45:17 -0400 Subject: [PATCH 289/353] DEPR: Drop support for NaN categories in Categorical Deprecated in 0.17.0. xref #10748 xref #13648 Author: Jeff Reback Author: gfyoung Closes #15806 from gfyoung/categories-nan-drop and squashes the following commits: 318175b [Jeff Reback] TST: test pd.NaT with correct dtype 4dce349 [gfyoung] Drop support for NaN categories in Categorical --- doc/source/categorical.rst | 9 ++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/categorical.py | 13 +- pandas/tests/indexes/test_category.py | 14 -- pandas/tests/test_categorical.py | 207 ++++---------------------- 5 files changed, 41 insertions(+), 203 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 2203737ecd7b5..411f973e9a71f 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -230,6 +230,15 @@ Categories must be unique or a `ValueError` is raised: except ValueError as e: print("ValueError: " + str(e)) +Categories must also not be ``NaN`` or a `ValueError` is raised: + +.. ipython:: python + + try: + s.cat.categories = [1,2,np.nan] + except ValueError as e: + print("ValueError: " + str(e)) + Appending new categories ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 358d66653fb9c..a0b2b47c4bac3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -816,6 +816,7 @@ Removal of prior version deprecations/changes in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). 
- The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) +- ``Categorical`` has dropped support for ``NaN`` categories (:issue:`10748`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) - Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 0e58c18631588..632c24c33feb7 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -545,18 +545,11 @@ def _validate_categories(cls, categories, fastpath=False): if not fastpath: - # check properties of the categories - # we don't allow NaNs in the categories themselves - + # Categories cannot contain NaN. if categories.hasnans: - # NaNs in cats deprecated in 0.17 - # GH 10748 - msg = ('\nSetting NaNs in `categories` is deprecated and ' - 'will be removed in a future version of pandas.') - warn(msg, FutureWarning, stacklevel=3) - - # categories must be unique + raise ValueError('Categorial categories cannot be null') + # Categories must be unique. if not categories.is_unique: raise ValueError('Categorical categories must be unique') diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 64a0e71bd5ace..ef1be7e60e0e8 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -183,11 +183,6 @@ def test_contains(self): self.assertFalse(0 in ci) self.assertFalse(1 in ci) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ci = CategoricalIndex( - list('aabbca'), categories=list('cabdef') + [np.nan]) - self.assertFalse(np.nan in ci) - ci = CategoricalIndex( list('aabbca') + [np.nan], categories=list('cabdef')) self.assertTrue(np.nan in ci) @@ -541,7 +536,6 @@ def test_ensure_copied_data(self): self.assertIs(_base(index.values), _base(result.values)) def test_equals_categorical(self): - ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) @@ -579,14 +573,6 @@ def test_equals_categorical(self): self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) self.assertTrue(ci.equals(ci.copy())) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ci = CategoricalIndex(list('aabca'), - categories=['c', 'a', 'b', np.nan]) - self.assertFalse(ci.equals(list('aabca'))) - self.assertFalse(ci.equals(CategoricalIndex(list('aabca')))) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(ci.equals(ci.copy())) - ci = CategoricalIndex(list('aabca') + [np.nan], categories=['c', 'a', 'b']) self.assertFalse(ci.equals(list('aabca'))) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 479f0e4566b8d..8fd3c6324d48c 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=E1101,E1103,W0232 +import pytest import sys from datetime import datetime from distutils.version import LooseVersion @@ -17,7 +18,8 @@ import pandas.compat as compat import pandas.util.testing as 
tm from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex, - Timestamp, CategoricalIndex, isnull) + Timestamp, CategoricalIndex, DatetimeIndex, + isnull, NaT) from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context @@ -160,12 +162,6 @@ def f(): self.assertRaises(ValueError, f) - def f(): - with tm.assert_produces_warning(FutureWarning): - Categorical([1, 2], [1, 2, np.nan, np.nan]) - - self.assertRaises(ValueError, f) - # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) self.assertFalse(c1.ordered) @@ -222,29 +218,12 @@ def f(): cat = pd.Categorical([np.nan, 1., 2., 3.]) self.assertTrue(is_float_dtype(cat.categories)) - # Deprecating NaNs in categoires (GH #10748) - # preserve int as far as possible by converting to object if NaN is in - # categories - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1, 2, 3], - categories=[np.nan, 1, 2, 3]) - self.assertTrue(is_object_dtype(cat.categories)) - # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to... # vals = np.asarray(cat[cat.notnull()]) # self.assertTrue(is_integer_dtype(vals)) - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, "a", "b", "c"], - categories=[np.nan, "a", "b", "c"]) - self.assertTrue(is_object_dtype(cat.categories)) - # but don't do it for floats - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1., 2., 3.], - categories=[np.nan, 1., 2., 3.]) - self.assertTrue(is_float_dtype(cat.categories)) # corner cases cat = pd.Categorical([1]) @@ -295,6 +274,22 @@ def f(): c = Categorical(np.array([], dtype='int64'), # noqa categories=[3, 2, 1], ordered=True) + def test_constructor_with_null(self): + + # Cannot have NaN in categories + with pytest.raises(ValueError): + pd.Categorical([np.nan, "a", "b", "c"], + categories=[np.nan, "a", "b", "c"]) + + with pytest.raises(ValueError): + pd.Categorical([None, "a", "b", "c"], + categories=[None, "a", "b", "c"]) + + with pytest.raises(ValueError): + pd.Categorical(DatetimeIndex(['nat', '20160101']), + categories=[NaT, Timestamp('20160101')]) + + def test_constructor_with_index(self): ci = CategoricalIndex(list('aabbca'), categories=list('cab')) tm.assert_categorical_equal(ci.values, Categorical(ci)) @@ -418,6 +413,12 @@ def f(): self.assertRaises(ValueError, f) + # NaN categories included + def f(): + Categorical.from_codes([0, 1, 2], ["a", "b", np.nan]) + + self.assertRaises(ValueError, f) + # too negative def f(): Categorical.from_codes([-2, 1, 2], ["a", "b", "c"]) @@ -649,30 +650,6 @@ def test_describe(self): name='categories')) tm.assert_frame_equal(desc, expected) - # NA as a category - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c", np.nan], - categories=["b", "a", "c", np.nan]) - result = cat.describe() - - expected = DataFrame([[0, 0], [1, 0.25], [2, 0.5], [1, 0.25]], - columns=['counts', 'freqs'], - index=pd.CategoricalIndex(['b', 'a', 'c', np.nan], - name='categories')) - tm.assert_frame_equal(result, expected, check_categorical=False) - - # NA as an unused category - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c"], - categories=["b", "a", "c", np.nan]) - result = cat.describe() - - exp_idx = pd.CategoricalIndex( - ['b', 'a', 'c', np.nan], name='categories') - expected = DataFrame([[0, 0], [1, 1 / 3.], [2, 2 / 3.], [0, 0]], - columns=['counts', 
'freqs'], index=exp_idx) - tm.assert_frame_equal(result, expected, check_categorical=False) - def test_print(self): expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] @@ -1119,90 +1096,18 @@ def test_nan_handling(self): self.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) - # If categories have nan included, the code should point to that - # instead - with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a", "b", np.nan, "a"], - categories=["a", "b", np.nan]) - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - c[1] = np.nan - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 2, 2, 0], dtype=np.int8)) - - # Changing categories should also make the replaced category np.nan - c = Categorical(["a", "b", "c", "a"]) - with tm.assert_produces_warning(FutureWarning): - c.categories = ["a", "b", np.nan] # noqa - - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - # Adding nan to categories should make assigned nan point to the # category! c = Categorical(["a", "b", np.nan, "a"]) self.assert_index_equal(c.categories, Index(["a", "b"])) self.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) - with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) - - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 1, -1, 0], dtype=np.int8)) - c[1] = np.nan - self.assert_index_equal(c.categories, Index(["a", "b", np.nan])) - self.assert_numpy_array_equal(c._codes, - np.array([0, 2, -1, 0], dtype=np.int8)) - - # Remove null categories (GH 10156) - cases = [([1.0, 2.0, np.nan], [1.0, 2.0]), - (['a', 'b', None], ['a', 'b']), - ([pd.Timestamp('2012-05-01'), pd.NaT], - [pd.Timestamp('2012-05-01')])] - - null_values = [np.nan, None, pd.NaT] - - for with_null, without in cases: - with tm.assert_produces_warning(FutureWarning): - base = Categorical([], with_null) - expected = Categorical([], without) - - for nullval in null_values: - result = base.remove_categories(nullval) - self.assert_categorical_equal(result, expected) - - # Different null values are indistinguishable - for i, j in [(0, 1), (0, 2), (1, 2)]: - nulls = [null_values[i], null_values[j]] - - def f(): - with tm.assert_produces_warning(FutureWarning): - Categorical([], categories=nulls) - - self.assertRaises(ValueError, f) def test_isnull(self): exp = np.array([False, False, True]) c = Categorical(["a", "b", np.nan]) res = c.isnull() - self.assert_numpy_array_equal(res, exp) - with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a", "b", np.nan], categories=["a", "b", np.nan]) - res = c.isnull() - self.assert_numpy_array_equal(res, exp) - - # test both nan in categories and as -1 - exp = np.array([True, False, True]) - c = Categorical(["a", "b", np.nan]) - with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) - c[0] = np.nan - res = c.isnull() self.assert_numpy_array_equal(res, exp) def test_codes_immutable(self): @@ -1487,45 +1392,10 @@ def test_slicing_directly(self): def test_set_item_nan(self): cat = pd.Categorical([1, 2, 3]) - exp = pd.Categorical([1, np.nan, 3], categories=[1, 2, 3]) cat[1] = 
np.nan - tm.assert_categorical_equal(cat, exp) - # if nan in categories, the proper code should be set! - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1] = np.nan - exp = np.array([0, 3, 2, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = np.nan - exp = np.array([0, 3, 3, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = [np.nan, 1] - exp = np.array([0, 3, 0, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[1:3] = [np.nan, np.nan] - exp = np.array([0, 3, 3, -1], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) - - cat = pd.Categorical([1, 2, np.nan, 3], categories=[1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) - cat[pd.isnull(cat)] = np.nan - exp = np.array([0, 1, 3, 2], dtype=np.int8) - self.assert_numpy_array_equal(cat.codes, exp) + exp = pd.Categorical([1, np.nan, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(cat, exp) def test_shift(self): # GH 9416 @@ -2026,33 +1896,12 @@ def test_sideeffects_free(self): def test_nan_handling(self): - # Nans are represented as -1 in labels + # NaNs are represented as -1 in labels s = Series(Categorical(["a", "b", np.nan, "a"])) self.assert_index_equal(s.cat.categories, Index(["a", "b"])) self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8)) - # If categories have nan included, the label should point to that - # instead - with tm.assert_produces_warning(FutureWarning): - s2 = Series(Categorical(["a", "b", np.nan, "a"], - categories=["a", "b", np.nan])) - - exp_cat = Index(["a", "b", np.nan]) - self.assert_index_equal(s2.cat.categories, exp_cat) - self.assert_numpy_array_equal(s2.values.codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - - # Changing categories should also make the replaced category np.nan - s3 = Series(Categorical(["a", "b", "c", "a"])) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - s3.cat.categories = ["a", "b", np.nan] - - exp_cat = Index(["a", "b", np.nan]) - self.assert_index_equal(s3.cat.categories, exp_cat) - self.assert_numpy_array_equal(s3.values.codes, - np.array([0, 1, 2, 0], dtype=np.int8)) - def test_cat_accessor(self): s = Series(Categorical(["a", "b", np.nan, "a"])) self.assert_index_equal(s.cat.categories, Index(["a", "b"])) From 686e9e00aaac6747bf0c8e340f5c3eedd893d702 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 10:06:32 -0400 Subject: [PATCH 290/353] CI: make generic 3.5 builds CI: rename BUILD_DOC -> DOC --- .travis.yml | 45 ++++++++++--------- ci/build_docs.sh | 2 +- ...BUILD.build => requirements-3.5_DOC.build} | 1 - ...DOC_BUILD.run => requirements-3.5_DOC.run} | 0 ...5_DOC_BUILD.sh => requirements-3.5_DOC.sh} | 0 ci/script_multi.sh | 7 +-- ci/script_single.sh | 9 ++-- 7 
files changed, 29 insertions(+), 35 deletions(-) rename ci/{requirements-3.5_DOC_BUILD.build => requirements-3.5_DOC.build} (88%) rename ci/{requirements-3.5_DOC_BUILD.run => requirements-3.5_DOC.run} (100%) rename ci/{requirements-3.5_DOC_BUILD.sh => requirements-3.5_DOC.sh} (100%) diff --git a/.travis.yml b/.travis.yml index 777280e3c4a25..1053f8925ebd7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,7 @@ sudo: false language: python +# Default Python version is usually 2.7 +python: 3.5 # To turn off cached cython files and compiler cache # set NOCACHE-true @@ -7,6 +9,7 @@ language: python # travis cache --delete inside the project directory from the travis command line client # The cash directories will be deleted if anything in ci/ changes in a commit cache: + ccache: true directories: - $HOME/.cache # cython cache - $HOME/.ccache # compiler cache @@ -23,69 +26,67 @@ git: matrix: fast_finish: true + exclude: + # Exclude the default Python 3.5 build + - python: 3.5 include: - - language: objective-c - os: osx - cache: - ccache: true - directories: - - $HOME/.cache # cython cache - - $HOME/.ccache # compiler cache + - os: osx + language: generic env: - - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" TRAVIS_PYTHON_VERSION=3.5 - - python: 2.7 + - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" + - os: linux env: - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" addons: apt: packages: - language-pack-zh-hans - - python: 2.7 + - os: linux env: - JOB="2.7" TEST_ARGS="--skip-slow" LINT=true addons: apt: packages: - python-gtk2 - - python: 3.5 + - os: linux env: - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true addons: apt: packages: - xsel - - python: 3.6 + - os: linux env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true # In allow_failures - - python: 2.7 + - os: linux env: - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" # In allow_failures - - python: 2.7 + - os: linux env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true # In allow_failures - - python: 3.6 + - os: linux env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" # In allow_failures - - python: 3.5 + - os: linux env: - - JOB="3.5_DOC_BUILD" DOC_BUILD=true + - JOB="3.5_DOC" DOC=true allow_failures: - - python: 2.7 + - os: linux env: - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" - - python: 2.7 + - os: linux env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true - - python: 3.6 + - os: linux env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" - - python: 3.5 + - os: linux env: - - JOB="3.5_DOC_BUILD" DOC_BUILD=true + - JOB="3.5_DOC" DOC=true before_install: - echo "before_install" diff --git a/ci/build_docs.sh b/ci/build_docs.sh index bfe7a1eed756b..1356d097025c9 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -17,7 +17,7 @@ if [ "$?" 
!= "0" ]; then fi -if [ x"$DOC_BUILD" != x"" ]; then +if [ "$DOC" ]; then echo "Will build docs" diff --git a/ci/requirements-3.5_DOC_BUILD.build b/ci/requirements-3.5_DOC.build similarity index 88% rename from ci/requirements-3.5_DOC_BUILD.build rename to ci/requirements-3.5_DOC.build index f7befe3b31865..73aeb3192242f 100644 --- a/ci/requirements-3.5_DOC_BUILD.build +++ b/ci/requirements-3.5_DOC.build @@ -1,6 +1,5 @@ python=3.5* python-dateutil pytz -nomkl numpy cython diff --git a/ci/requirements-3.5_DOC_BUILD.run b/ci/requirements-3.5_DOC.run similarity index 100% rename from ci/requirements-3.5_DOC_BUILD.run rename to ci/requirements-3.5_DOC.run diff --git a/ci/requirements-3.5_DOC_BUILD.sh b/ci/requirements-3.5_DOC.sh similarity index 100% rename from ci/requirements-3.5_DOC_BUILD.sh rename to ci/requirements-3.5_DOC.sh diff --git a/ci/script_multi.sh b/ci/script_multi.sh index f0fbb8c54bf2a..88ecaf344a410 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -4,11 +4,6 @@ echo "[script multi]" source activate pandas -# don't run the tests for the doc build -if [ x"$DOC_BUILD" != x"" ]; then - exit 0 -fi - if [ -n "$LOCALE_OVERRIDE" ]; then export LC_ALL="$LOCALE_OVERRIDE"; echo "Setting LC_ALL to $LOCALE_OVERRIDE" @@ -26,6 +21,8 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED if [ "$BUILD_TEST" ]; then cd /tmp python -c "import pandas; pandas.test(['-n 2'])" +elif [ "$DOC" ]; then + echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas diff --git a/ci/script_single.sh b/ci/script_single.sh index 86e822cb57653..db637679f0e0f 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -4,11 +4,6 @@ echo "[script_single]" source activate pandas -# don't run the tests for the doc build -if [ x"$DOC_BUILD" != x"" ]; then - exit 0 -fi - if [ -n "$LOCALE_OVERRIDE" ]; then export LC_ALL="$LOCALE_OVERRIDE"; echo "Setting LC_ALL to $LOCALE_OVERRIDE" @@ -18,7 +13,9 @@ if [ -n "$LOCALE_OVERRIDE" ]; then fi if [ "$BUILD_TEST" ]; then - echo "We are not running pytest as this is simply a build test." + echo "We are not running pytest as this is a build test." 
+elif [ "$DOC" ]; then + echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas From 71f621fe11b499a6e1420737faf375ba99bb619b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 15:22:22 -0400 Subject: [PATCH 291/353] API: NaT boolean accessors now return False (#15782) TST: add pandas/tests/scalar/test_nat TST: revise testing of tseries accessors closes #15781 --- doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/_libs/tslib.pyx | 53 +++- pandas/tests/indexes/datetimes/test_misc.py | 9 +- pandas/tests/indexes/datetimes/test_ops.py | 13 +- pandas/tests/indexes/period/test_ops.py | 9 +- pandas/tests/indexes/period/test_period.py | 4 +- pandas/tests/indexes/timedeltas/test_ops.py | 6 +- pandas/tests/scalar/test_nat.py | 248 +++++++++++++++++ pandas/tests/scalar/test_period.py | 25 -- pandas/tests/scalar/test_timedelta.py | 15 +- pandas/tests/scalar/test_timestamp.py | 294 +++----------------- pandas/tests/series/test_datetime_values.py | 20 +- pandas/tests/test_base.py | 2 +- pandas/tests/test_categorical.py | 28 +- pandas/tseries/common.py | 12 +- pandas/tseries/index.py | 61 ++-- pandas/tseries/period.py | 18 +- pandas/tseries/tdi.py | 11 +- 18 files changed, 429 insertions(+), 402 deletions(-) create mode 100644 pandas/tests/scalar/test_nat.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a0b2b47c4bac3..3ab69e1ff409b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -771,7 +771,8 @@ Other API Changes since pandas version 0.13.0 and can be done with the ``Series.str.extract`` method (:issue:`5224`). As a consequence, the ``as_indexer`` keyword is ignored (no longer needed to specify the new behaviour) and is deprecated. - +- ``NaT`` will now correctly report ``False`` for datetimelike boolean operations such as ``is_month_start`` (:issue:`15781`) +- ``NaT`` will now correctly return ``np.nan`` for ``Timedelta`` and ``Period`` accessors such as ``days`` and ``quarter`` (:issue:`15782`) .. 
_whatsnew_0200.deprecations: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 055534bbdb7ee..d441f1ec4759b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -849,6 +849,30 @@ class NaTType(_NaT): def is_leap_year(self): return False + @property + def is_month_start(self): + return False + + @property + def is_quarter_start(self): + return False + + @property + def is_year_start(self): + return False + + @property + def is_month_end(self): + return False + + @property + def is_quarter_end(self): + return False + + @property + def is_year_end(self): + return False + def __rdiv__(self, other): return _nat_rdivide_op(self, other) @@ -3799,8 +3823,9 @@ def array_strptime(ndarray[object] values, object fmt, # these by definition return np.nan fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond', - 'week', 'dayofyear', 'days_in_month', 'daysinmonth', 'dayofweek', - 'weekday_name'] + 'week', 'dayofyear', 'weekofyear', 'days_in_month', 'daysinmonth', + 'dayofweek', 'weekday_name', 'days', 'seconds', 'microseconds', + 'nanoseconds', 'qyear', 'quarter'] for field in fields: prop = property(fget=lambda self: np.nan) setattr(NaTType, field, prop) @@ -4810,7 +4835,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, if field == 'is_month_start': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4823,7 +4848,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4836,7 +4861,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_month_end': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4854,7 +4879,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4871,7 +4896,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_quarter_start': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4885,7 +4910,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4898,7 +4923,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_quarter_end': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4917,7 +4942,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return 
out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4934,7 +4959,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_year_start': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4948,7 +4973,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4961,7 +4986,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_year_end': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -4980,7 +5005,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: out[i] = 0; continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index ef24c493f5090..76a26b09ed131 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -259,19 +259,14 @@ def test_datetimeindex_accessors(self): dti.name = 'name' # non boolean accessors -> return Index - for accessor in ['year', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', - 'dayofweek', 'dayofyear', 'weekofyear', - 'quarter', 'weekday_name']: + for accessor in DatetimeIndex._field_ops: res = getattr(dti, accessor) assert len(res) == 365 assert isinstance(res, Index) assert res.name == 'name' # boolean accessors -> return array - for accessor in ['is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end']: + for accessor in DatetimeIndex._bool_ops: res = getattr(dti, accessor) assert len(res) == 365 assert isinstance(res, np.ndarray) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 4abc282252559..4681879d708c4 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -31,15 +31,10 @@ def setUp(self): self.not_valid_objs = [o for o in self.objs if not mask(o)] def test_ops_properties(self): - self.check_ops_properties( - ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', - 'week', 'dayofweek', 'dayofyear', 'quarter']) - self.check_ops_properties(['date', 'time', 'microsecond', 'nanosecond', - 'is_month_start', 'is_month_end', - 'is_quarter_start', - 'is_quarter_end', 'is_year_start', - 'is_year_end', 'weekday_name'], - lambda x: isinstance(x, DatetimeIndex)) + f = lambda x: isinstance(x, DatetimeIndex) + self.check_ops_properties(DatetimeIndex._field_ops, f) + self.check_ops_properties(DatetimeIndex._object_ops, f) + self.check_ops_properties(DatetimeIndex._bool_ops, f) def test_ops_properties_basic(self): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py 
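
Illustrative sketch (not part of the patch) of the ``NaT`` accessor behaviour this change introduces; it assumes a pandas build with the patch applied:

    import numpy as np
    import pandas as pd

    # Boolean datetime-like accessors on NaT now return False:
    assert pd.NaT.is_month_start is False
    assert pd.NaT.is_year_end is False

    # Field accessors return NaN, covering Timestamp-, Timedelta- and
    # Period-style fields alike:
    assert np.isnan(pd.NaT.day)
    assert np.isnan(pd.NaT.days)
    assert np.isnan(pd.NaT.quarter)
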
index 4533428cf1514..3b94992f2fe9f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -21,11 +21,10 @@ def setUp(self): self.not_valid_objs = [o for o in self.objs if not mask(o)] def test_ops_properties(self): - self.check_ops_properties( - ['year', 'month', 'day', 'hour', 'minute', 'second', 'weekofyear', - 'week', 'dayofweek', 'dayofyear', 'quarter']) - self.check_ops_properties(['qyear'], - lambda x: isinstance(x, PeriodIndex)) + f = lambda x: isinstance(x, PeriodIndex) + self.check_ops_properties(PeriodIndex._field_ops, f) + self.check_ops_properties(PeriodIndex._object_ops, f) + self.check_ops_properties(PeriodIndex._bool_ops, f) def test_asobject_tolist(self): idx = pd.period_range(start='2013-01-01', periods=4, freq='M', diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6a6c0ab49b15d..6639fcd985ac4 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -394,8 +394,8 @@ def test_fields(self): def _check_all_fields(self, periodindex): fields = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', 'dayofyear', - 'quarter', 'qyear', 'days_in_month', 'is_leap_year'] + 'weekofyear', 'week', 'dayofweek', 'dayofyear', + 'quarter', 'qyear', 'days_in_month'] periods = list(periodindex) s = pd.Series(periodindex) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 8c7b88a9cf2ca..2e9f11297dc83 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -21,9 +21,9 @@ def setUp(self): self.not_valid_objs = [] def test_ops_properties(self): - self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', - 'milliseconds']) - self.check_ops_properties(['microseconds', 'nanoseconds']) + f = lambda x: isinstance(x, TimedeltaIndex) + self.check_ops_properties(TimedeltaIndex._field_ops, f) + self.check_ops_properties(TimedeltaIndex._object_ops, f) def test_asobject_tolist(self): idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py new file mode 100644 index 0000000000000..ce2ed237f5559 --- /dev/null +++ b/pandas/tests/scalar/test_nat.py @@ -0,0 +1,248 @@ +import pytest + +from datetime import datetime, timedelta +import pytz + +import numpy as np +from pandas import (NaT, Index, Timestamp, Timedelta, Period, + DatetimeIndex, PeriodIndex, + TimedeltaIndex, Series, isnull) +from pandas.util import testing as tm +from pandas._libs.tslib import iNaT + + +@pytest.mark.parametrize('nat, idx', [(Timestamp('NaT'), DatetimeIndex), + (Timedelta('NaT'), TimedeltaIndex), + (Period('NaT', freq='M'), PeriodIndex)]) +def test_nat_fields(nat, idx): + + for field in idx._field_ops: + + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == 'weekday': + continue + + result = getattr(NaT, field) + assert np.isnan(result) + + result = getattr(nat, field) + assert np.isnan(result) + + for field in idx._bool_ops: + + result = getattr(NaT, field) + assert result is False + + result = getattr(nat, field) + assert result is False + + +def test_nat_vector_field_access(): + idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) + + for field in DatetimeIndex._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if 
field == 'weekday': + continue + + result = getattr(idx, field) + expected = Index([getattr(x, field) for x in idx]) + tm.assert_index_equal(result, expected) + + s = Series(idx) + + for field in DatetimeIndex._field_ops: + + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == 'weekday': + continue + + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + for field in DatetimeIndex._bool_ops: + result = getattr(s.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) +def test_identity(klass): + assert klass(None) is NaT + + result = klass(np.nan) + assert result is NaT + + result = klass(None) + assert result is NaT + + result = klass(iNaT) + assert result is NaT + + result = klass(np.nan) + assert result is NaT + + result = klass(float('nan')) + assert result is NaT + + result = klass(NaT) + assert result is NaT + + result = klass('NaT') + assert result is NaT + + assert isnull(klass('nat')) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) +def test_equality(klass): + + # nat + if klass is not Period: + klass('').value == iNaT + klass('nat').value == iNaT + klass('NAT').value == iNaT + klass(None).value == iNaT + klass(np.nan).value == iNaT + assert isnull(klass('nat')) + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta]) +def test_round_nat(klass): + # GH14940 + ts = klass('nat') + for method in ["round", "floor", "ceil"]: + round_method = getattr(ts, method) + for freq in ["s", "5s", "min", "5min", "h", "5h"]: + assert round_method(freq) is ts + + +def test_NaT_methods(): + # GH 9513 + raise_methods = ['astimezone', 'combine', 'ctime', 'dst', + 'fromordinal', 'fromtimestamp', 'isocalendar', + 'strftime', 'strptime', 'time', 'timestamp', + 'timetuple', 'timetz', 'toordinal', 'tzname', + 'utcfromtimestamp', 'utcnow', 'utcoffset', + 'utctimetuple'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] + nan_methods = ['weekday', 'isoweekday'] + + for method in raise_methods: + if hasattr(NaT, method): + with pytest.raises(ValueError): + getattr(NaT, method)() + + for method in nan_methods: + if hasattr(NaT, method): + assert np.isnan(getattr(NaT, method)()) + + for method in nat_methods: + if hasattr(NaT, method): + # see gh-8254 + exp_warning = None + if method == 'to_datetime': + exp_warning = FutureWarning + with tm.assert_produces_warning( + exp_warning, check_stacklevel=False): + assert getattr(NaT, method)() is NaT + + # GH 12300 + assert NaT.isoformat() == 'NaT' + + +@pytest.mark.parametrize('klass', [Timestamp, Timedelta]) +def test_isoformat(klass): + + result = klass('NaT').isoformat() + expected = 'NaT' + assert result == expected + + +def test_nat_arithmetic(): + # GH 6873 + i = 2 + f = 1.5 + + for (left, right) in [(NaT, i), (NaT, f), (NaT, np.nan)]: + assert left / right is NaT + assert left * right is NaT + assert right * left is NaT + with pytest.raises(TypeError): + right / left + + # Timestamp / datetime + t = Timestamp('2014-01-01') + dt = datetime(2014, 1, 1) + for (left, right) in [(NaT, NaT), (NaT, t), (NaT, dt)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + # timedelta-like + # offsets are tested in test_offsets.py + + delta = 
timedelta(3600) + td = Timedelta('5s') + + for (left, right) in [(NaT, delta), (NaT, td)]: + # NaT + timedelta-like returns NaT + assert right + left is NaT + assert left + right is NaT + assert right - left is NaT + assert left - right is NaT + + # GH 11718 + t_utc = Timestamp('2014-01-01', tz='UTC') + t_tz = Timestamp('2014-01-01', tz='US/Eastern') + dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) + + for (left, right) in [(NaT, t_utc), (NaT, t_tz), + (NaT, dt_tz)]: + # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + # int addition / subtraction + for (left, right) in [(NaT, 2), (NaT, 0), (NaT, -3)]: + assert right + left is NaT + assert left + right is NaT + assert left - right is NaT + assert right - left is NaT + + +def test_nat_arithmetic_index(): + # GH 11718 + + dti = DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') + exp = DatetimeIndex([NaT, NaT], name='x') + tm.assert_index_equal(dti + NaT, exp) + tm.assert_index_equal(NaT + dti, exp) + + dti_tz = DatetimeIndex(['2011-01-01', '2011-01-02'], + tz='US/Eastern', name='x') + exp = DatetimeIndex([NaT, NaT], name='x', tz='US/Eastern') + tm.assert_index_equal(dti_tz + NaT, exp) + tm.assert_index_equal(NaT + dti_tz, exp) + + exp = TimedeltaIndex([NaT, NaT], name='x') + for (left, right) in [(NaT, dti), (NaT, dti_tz)]: + tm.assert_index_equal(left - right, exp) + tm.assert_index_equal(right - left, exp) + + # timedelta + tdi = TimedeltaIndex(['1 day', '2 day'], name='x') + exp = DatetimeIndex([NaT, NaT], name='x') + for (left, right) in [(NaT, tdi)]: + tm.assert_index_equal(left + right, exp) + tm.assert_index_equal(right + left, exp) + tm.assert_index_equal(left - right, exp) + tm.assert_index_equal(right - left, exp) diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 3128e90695324..7a15600d6041e 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -110,20 +110,6 @@ def test_period_cons_nat(self): p = Period(tslib.iNaT) self.assertIs(p, pd.NaT) - def test_cons_null_like(self): - # check Timestamp compat - self.assertIs(Timestamp('NaT'), pd.NaT) - self.assertIs(Period('NaT'), pd.NaT) - - self.assertIs(Timestamp(None), pd.NaT) - self.assertIs(Period(None), pd.NaT) - - self.assertIs(Timestamp(float('nan')), pd.NaT) - self.assertIs(Period(float('nan')), pd.NaT) - - self.assertIs(Timestamp(np.nan), pd.NaT) - self.assertIs(Period(np.nan), pd.NaT) - def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') p2 = Period('2011-01', freq='M') @@ -854,17 +840,6 @@ def test_properties_secondly(self): self.assertEqual(Period(freq='Min', year=2012, month=2, day=1, hour=0, minute=0, second=0).days_in_month, 29) - def test_properties_nat(self): - p_nat = Period('NaT', freq='M') - t_nat = pd.Timestamp('NaT') - self.assertIs(p_nat, t_nat) - - # confirm Period('NaT') work identical with Timestamp('NaT') - for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week', - 'dayofyear', 'quarter', 'days_in_month']: - self.assertTrue(np.isnan(getattr(p_nat, f))) - self.assertTrue(np.isnan(getattr(t_nat, f))) - def test_pnow(self): # deprecation, xref #13790 diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 7c5caa9506ca2..c2b895925b685 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm from 
pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, - to_timedelta, compat, isnull) + to_timedelta, compat) from pandas._libs.tslib import iNaT, NaTType @@ -151,14 +151,6 @@ def test_construction(self): 500, 'ms').astype('m8[ns]').view('i8') self.assertEqual(Timedelta(10.5, unit='s').value, expected) - # nat - self.assertEqual(Timedelta('').value, iNaT) - self.assertEqual(Timedelta('nat').value, iNaT) - self.assertEqual(Timedelta('NAT').value, iNaT) - self.assertEqual(Timedelta(None).value, iNaT) - self.assertEqual(Timedelta(np.nan).value, iNaT) - self.assertTrue(isnull(Timedelta('nat'))) - # offset self.assertEqual(to_timedelta(pd.offsets.Hour(2)), Timedelta('0 days, 02:00:00')) @@ -686,11 +678,6 @@ def test_isoformat(self): expected = 'P0DT0H0M0.001S' self.assertEqual(result, expected) - # NaT - result = Timedelta('NaT').isoformat() - expected = 'NaT' - self.assertEqual(result, expected) - # don't strip every 0 result = Timedelta(minutes=1).isoformat() expected = 'P0DT0H1M0S' diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index bbf33c4db5ad7..e39375141ad5f 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -7,23 +7,19 @@ from datetime import datetime, timedelta from distutils.version import LooseVersion -import pandas as pd import pandas.util.testing as tm - from pandas.tseries import offsets, frequencies from pandas._libs import tslib, period -from pandas._libs.tslib import get_timezone, iNaT +from pandas._libs.tslib import get_timezone from pandas.compat import lrange, long from pandas.util.testing import assert_series_equal from pandas.compat.numpy import np_datetime64_compat from pandas import (Timestamp, date_range, Period, Timedelta, compat, - Series, NaT, isnull, DataFrame, DatetimeIndex) + Series, NaT, DataFrame, DatetimeIndex) from pandas.tseries.frequencies import (RESO_DAY, RESO_HR, RESO_MIN, RESO_US, RESO_MS, RESO_SEC) -randn = np.random.randn - class TestTimestamp(tm.TestCase): @@ -202,8 +198,6 @@ def test_constructor_positional(self): repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)), repr(Timestamp('2015-11-12 01:02:03.999999'))) - self.assertIs(Timestamp(None), pd.NaT) - def test_constructor_keyword(self): # GH 10758 with tm.assertRaises(TypeError): @@ -235,7 +229,7 @@ def test_constructor_fromordinal(self): self.assertEqual(base.toordinal(), ts.toordinal()) ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') - self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) + self.assertEqual(Timestamp('2000-01-01', tz='US/Eastern'), ts) self.assertEqual(base.toordinal(), ts.toordinal()) def test_constructor_offset_depr(self): @@ -260,7 +254,7 @@ def test_constructor_offset_depr_fromordinal(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): ts = Timestamp.fromordinal(base.toordinal(), offset='D') - self.assertEqual(pd.Timestamp('2000-01-01'), ts) + self.assertEqual(Timestamp('2000-01-01'), ts) self.assertEqual(ts.freq, 'D') self.assertEqual(base.toordinal(), ts.toordinal()) @@ -422,12 +416,12 @@ def test_tz_localize_nonexistent(self): self.assertRaises(NonExistentTimeError, ts.tz_localize, tz, errors='raise') self.assertIs(ts.tz_localize(tz, errors='coerce'), - pd.NaT) + NaT) def test_tz_localize_errors_ambiguous(self): # See issue 13057 from pytz.exceptions import AmbiguousTimeError - ts = pd.Timestamp('2015-11-1 01:00') + ts = Timestamp('2015-11-1 
01:00') self.assertRaises(AmbiguousTimeError, ts.tz_localize, 'US/Pacific', errors='coerce') @@ -576,94 +570,6 @@ def check(value, equal): for end in ends: self.assertTrue(getattr(ts, end)) - def test_nat_fields(self): - # GH 10050 - ts = Timestamp('NaT') - self.assertTrue(np.isnan(ts.year)) - self.assertTrue(np.isnan(ts.month)) - self.assertTrue(np.isnan(ts.day)) - self.assertTrue(np.isnan(ts.hour)) - self.assertTrue(np.isnan(ts.minute)) - self.assertTrue(np.isnan(ts.second)) - self.assertTrue(np.isnan(ts.microsecond)) - self.assertTrue(np.isnan(ts.nanosecond)) - self.assertTrue(np.isnan(ts.dayofweek)) - self.assertTrue(np.isnan(ts.quarter)) - self.assertTrue(np.isnan(ts.dayofyear)) - self.assertTrue(np.isnan(ts.week)) - self.assertTrue(np.isnan(ts.daysinmonth)) - self.assertTrue(np.isnan(ts.days_in_month)) - - def test_nat_vector_field_access(self): - idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000']) - - # non boolean fields - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month'] - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_index_equal(result, pd.Index(expected)) - - # boolean fields - fields = ['is_leap_year'] - # other boolean fields like 'is_month_start' and 'is_month_end' - # not yet supported by NaT - - for field in fields: - result = getattr(idx, field) - expected = [getattr(x, field) for x in idx] - self.assert_numpy_array_equal(result, np.array(expected)) - - s = pd.Series(idx) - - for field in fields: - result = getattr(s.dt, field) - expected = [getattr(x, field) for x in idx] - self.assert_series_equal(result, pd.Series(expected)) - - def test_nat_scalar_field_access(self): - fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', - 'second', 'microsecond', 'nanosecond', 'week', 'dayofyear', - 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name'] - for field in fields: - result = getattr(NaT, field) - self.assertTrue(np.isnan(result)) - - def test_NaT_methods(self): - # GH 9513 - raise_methods = ['astimezone', 'combine', 'ctime', 'dst', - 'fromordinal', 'fromtimestamp', 'isocalendar', - 'strftime', 'strptime', 'time', 'timestamp', - 'timetuple', 'timetz', 'toordinal', 'tzname', - 'utcfromtimestamp', 'utcnow', 'utcoffset', - 'utctimetuple'] - nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] - nan_methods = ['weekday', 'isoweekday'] - - for method in raise_methods: - if hasattr(NaT, method): - self.assertRaises(ValueError, getattr(NaT, method)) - - for method in nan_methods: - if hasattr(NaT, method): - self.assertTrue(np.isnan(getattr(NaT, method)())) - - for method in nat_methods: - if hasattr(NaT, method): - # see gh-8254 - exp_warning = None - if method == 'to_datetime': - exp_warning = FutureWarning - with tm.assert_produces_warning( - exp_warning, check_stacklevel=False): - self.assertIs(getattr(NaT, method)(), NaT) - - # GH 12300 - self.assertEqual(NaT.isoformat(), 'NaT') - def test_pprint(self): # GH12622 import pprint @@ -772,24 +678,40 @@ def test_round(self): self.assertRaises(ValueError, lambda: dti.round(freq)) # GH 14440 & 15578 - result = pd.Timestamp('2016-10-17 12:00:00.0015').round('ms') - expected = pd.Timestamp('2016-10-17 12:00:00.002000') + result = Timestamp('2016-10-17 12:00:00.0015').round('ms') + expected = Timestamp('2016-10-17 12:00:00.002000') self.assertEqual(result, expected) - result = pd.Timestamp('2016-10-17 12:00:00.00149').round('ms') - expected = 
pd.Timestamp('2016-10-17 12:00:00.001000') + result = Timestamp('2016-10-17 12:00:00.00149').round('ms') + expected = Timestamp('2016-10-17 12:00:00.001000') self.assertEqual(result, expected) - ts = pd.Timestamp('2016-10-17 12:00:00.0015') + ts = Timestamp('2016-10-17 12:00:00.0015') for freq in ['us', 'ns']: self.assertEqual(ts, ts.round(freq)) - result = pd.Timestamp('2016-10-17 12:00:00.001501031').round('10ns') - expected = pd.Timestamp('2016-10-17 12:00:00.001501030') + result = Timestamp('2016-10-17 12:00:00.001501031').round('10ns') + expected = Timestamp('2016-10-17 12:00:00.001501030') self.assertEqual(result, expected) with tm.assert_produces_warning(): - pd.Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') + Timestamp('2016-10-17 12:00:00.001501031').round('1010ns') + + def test_round_misc(self): + stamp = Timestamp('2000-01-05 05:09:15.13') + + def _check_round(freq, expected): + result = stamp.round(freq=freq) + self.assertEqual(result, expected) + + for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), + ('H', Timestamp('2000-01-05 05:00:00')), + ('S', Timestamp('2000-01-05 05:09:15'))]: + _check_round(freq, expected) + + msg = frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + stamp.round('foo') def test_class_ops_pytz(self): tm._skip_if_no_pytz() @@ -906,48 +828,30 @@ def check(val, unit=None, h=1, s=1, us=0): check(val / 1000000000.0 + 0.5, unit='s', us=500000) check(days + 0.5, unit='D', h=12) - # nan - result = Timestamp(np.nan) - self.assertIs(result, NaT) - - result = Timestamp(None) - self.assertIs(result, NaT) - - result = Timestamp(iNaT) - self.assertIs(result, NaT) - - result = Timestamp(NaT) - self.assertIs(result, NaT) - - result = Timestamp('NaT') - self.assertIs(result, NaT) - - self.assertTrue(isnull(Timestamp('nat'))) - def test_roundtrip(self): # test value to string and back conversions # further test accessors base = Timestamp('20140101 00:00:00') - result = Timestamp(base.value + pd.Timedelta('5ms').value) + result = Timestamp(base.value + Timedelta('5ms').value) self.assertEqual(result, Timestamp(str(base) + ".005000")) self.assertEqual(result.microsecond, 5000) - result = Timestamp(base.value + pd.Timedelta('5us').value) + result = Timestamp(base.value + Timedelta('5us').value) self.assertEqual(result, Timestamp(str(base) + ".000005")) self.assertEqual(result.microsecond, 5) - result = Timestamp(base.value + pd.Timedelta('5ns').value) + result = Timestamp(base.value + Timedelta('5ns').value) self.assertEqual(result, Timestamp(str(base) + ".000000005")) self.assertEqual(result.nanosecond, 5) self.assertEqual(result.microsecond, 0) - result = Timestamp(base.value + pd.Timedelta('6ms 5us').value) + result = Timestamp(base.value + Timedelta('6ms 5us').value) self.assertEqual(result, Timestamp(str(base) + ".006005")) self.assertEqual(result.microsecond, 5 + 6 * 1000) - result = Timestamp(base.value + pd.Timedelta('200ms 5us').value) + result = Timestamp(base.value + Timedelta('200ms 5us').value) self.assertEqual(result, Timestamp(str(base) + ".200005")) self.assertEqual(result.microsecond, 5 + 200 * 1000) @@ -1004,9 +908,9 @@ def test_compare_invalid(self): self.assertTrue(val != np.int64(1)) # ops testing - df = DataFrame(randn(5, 2)) + df = DataFrame(np.random.randn(5, 2)) a = df[0] - b = Series(randn(5)) + b = Series(np.random.randn(5)) b.name = Timestamp('2000-01-01') tm.assert_series_equal(a / b, 1 / (b / a)) @@ -1149,8 +1053,8 @@ def test_timestamp_compare_series(self): s = 
Series(date_range('20010101', periods=10), name='dates') s_nat = s.copy(deep=True) - s[0] = pd.Timestamp('nat') - s[3] = pd.Timestamp('nat') + s[0] = Timestamp('nat') + s[3] = Timestamp('nat') ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} @@ -1194,18 +1098,6 @@ def test_is_leap_year(self): dt = Timestamp('2100-01-01 00:00:00', tz=tz) self.assertFalse(dt.is_leap_year) - self.assertFalse(pd.NaT.is_leap_year) - self.assertIsInstance(pd.NaT.is_leap_year, bool) - - def test_round_nat(self): - # GH14940 - ts = Timestamp('nat') - print(dir(ts)) - for method in ["round", "floor", "ceil"]: - round_method = getattr(ts, method) - for freq in ["s", "5s", "min", "5min", "h", "5h"]: - self.assertIs(round_method(freq), ts) - class TestTimestampNsOperations(tm.TestCase): @@ -1293,95 +1185,6 @@ def test_nanosecond_timestamp(self): self.assertEqual(t.value, expected) self.assertEqual(t.nanosecond, 10) - def test_nat_arithmetic(self): - # GH 6873 - i = 2 - f = 1.5 - - for (left, right) in [(pd.NaT, i), (pd.NaT, f), (pd.NaT, np.nan)]: - self.assertIs(left / right, pd.NaT) - self.assertIs(left * right, pd.NaT) - self.assertIs(right * left, pd.NaT) - with tm.assertRaises(TypeError): - right / left - - # Timestamp / datetime - t = Timestamp('2014-01-01') - dt = datetime(2014, 1, 1) - for (left, right) in [(pd.NaT, pd.NaT), (pd.NaT, t), (pd.NaT, dt)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # timedelta-like - # offsets are tested in test_offsets.py - - delta = timedelta(3600) - td = Timedelta('5s') - - for (left, right) in [(pd.NaT, delta), (pd.NaT, td)]: - # NaT + timedelta-like returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(right - left, pd.NaT) - self.assertIs(left - right, pd.NaT) - - # GH 11718 - tm._skip_if_no_pytz() - import pytz - - t_utc = Timestamp('2014-01-01', tz='UTC') - t_tz = Timestamp('2014-01-01', tz='US/Eastern') - dt_tz = pytz.timezone('Asia/Tokyo').localize(dt) - - for (left, right) in [(pd.NaT, t_utc), (pd.NaT, t_tz), - (pd.NaT, dt_tz)]: - # NaT __add__ or __sub__ Timestamp-like (or inverse) returns NaT - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - # int addition / subtraction - for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: - self.assertIs(right + left, pd.NaT) - self.assertIs(left + right, pd.NaT) - self.assertIs(left - right, pd.NaT) - self.assertIs(right - left, pd.NaT) - - def test_nat_arithmetic_index(self): - # GH 11718 - - # datetime - tm._skip_if_no_pytz() - - dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x') - self.assert_index_equal(dti + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti, exp) - - dti_tz = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], - tz='US/Eastern', name='x') - exp = pd.DatetimeIndex([pd.NaT, pd.NaT], name='x', tz='US/Eastern') - self.assert_index_equal(dti_tz + pd.NaT, exp) - self.assert_index_equal(pd.NaT + dti_tz, exp) - - exp = pd.TimedeltaIndex([pd.NaT, pd.NaT], name='x') - for (left, right) in [(pd.NaT, dti), (pd.NaT, dti_tz)]: - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - - # timedelta - tdi = pd.TimedeltaIndex(['1 day', '2 day'], name='x') - exp = pd.DatetimeIndex([pd.NaT, 
pd.NaT], name='x') - for (left, right) in [(pd.NaT, tdi)]: - self.assert_index_equal(left + right, exp) - self.assert_index_equal(right + left, exp) - self.assert_index_equal(left - right, exp) - self.assert_index_equal(right - left, exp) - class TestTimestampOps(tm.TestCase): @@ -1722,22 +1525,3 @@ def test_to_datetime_bijective(self): self.assertEqual( Timestamp(Timestamp.min.to_pydatetime()).value / 1000, Timestamp.min.value / 1000) - - -class TestTslib(tm.TestCase): - - def test_round(self): - stamp = Timestamp('2000-01-05 05:09:15.13') - - def _check_round(freq, expected): - result = stamp.round(freq=freq) - self.assertEqual(result, expected) - - for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), - ('H', Timestamp('2000-01-05 05:00:00')), - ('S', Timestamp('2000-01-05 05:09:15'))]: - _check_round(freq, expected) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - stamp.round('foo') diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 4c697c7e52bb8..89f972a33a630 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -8,10 +8,8 @@ from pandas.types.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, - date_range, period_range, timedelta_range) -from pandas.tseries.period import PeriodIndex -from pandas.tseries.index import Timestamp, DatetimeIndex -from pandas.tseries.tdi import TimedeltaIndex + date_range, period_range, timedelta_range, + PeriodIndex, Timestamp, DatetimeIndex, TimedeltaIndex) import pandas.core.common as com from pandas.util.testing import assert_series_equal @@ -27,21 +25,13 @@ def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor - ok_for_base = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', - 'dayofyear', 'quarter', 'freq', 'days_in_month', - 'daysinmonth', 'is_leap_year'] - ok_for_period = ok_for_base + ['qyear', 'start_time', 'end_time'] + ok_for_period = PeriodIndex._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] - ok_for_dt = ok_for_base + ['date', 'time', 'microsecond', 'nanosecond', - 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end', 'tz', - 'weekday_name'] + ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', 'ceil', 'weekday_name'] - ok_for_td = ['days', 'seconds', 'microseconds', 'nanoseconds'] + ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil'] diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 68db0d19344b9..032e3a186b84a 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -219,7 +219,7 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False): self.assertEqual(result, expected) # freq raises AttributeError on an Int64Index because its not - # defined we mostly care about Series hwere anyhow + # defined we mostly care about Series here anyhow if not ignore_failures: for o in self.not_valid_objs: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 8fd3c6324d48c..b4072d04dfd81 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py 
@@ -17,9 +17,11 @@ import pandas as pd import pandas.compat as compat import pandas.util.testing as tm -from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex, - Timestamp, CategoricalIndex, DatetimeIndex, - isnull, NaT) +from pandas import (Categorical, Index, Series, DataFrame, + Timestamp, CategoricalIndex, isnull, + date_range, DatetimeIndex, + period_range, PeriodIndex, + timedelta_range, TimedeltaIndex, NaT) from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context @@ -4299,9 +4301,6 @@ def test_str_accessor_api_for_categorical(self): def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 from pandas.tseries.common import Properties - from pandas.tseries.index import date_range, DatetimeIndex - from pandas.tseries.period import period_range, PeriodIndex - from pandas.tseries.tdi import timedelta_range, TimedeltaIndex s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) c_dr = s_dr.astype("category") @@ -4312,10 +4311,14 @@ def test_dt_accessor_api_for_categorical(self): s_tdr = Series(timedelta_range('1 days', '10 days')) c_tdr = s_tdr.astype("category") + # only testing field (like .day) + # and bool (is_month_start) + get_ops = lambda x: x._datetimelike_ops + test_data = [ - ("Datetime", DatetimeIndex._datetimelike_ops, s_dr, c_dr), - ("Period", PeriodIndex._datetimelike_ops, s_pr, c_pr), - ("Timedelta", TimedeltaIndex._datetimelike_ops, s_tdr, c_tdr)] + ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr), + ("Period", get_ops(PeriodIndex), s_pr, c_pr), + ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)] self.assertIsInstance(c_dr.dt, Properties) @@ -4325,12 +4328,13 @@ def test_dt_accessor_api_for_categorical(self): ('round', ("D",), {}), ('floor', ("D",), {}), ('ceil', ("D",), {}), + ('asfreq', ("D",), {}), # ('tz_localize', ("UTC",), {}), ] _special_func_names = [f[0] for f in special_func_defs] # the series is already localized - _ignore_names = ['tz_localize'] + _ignore_names = ['tz_localize', 'components'] for name, attr_names, s, c in test_data: func_names = [f @@ -4352,7 +4356,7 @@ def test_dt_accessor_api_for_categorical(self): elif isinstance(res, pd.Series): tm.assert_series_equal(res, exp) else: - tm.assert_numpy_array_equal(res, exp) + tm.assert_almost_equal(res, exp) for attr in attr_names: try: @@ -4367,7 +4371,7 @@ def test_dt_accessor_api_for_categorical(self): elif isinstance(res, pd.Series): tm.assert_series_equal(res, exp) else: - tm.assert_numpy_array_equal(res, exp) + tm.assert_almost_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') with tm.assertRaisesRegexp( diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index f9fd27176487c..7940efc7e1b59 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -168,8 +168,7 @@ def to_pydatetime(self): typ='property') DatetimeProperties._add_delegate_accessors( delegate=DatetimeIndex, - accessors=["to_period", "tz_localize", "tz_convert", - "normalize", "strftime", "round", "floor", "ceil"], + accessors=DatetimeIndex._datetimelike_methods, typ='method') @@ -208,7 +207,7 @@ def components(self): typ='property') TimedeltaProperties._add_delegate_accessors( delegate=TimedeltaIndex, - accessors=["to_pytimedelta", "total_seconds", "round", "floor", "ceil"], + accessors=TimedeltaIndex._datetimelike_methods, typ='method') @@ -230,9 +229,10 @@ class PeriodProperties(Properties): delegate=PeriodIndex, accessors=PeriodIndex._datetimelike_ops, typ='property') 
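# A minimal usage sketch (an assumption for illustration, not part of this patch):
# once the accessor classes above delegate to the *_datetimelike_methods lists,
# the .dt namespace simply forwards method calls to the underlying index class,
# so whatever the index names as a method is what the accessor exposes.
import pandas as pd

dt = pd.Series(pd.date_range('2016-01-01 10:15', periods=3, freq='H'))
dt.dt.round('D')         # 'round' is listed in DatetimeIndex._datetimelike_methods
dt.dt.strftime('%Y-%m')  # 'strftime' likewise

td = pd.Series(pd.timedelta_range('1 day', periods=3))
td.dt.total_seconds()    # listed in TimedeltaIndex._datetimelike_methods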
-PeriodProperties._add_delegate_accessors(delegate=PeriodIndex, - accessors=["strftime"], - typ='method') +PeriodProperties._add_delegate_accessors( + delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_methods, + typ='method') class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 11d2d29597fc0..1992e177556cc 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -64,25 +64,26 @@ def f(self): if self.tz is not utc: values = self._local_timestamps() - # boolean accessors -> return array - if field in ['is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', - 'is_year_start', 'is_year_end']: - month_kw = (self.freq.kwds.get('startingMonth', - self.freq.kwds.get('month', 12)) - if self.freq else 12) - - result = libts.get_start_end_field(values, field, self.freqstr, - month_kw) - return self._maybe_mask_results(result, convert='float64') - elif field in ['is_leap_year']: - # no need to mask NaT - return libts.get_date_field(values, field) - - # non-boolean accessors -> return Index - elif field in ['weekday_name']: + if field in self._bool_ops: + if field in ['is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', + 'is_year_start', 'is_year_end']: + month_kw = (self.freq.kwds.get('startingMonth', + self.freq.kwds.get('month', 12)) + if self.freq else 12) + + result = libts.get_start_end_field(values, field, self.freqstr, + month_kw) + else: + result = libts.get_date_field(values, field) + + # these return a boolean by-definition + return result + + if field in self._object_ops: result = libts.get_date_name_field(values, field) result = self._maybe_mask_results(result) + else: result = libts.get_date_field(values, field) result = self._maybe_mask_results(result, convert='float64') @@ -232,14 +233,24 @@ def _join_i8_wrapper(joinf, **kwargs): offset = None _comparables = ['name', 'freqstr', 'tz'] _attributes = ['name', 'freq', 'tz'] - _datetimelike_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'week', 'dayofweek', 'weekday', - 'dayofyear', 'quarter', 'days_in_month', - 'daysinmonth', 'date', 'time', 'microsecond', - 'nanosecond', 'is_month_start', 'is_month_end', - 'is_quarter_start', 'is_quarter_end', 'is_year_start', - 'is_year_end', 'tz', 'freq', 'weekday_name', - 'is_leap_year'] + + # define my properties & methods for delegation + _bool_ops = ['is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', 'is_year_start', + 'is_year_end', 'is_leap_year'] + _object_ops = ['weekday_name', 'freq', 'tz'] + _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', + 'weekofyear', 'week', 'weekday', 'dayofweek', + 'dayofyear', 'quarter', 'days_in_month', + 'daysinmonth', 'microsecond', + 'nanosecond'] + _other_ops = ['date', 'time'] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops + _datetimelike_methods = ['to_period', 'tz_localize', + 'tz_convert', + 'normalize', 'strftime', 'round', 'floor', + 'ceil'] + _is_numeric_dtype = False _infer_as_myclass = True diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index c279d5a9342e8..1e1496bbe9c27 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -174,12 +174,18 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): _box_scalars = True _typ = 'periodindex' _attributes = ['name', 'freq'] - _datetimelike_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 
'week', 'dayofweek', 'weekday', - 'dayofyear', 'quarter', 'qyear', 'freq', - 'days_in_month', 'daysinmonth', - 'to_timestamp', 'asfreq', 'start_time', 'end_time', - 'is_leap_year'] + + # define my properties & methods for delegation + _other_ops = [] + _bool_ops = ['is_leap_year'] + _object_ops = ['start_time', 'end_time', 'freq'] + _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', + 'weekofyear', 'weekday', 'week', 'dayofweek', + 'dayofyear', 'quarter', 'qyear', + 'days_in_month', 'daysinmonth'] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _datetimelike_methods = ['strftime', 'to_timestamp', 'asfreq'] + _is_numeric_dtype = False _infer_as_myclass = True diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 55333890640c1..5d062dd38f9fc 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -127,8 +127,15 @@ def _join_i8_wrapper(joinf, **kwargs): _left_indexer_unique = _join_i8_wrapper( libjoin.left_join_indexer_unique_int64, with_indexers=False) _arrmap = None - _datetimelike_ops = ['days', 'seconds', 'microseconds', 'nanoseconds', - 'freq', 'components'] + + # define my properties & methods for delegation + _other_ops = [] + _bool_ops = [] + _object_ops = ['freq'] + _field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds'] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _datetimelike_methods = ["to_pytimedelta", "total_seconds", + "round", "floor", "ceil"] __eq__ = _td_index_cmp('__eq__') __ne__ = _td_index_cmp('__ne__', nat_result=True) From 7e43c78a4e310955f3f214f58d1b77dc03c2ec0d Mon Sep 17 00:00:00 2001 From: "Graham R. Jeffries" Date: Mon, 27 Mar 2017 13:36:19 -0400 Subject: [PATCH 292/353] Remove NotImplementedError for parse_dates keyword in read_excel Rebase and update of PR https://github.com/pydata/pandas/pull/12051 Author: Joris Van den Bossche Author: Graham R. Jeffries This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #14326 from jorisvandenbossche/pr/12051 and squashes the following commits: 0b65a7a [Joris Van den Bossche] update wording 656ec44 [Joris Van den Bossche] Fix detection to raise warning b1c7f87 [Joris Van den Bossche] add whatsnew 925ce1b [Joris Van den Bossche] Update tests 0e10a9d [Graham R. Jeffries] remove read_excel kwd NotImplemented error, update documentation #11544 --- doc/source/io.rst | 14 +++++++++++++ doc/source/whatsnew/v0.19.0.txt | 4 ++++ pandas/io/excel.py | 9 +++------ pandas/tests/io/test_excel.py | 36 ++++++++++++++++++++------------- 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index faeea9d448cf2..e72224c6fa1fe 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2767,6 +2767,20 @@ indices to be parsed. read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) + +Parsing Dates ++++++++++++++ + +Datetime-like values are normally automatically converted to the appropriate +dtype when reading the excel file. But if you have a column of strings that +*look* like dates (but are not actually formatted as dates in excel), you can +use the `parse_dates` keyword to parse those strings to datetimes: + +.. 
code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings']) + + Cell Converters +++++++++++++++ diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 9b003034aa94a..11df0afb144ea 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -517,6 +517,7 @@ Other enhancements - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) - :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`) - ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) +<<<<<<< HEAD - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`). - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) - ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`) @@ -524,6 +525,9 @@ Other enhancements series or indices. This behaves like a standard binary operator with regards to broadcasting rules (:issue:`14208`). +======= +- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) +>>>>>>> PR_TOOL_MERGE_PR_14326 .. _whatsnew_0190.api: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 82ea2e8a46592..e7a8b71a5f6c9 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -343,13 +343,10 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates: - raise NotImplementedError("parse_dates keyword of read_excel " - "is not implemented") - if date_parser is not None: - raise NotImplementedError("date_parser keyword of read_excel " - "is not implemented") + if parse_dates is True and not index_col: + warn("The 'parse_dates=True' keyword of read_excel was provided" + " without an 'index_col' keyword value.") import xlrd from xlrd import (xldate, XL_CELL_DATE, diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index b66cb24bf44d8..df77708232dd2 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -924,17 +924,27 @@ def test_read_excel_chunksize(self): chunksize=100) def test_read_excel_parse_dates(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - parse_dates=True) + # GH 11544, 12051 - def test_read_excel_date_parser(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - date_parser=dateparse) + df = DataFrame( + {'col': [1, 2, 3], + 'date_strings': pd.date_range('2012-01-01', periods=3)}) + df2 = df.copy() + df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') + + with ensure_clean(self.ext) as pth: + df2.to_excel(pth) + + res = read_excel(pth) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=['date_strings']) + tm.assert_frame_equal(df, res) + + dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') + res = read_excel(pth, 
parse_dates=['date_strings'], + date_parser=dateparser) + tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self): # GH 4903 @@ -1382,8 +1392,7 @@ def test_to_excel_multiindex(self): # round trip frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) - df = read_excel(reader, 'test1', index_col=[0, 1], - parse_dates=False) + df = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 @@ -1424,8 +1433,7 @@ def test_to_excel_multiindex_cols(self): frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) df = read_excel(reader, 'test1', header=header, - index_col=[0, 1], - parse_dates=False) + index_col=[0, 1]) if not self.merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) From 1dab800b412be3613e8f666eb1be88458b631312 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Mar 2017 13:37:32 -0400 Subject: [PATCH 293/353] BUG: index_names can be None when processing date conversions closes #15820 closes #11544 --- doc/source/whatsnew/v0.19.0.txt | 4 ---- doc/source/whatsnew/v0.20.0.txt | 3 ++- pandas/io/excel.py | 2 +- pandas/io/parsers.py | 13 ++++++++++--- pandas/tests/io/test_excel.py | 9 +++++++-- 5 files changed, 20 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 11df0afb144ea..9b003034aa94a 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -517,7 +517,6 @@ Other enhancements - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) - :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`) - ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) -<<<<<<< HEAD - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`). - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) - ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`) @@ -525,9 +524,6 @@ Other enhancements series or indices. This behaves like a standard binary operator with regards to broadcasting rules (:issue:`14208`). -======= -- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) ->>>>>>> PR_TOOL_MERGE_PR_14326 .. _whatsnew_0190.api: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3ab69e1ff409b..fdf34e0d11572 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -270,7 +270,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you .. _whatsnew_0200.enhancements.other: -Other enhancements +Other Enhancements ^^^^^^^^^^^^^^^^^^ - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. @@ -314,6 +314,7 @@ Other enhancements - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. 
- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) +- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/excel.py b/pandas/io/excel.py index e7a8b71a5f6c9..d324855bc2f4d 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -344,7 +344,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates is True and not index_col: + if parse_dates is True and index_col is None: warn("The 'parse_dates=True' keyword of read_excel was provided" " without an 'index_col' keyword value.") diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 45c62b224ef4e..30b88de91ef76 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1176,13 +1176,18 @@ def _should_parse_dates(self, i): if isinstance(self.parse_dates, bool): return self.parse_dates else: - name = self.index_names[i] + if self.index_names is not None: + name = self.index_names[i] + else: + name = None j = self.index_col[i] if is_scalar(self.parse_dates): - return (j == self.parse_dates) or (name == self.parse_dates) + return ((j == self.parse_dates) or + (name is not None and name == self.parse_dates)) else: - return (j in self.parse_dates) or (name in self.parse_dates) + return ((j in self.parse_dates) or + (name is not None and name in self.parse_dates)) def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_names=False): @@ -1352,6 +1357,7 @@ def _get_name(icol): def _agg_index(self, index, try_parse_dates=True): arrays = [] + for i, arr in enumerate(index): if (try_parse_dates and self._should_parse_dates(i)): @@ -1512,6 +1518,7 @@ def _cast_types(self, values, cast_type, column): def _do_date_conversions(self, names, data): # returns data, columns + if self.parse_dates is not None: data, names = _process_date_conversion( data, self._date_conv, self.parse_dates, self.index_col, diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index df77708232dd2..256a37e922177 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -938,12 +938,17 @@ def test_read_excel_parse_dates(self): res = read_excel(pth) tm.assert_frame_equal(df2, res) - res = read_excel(pth, parse_dates=['date_strings']) + # no index_col specified when parse_dates is True + with tm.assert_produces_warning(): + res = read_excel(pth, parse_dates=True) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=['date_strings'], index_col=0) tm.assert_frame_equal(df, res) dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') res = read_excel(pth, parse_dates=['date_strings'], - date_parser=dateparser) + date_parser=dateparser, index_col=0) tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self): From a9406057b5f48d579d9a9136a183a594c4b1f758 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 12:48:41 -0400 Subject: [PATCH 294/353] BUG: bug in .at/.loc indexing with a tz-aware columns closes #15822 Author: Jeff Reback Closes #15827 from jreback/at and squashes the following commits: 4fcd2c6 [Jeff Reback] BUG: bug in .at/.loc indexing with a tz-aware columns --- doc/source/whatsnew/v0.20.0.txt | 1 + 
pandas/core/frame.py | 11 ++++++++++- pandas/tests/indexing/test_scalar.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fdf34e0d11572..51c3d5578ae6c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -881,6 +881,7 @@ Bug Fixes - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 90c49a9c85133..90baa1aff4857 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1918,7 +1918,16 @@ def get_value(self, index, col, takeable=False): series = self._get_item_cache(col) engine = self.index._engine - return engine.get_value(series.get_values(), index) + + try: + return engine.get_value(series._values, index) + except TypeError: + + # we cannot handle direct indexing + # use positional + col = self.columns.get_loc(col) + index = self.index.get_loc(index) + return self.get_value(index, col, takeable=True) def set_value(self, index, col, value, takeable=False): """ diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 4e81cd01cd5d2..0eeaec3e00fa6 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -154,3 +154,18 @@ def test_at_to_fail(self): # Check that we get the correct value in the KeyError self.assertRaisesRegexp(KeyError, r"\['y'\] not in index", lambda: df[['x', 'y', 'z']]) + + def test_at_with_tz(self): + # gh-15822 + df = DataFrame({'name': ['John', 'Anderson'], + 'date': [Timestamp(2017, 3, 13, 13, 32, 56), + Timestamp(2017, 2, 16, 12, 10, 3)]}) + df['date'] = df['date'].dt.tz_localize('Asia/Shanghai') + + expected = Timestamp('2017-03-13 13:32:56+0800', tz='Asia/Shanghai') + + result = df.loc[0, 'date'] + assert result == expected + + result = df.at[0, 'date'] + assert result == expected From 66fb0a3e0c25cf10988ce0d14a7efee437aa94d6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 13:11:17 -0400 Subject: [PATCH 295/353] TST: consistency of indexing with a tz-aware scalar xref #12938 --- pandas/tests/indexing/test_datetime.py | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 1c4e5772d316f..eeef41ad6dbb2 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -63,6 +63,37 @@ def f(): df.loc[df.new_col == 'new', 'time'] = v tm.assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) + def test_consistency_with_tz_aware_scalar(self): + # xef gh-12938 + # various ways of indexing the same tz-aware scalar + df = Series([Timestamp('2016-03-30 14:35:25', + tz='Europe/Brussels')]).to_frame() + + df = pd.concat([df, df]).reset_index(drop=True) + expected = Timestamp('2016-03-30 14:35:25+0200', + tz='Europe/Brussels') + + result = df[0][0] + assert result == expected + + result = df.iloc[0, 0] + assert result == expected + + result = df.loc[0, 0] + assert result == expected + + result = df.iat[0, 0] + assert result == expected + + result = df.at[0, 0] + assert result == 
expected + + result = df[0].loc[0] + assert result == expected + + result = df[0].at[0] + assert result == expected + def test_indexing_with_datetimeindex_tz(self): # GH 12050 From d96ff291cc7446ad36ae7d8db05b0cc588ccd7ec Mon Sep 17 00:00:00 2001 From: stijnvanhoey Date: Wed, 8 Mar 2017 20:25:51 +0100 Subject: [PATCH 296/353] DOC: Make example running example closes #15624 --- pandas/core/groupby.py | 58 +++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index dded55114ab6f..2cc68bcabdd22 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -109,15 +109,33 @@ Examples -------- ->>> df = pd.DataFrame(np.repeat(np.arange(10), 3).reshape(-1, 3), - columns=list('ABC')) ->>> grouped = df.groupby(df.index // 3) # Same shape ->>> grouped.%(selected)stransform(lambda x: (x - x.mean()) / x.std()) +>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', +... 'foo', 'bar'], +... 'B' : ['one', 'one', 'two', 'three', +... 'two', 'two'], +... 'C' : [1, 5, 5, 2, 5, 5], +... 'D' : [2.0, 5., 8., 1., 2., 9.]}) +>>> grouped = df.groupby('A') +>>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D +0 -1.154701 -0.577350 +1 0.577350 0.000000 +2 0.577350 1.154701 +3 -1.154701 -1.000000 +4 0.577350 -0.577350 +5 0.577350 1.000000 # Broadcastable ->>> grouped.%(selected)stransform(lambda x: x.max() - x.min()) +>>> grouped.transform(lambda x: x.max() - x.min()) + C D +0 4 6.0 +1 3 8.0 +2 4 6.0 +3 3 8.0 +4 4 6.0 +5 3 8.0 """ @@ -2982,7 +3000,17 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa Examples -------- - >>> grouped.filter(lambda x: x.mean() > 0) + >>> import pandas as pd + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : [1, 2, 3, 4, 5, 6], + ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A') + >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.) + 1 2 + 3 4 + 5 6 + Name: B, dtype: int64 Returns ------- @@ -3784,9 +3812,21 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa Examples -------- - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC')) - >>> grouped = df.groupby(df.index % 3) - >>> grouped.filter(lambda x: x['A'].sum() + x['B'].sum() > 0) + >>> import pandas as pd + >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + ... 'foo', 'bar'], + ... 'B' : [1, 2, 3, 4, 5, 6], + ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) + >>> grouped = df.groupby('A') + >>> grouped.filter(lambda x: x['B'].mean() > 3.) 
+ A B C + 1 bar 2 5.0 + 3 bar 4 1.0 + 5 bar 6 9.0 + + Returns + ------- + filtered : DataFrame """ indices = [] From 6f789e15cdd91cc02af2005405026355e6fae69e Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Tue, 28 Mar 2017 14:23:44 -0400 Subject: [PATCH 297/353] BUG: replace of numeric by string / dtype coversion (GH15743) closes #15743 Author: Carlos Souza Author: Jeff Reback Closes #15812 from ucals/bug-fix-15743 and squashes the following commits: e6e4971 [Carlos Souza] Adding replace unicode with number and replace mixed types with string tests bd31b2b [Carlos Souza] Resolving merge conflict by incorporating @jreback suggestions 73805ce [Jeff Reback] CLN: add infer_dtype_from_array 45e67e4 [Carlos Souza] Fixing PEP8 line indent 0a98557 [Carlos Souza] BUG: replace of numeric by string fixed 97e1f18 [Carlos Souza] Test e62763c [Carlos Souza] Fixing PEP8 line indent 080c71e [Carlos Souza] BUG: replace of numeric by string fixed 8b463cb [Carlos Souza] Merge remote-tracking branch 'upstream/master' 9fc617b [Carlos Souza] Merge remote-tracking branch 'upstream/master' e12bca7 [Carlos Souza] Sync fork 676a4e5 [Carlos Souza] Test --- RELEASE.md | 2 +- doc/source/whatsnew/v0.20.0.txt | 3 +- pandas/core/missing.py | 26 ++++++++------- pandas/tests/frame/test_replace.py | 25 +++++++++------ pandas/tests/series/test_replace.py | 22 ++++++++++++- pandas/tests/types/test_cast.py | 50 ++++++++++++++++++++--------- pandas/types/cast.py | 44 +++++++++++++++++++++++++ 7 files changed, 132 insertions(+), 40 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index a181412be2719..efd075dabcba9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ Release Notes ============= -The list of changes to pandas between each release can be found +The list of changes to Pandas between each release can be found [here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). For full details, see the commit logs at http://github.com/pandas-dev/pandas. diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 51c3d5578ae6c..1aebfc140284d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -884,6 +884,8 @@ Bug Fixes - Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`, :issue:`15765`) +- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) +- Bug in ``Series.replace`` which replaced a numeric by string (:issue:`15743`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) @@ -986,7 +988,6 @@ Bug Fixes - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) -- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 3b9bfe1de48e7..91039f3270af2 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -9,10 +9,16 @@ from pandas.compat import range, string_types from pandas.types.common import (is_numeric_v_string_like, - is_float_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_integer_dtype, - _ensure_float64, is_scalar, - needs_i8_conversion, is_integer) + is_float_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_scalar, + is_integer, + needs_i8_conversion, + _ensure_float64) + +from pandas.types.cast import infer_dtype_from_array from pandas.types.missing import isnull @@ -21,11 +27,11 @@ def mask_missing(arr, values_to_mask): Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True """ - if not isinstance(values_to_mask, (list, np.ndarray)): - values_to_mask = [values_to_mask] + dtype, values_to_mask = infer_dtype_from_array(values_to_mask) try: - values_to_mask = np.array(values_to_mask, dtype=arr.dtype) + values_to_mask = np.array(values_to_mask, dtype=dtype) + except Exception: values_to_mask = np.array(values_to_mask, dtype=object) @@ -409,7 +415,7 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, if axis != 0: # pragma: no cover raise AssertionError("cannot interpolate on a ndim == 1 with " "axis != 0") - values = values.reshape(tuple((1, ) + values.shape)) + values = values.reshape(tuple((1,) + values.shape)) if fill_value is None: mask = None @@ -447,7 +453,6 @@ def wrapper(arr, mask, limit=None): def pad_1d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None @@ -472,7 +477,6 @@ def pad_1d(values, limit=None, mask=None, dtype=None): def backfill_1d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None @@ -498,7 +502,6 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): def pad_2d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None @@ -528,7 +531,6 @@ def pad_2d(values, limit=None, mask=None, dtype=None): def backfill_2d(values, limit=None, mask=None, dtype=None): - if dtype is None: dtype = values.dtype _method = None diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 8b50036cd50f8..fce59e10bf4bd 100644 --- a/pandas/tests/frame/test_replace.py +++ 
b/pandas/tests/frame/test_replace.py @@ -795,7 +795,7 @@ def test_replace_dtypes(self): expected = DataFrame({'datetime64': Index([now] * 3)}) assert_frame_equal(result, expected) - def test_replace_input_formats(self): + def test_replace_input_formats_listlike(self): # both dicts to_rep = {'A': np.nan, 'B': 0, 'C': ''} values = {'A': 0, 'B': -1, 'C': 'missing'} @@ -812,15 +812,6 @@ def test_replace_input_formats(self): 'C': ['', 'asdf', 'fd']}) assert_frame_equal(result, expected) - # dict to scalar - filled = df.replace(to_rep, 0) - expected = {} - for k, v in compat.iteritems(df): - expected[k] = v.replace(to_rep[k], 0) - assert_frame_equal(filled, DataFrame(expected)) - - self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, '']) - # scalar to dict values = {'A': 0, 'B': -1, 'C': 'missing'} df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5], @@ -842,6 +833,20 @@ def test_replace_input_formats(self): self.assertRaises(ValueError, df.replace, to_rep, values[1:]) + def test_replace_input_formats_scalar(self): + df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5], + 'C': ['', 'asdf', 'fd']}) + + # dict to scalar + to_rep = {'A': np.nan, 'B': 0, 'C': ''} + filled = df.replace(to_rep, 0) + expected = {} + for k, v in compat.iteritems(df): + expected[k] = v.replace(to_rep[k], 0) + assert_frame_equal(filled, DataFrame(expected)) + + self.assertRaises(TypeError, df.replace, to_rep, [np.nan, 0, '']) + # list to scalar to_rep = [np.nan, 0, ''] result = df.replace(to_rep, -1) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 0a53581e24ba5..5190eb110f4cf 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -10,7 +10,6 @@ class TestSeriesReplace(TestData, tm.TestCase): - def test_replace(self): N = 100 ser = pd.Series(np.random.randn(N)) @@ -227,3 +226,24 @@ def test_replace_with_empty_dictlike(self): s = pd.Series(list('abcd')) tm.assert_series_equal(s, s.replace(dict())) tm.assert_series_equal(s, s.replace(pd.Series([]))) + + def test_replace_string_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace('2', np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_unicode_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace(u'2', np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_mixed_types_with_string(self): + # Testing mixed + s = pd.Series([1, 2, 3, '4', 4, 5]) + result = s.replace([2, '4'], np.nan) + expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) + tm.assert_series_equal(expected, result) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index dd4ea3bb02be9..de6ef7af9d7f9 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -5,13 +5,15 @@ """ -from datetime import datetime +import pytest +from datetime import datetime, timedelta, date import numpy as np from pandas import Timedelta, Timestamp, DatetimeIndex from pandas.types.cast import (maybe_downcast_to_dtype, maybe_convert_objects, infer_dtype_from_scalar, + infer_dtype_from_array, maybe_convert_string_to_object, maybe_convert_scalar, find_common_type) @@ -82,7 +84,7 @@ def test_datetime_with_timezone(self): tm.assert_index_equal(res, exp) -class TestInferDtype(tm.TestCase): +class TestInferDtype(object): def test_infer_dtype_from_scalar(self): # Test that _infer_dtype_from_scalar is returning correct dtype for int @@ 
-92,44 +94,62 @@ def test_infer_dtype_from_scalar(self): np.int32, np.uint64, np.int64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, type(data)) + assert dtype == type(data) data = 12 dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.int64) + assert dtype == np.int64 for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, dtypec) + assert dtype == dtypec data = np.float(12) dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.float64) + assert dtype == np.float64 for data in [True, False]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.bool_) + assert dtype == np.bool_ for data in [np.complex64(1), np.complex128(1)]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.complex_) + assert dtype == np.complex_ - import datetime for data in [np.datetime64(1, 'ns'), Timestamp(1), - datetime.datetime(2000, 1, 1, 0, 0)]: + datetime(2000, 1, 1, 0, 0)]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'M8[ns]') + assert dtype == 'M8[ns]' for data in [np.timedelta64(1, 'ns'), Timedelta(1), - datetime.timedelta(1)]: + timedelta(1)]: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, 'm8[ns]') + assert dtype == 'm8[ns]' - for data in [datetime.date(2000, 1, 1), + for data in [date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: dtype, val = infer_dtype_from_scalar(data) - self.assertEqual(dtype, np.object_) + assert dtype == np.object_ + + @pytest.mark.parametrize( + "arr, expected", + [('foo', np.object_), + (b'foo', np.object_), + (1, np.int_), + (1.5, np.float_), + ([1], np.int_), + (np.array([1]), np.int_), + ([np.nan, 1, ''], np.object_), + (np.array([[1.0, 2.0]]), np.float_), + (Timestamp('20160101'), np.object_), + (np.datetime64('2016-01-01'), np.dtype('>> np.asarray([1, '1']) + array(['1', '1'], dtype='>> infer_dtype_from_array([1, '1']) + (numpy.object_, [1, '1']) + + """ + + if isinstance(arr, np.ndarray): + return arr.dtype, arr + + if not is_list_like(arr): + arr = [arr] + + # don't force numpy coerce with nan's + inferred = lib.infer_dtype(arr) + if inferred in ['string', 'bytes', 'unicode', + 'mixed', 'mixed-integer']: + return (np.object_, arr) + + arr = np.asarray(arr) + return arr.dtype, arr + + def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): """ provide explict type promotion and coercion From ec84ae3d6f73633aee5058148ea76fdd79f74ac4 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Tue, 28 Mar 2017 15:03:27 -0400 Subject: [PATCH 298/353] ENH: Add empty property to Index. Previously, attempting to evaluate an Index in a boolean context prints an error message listing various alternatives, one of which is `.empty`, which was not actually implemented on `Index`. Author: Scott Sanderson This patch had conflicts when merged, resolved by Committer: Jeff Reback closes #13207 Closes #15270 from ssanderson/add-empty-to-index and squashes the following commits: bb0126f [Scott Sanderson] ENH: Add empty property to Index. 
--- doc/source/api.rst | 1 + doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/core/base.py | 4 ++++ pandas/tests/indexes/common.py | 6 ++++++ 4 files changed, 13 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index f6bf480bebcfc..dfeaf8e60feb1 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1277,6 +1277,7 @@ Attributes Index.nbytes Index.ndim Index.size + Index.empty Index.strides Index.itemsize Index.base diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1aebfc140284d..15566d207e31f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -310,11 +310,13 @@ Other Enhancements - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) + - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) +- Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/core/base.py b/pandas/core/base.py index bde60be3ddcff..3401c7c59cb56 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -774,6 +774,10 @@ def _values(self): """ the internal implementation """ return self.values + @property + def empty(self): + return not self.size + def max(self): """ The maximum value of the object """ return nanops.nanmax(self.values) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e9122f7a17359..ba76945834aff 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -910,3 +910,9 @@ def test_nulls(self): result = isnull(index) self.assert_numpy_array_equal(index.isnull(), result) self.assert_numpy_array_equal(index.notnull(), ~result) + + def test_empty(self): + # GH 15270 + index = self.create_index() + self.assertFalse(index.empty) + self.assertTrue(index[:0].empty) From 34c6bd0fb7ad58b579ba940d4248ebab0aa758bf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 17:43:38 -0400 Subject: [PATCH 299/353] ENH: GH14883: json_normalize now takes a user-specified separator closes #14883 Author: Jeff Reback Author: John Owens Closes #14950 from jowens/json_normalize-separator and squashes the following commits: 0327dd1 [Jeff Reback] compare sorted columns bc5aae8 [Jeff Reback] CLN: fixup json_normalize with sep 8edc40e [John Owens] ENH: json_normalize now takes a user-specified separator --- doc/source/whatsnew/v0.20.0.txt | 8 +- pandas/io/json/normalize.py | 37 +++++-- pandas/tests/io/json/test_normalize.py | 141 ++++++++++++++----------- 3 files changed, 114 insertions(+), 72 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 
15566d207e31f..638044cee67bb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -300,9 +300,9 @@ Other Enhancements - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) -- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements + - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`) - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). @@ -313,11 +313,15 @@ Other Enhancements - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) - ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. -- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) - Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) +- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) +- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) + + .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 4da4a6ad57850..518e0bc2064e2 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -21,7 +21,7 @@ def _convert_to_line_delimits(s): return convert_json_to_lines(s) -def nested_to_record(ds, prefix="", level=0): +def nested_to_record(ds, prefix="", sep=".", level=0): """a simplified json_normalize converts a nested dict into a flat dict ("record"), unlike json_normalize, @@ -31,6 +31,12 @@ def nested_to_record(ds, prefix="", level=0): ---------- ds : dict or list of dicts prefix: the prefix, optional, default: "" + sep : string, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + .. 
versionadded:: 0.20.0 + level: the number of levels in the jason string, optional, default: 0 Returns @@ -66,7 +72,7 @@ def nested_to_record(ds, prefix="", level=0): if level == 0: newkey = k else: - newkey = prefix + '.' + k + newkey = prefix + sep + k # only dicts gets recurse-flattend # only at level>1 do we rename the rest of the keys @@ -77,7 +83,7 @@ def nested_to_record(ds, prefix="", level=0): continue else: v = new_d.pop(k) - new_d.update(nested_to_record(v, newkey, level + 1)) + new_d.update(nested_to_record(v, newkey, sep, level + 1)) new_ds.append(new_d) if singleton: @@ -88,7 +94,8 @@ def nested_to_record(ds, prefix="", level=0): def json_normalize(data, record_path=None, meta=None, meta_prefix=None, record_prefix=None, - errors='raise'): + errors='raise', + sep='.'): """ "Normalize" semi-structured JSON data into a flat table @@ -106,13 +113,21 @@ def json_normalize(data, record_path=None, meta=None, path to records is ['foo', 'bar'] meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' - * 'ignore' : will ignore KeyError if keys listed in meta are not - always present - * 'raise' : will raise KeyError if keys listed in meta are not - always present + + * ignore : will ignore KeyError if keys listed in meta are not + always present + * raise : will raise KeyError if keys listed in meta are not + always present .. versionadded:: 0.20.0 + sep : string, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + + .. versionadded:: 0.20.0 + + Returns ------- frame : DataFrame @@ -173,7 +188,7 @@ def _pull_field(js, spec): # # TODO: handle record value which are lists, at least error # reasonably - data = nested_to_record(data) + data = nested_to_record(data, sep=sep) return DataFrame(data) elif not isinstance(record_path, list): record_path = [record_path] @@ -192,7 +207,9 @@ def _pull_field(js, spec): lengths = [] meta_vals = defaultdict(list) - meta_keys = ['.'.join(val) for val in meta] + if not isinstance(sep, compat.string_types): + sep = str(sep) + meta_keys = [sep.join(val) for val in meta] def _recursive_extract(data, path, seen_meta, level=0): if len(path) > 1: diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index f881f4dafe0f3..ee79859e9b71a 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -1,36 +1,60 @@ -from pandas import DataFrame +import pytest import numpy as np import json import pandas.util.testing as tm -from pandas import compat +from pandas import compat, Index, DataFrame from pandas.io.json import json_normalize from pandas.io.json.normalize import nested_to_record -def _assert_equal_data(left, right): - if not left.columns.equals(right.columns): - left = left.reindex(columns=right.columns) +@pytest.fixture +def deep_nested(): + # deeply nested data + return [{'country': 'USA', + 'states': [{'name': 'California', + 'cities': [{'name': 'San Francisco', + 'pop': 12345}, + {'name': 'Los Angeles', + 'pop': 12346}] + }, + {'name': 'Ohio', + 'cities': [{'name': 'Columbus', + 'pop': 1234}, + {'name': 'Cleveland', + 'pop': 1236}]} + ] + }, + {'country': 'Germany', + 'states': [{'name': 'Bayern', + 'cities': [{'name': 'Munich', 'pop': 12347}] + }, + {'name': 'Nordrhein-Westfalen', + 'cities': [{'name': 'Duesseldorf', 'pop': 1238}, + {'name': 'Koeln', 'pop': 1239}]} + ] + } + ] - tm.assert_frame_equal(left, right) +@pytest.fixture +def state_data(): + return [ + {'counties': 
[{'name': 'Dade', 'population': 12345}, + {'name': 'Broward', 'population': 40000}, + {'name': 'Palm Beach', 'population': 60000}], + 'info': {'governor': 'Rick Scott'}, + 'shortname': 'FL', + 'state': 'Florida'}, + {'counties': [{'name': 'Summit', 'population': 1234}, + {'name': 'Cuyahoga', 'population': 1337}], + 'info': {'governor': 'John Kasich'}, + 'shortname': 'OH', + 'state': 'Ohio'}] -class TestJSONNormalize(tm.TestCase): - def setUp(self): - self.state_data = [ - {'counties': [{'name': 'Dade', 'population': 12345}, - {'name': 'Broward', 'population': 40000}, - {'name': 'Palm Beach', 'population': 60000}], - 'info': {'governor': 'Rick Scott'}, - 'shortname': 'FL', - 'state': 'Florida'}, - {'counties': [{'name': 'Summit', 'population': 1234}, - {'name': 'Cuyahoga', 'population': 1337}], - 'info': {'governor': 'John Kasich'}, - 'shortname': 'OH', - 'state': 'Ohio'}] +class TestJSONNormalize(object): def test_simple_records(self): recs = [{'a': 1, 'b': 2, 'c': 3}, @@ -43,21 +67,21 @@ def test_simple_records(self): tm.assert_frame_equal(result, expected) - def test_simple_normalize(self): - result = json_normalize(self.state_data[0], 'counties') - expected = DataFrame(self.state_data[0]['counties']) + def test_simple_normalize(self, state_data): + result = json_normalize(state_data[0], 'counties') + expected = DataFrame(state_data[0]['counties']) tm.assert_frame_equal(result, expected) - result = json_normalize(self.state_data, 'counties') + result = json_normalize(state_data, 'counties') expected = [] - for rec in self.state_data: + for rec in state_data: expected.extend(rec['counties']) expected = DataFrame(expected) tm.assert_frame_equal(result, expected) - result = json_normalize(self.state_data, 'counties', meta='state') + result = json_normalize(state_data, 'counties', meta='state') expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2]) tm.assert_frame_equal(result, expected) @@ -67,33 +91,30 @@ def test_empty_array(self): expected = DataFrame() tm.assert_frame_equal(result, expected) - def test_more_deeply_nested(self): - data = [{'country': 'USA', - 'states': [{'name': 'California', - 'cities': [{'name': 'San Francisco', - 'pop': 12345}, - {'name': 'Los Angeles', - 'pop': 12346}] - }, - {'name': 'Ohio', - 'cities': [{'name': 'Columbus', - 'pop': 1234}, - {'name': 'Cleveland', - 'pop': 1236}]} - ] - }, - {'country': 'Germany', - 'states': [{'name': 'Bayern', - 'cities': [{'name': 'Munich', 'pop': 12347}] - }, - {'name': 'Nordrhein-Westfalen', - 'cities': [{'name': 'Duesseldorf', 'pop': 1238}, - {'name': 'Koeln', 'pop': 1239}]} - ] - } - ] + def test_simple_normalize_with_separator(self, deep_nested): + # GH 14883 + result = json_normalize({'A': {'A': 1, 'B': 2}}) + expected = DataFrame([[1, 2]], columns=['A.A', 'A.B']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_') + expected = DataFrame([[1, 2]], columns=['A_A', 'A_B']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3') + expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B']) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + result = json_normalize(deep_nested, ['states', 'cities'], + meta=['country', ['states', 'name']], + sep='_') + expected = Index(['name', 'pop', + 'country', 'states_name']).sort_values() + assert result.columns.sort_values().equals(expected) + + def test_more_deeply_nested(self, deep_nested): - result = 
json_normalize(data, ['states', 'cities'], + result = json_normalize(deep_nested, ['states', 'cities'], meta=['country', ['states', 'name']]) # meta_prefix={'states': 'state_'}) @@ -143,26 +164,26 @@ def test_meta_name_conflict(self): 'data': [{'foo': 'something', 'bar': 'else'}, {'foo': 'something2', 'bar': 'else2'}]}] - self.assertRaises(ValueError, json_normalize, data, - 'data', meta=['foo', 'bar']) + with pytest.raises(ValueError): + json_normalize(data, 'data', meta=['foo', 'bar']) result = json_normalize(data, 'data', meta=['foo', 'bar'], meta_prefix='meta') for val in ['metafoo', 'metabar', 'foo', 'bar']: - self.assertTrue(val in result) + assert val in result - def test_record_prefix(self): - result = json_normalize(self.state_data[0], 'counties') - expected = DataFrame(self.state_data[0]['counties']) + def test_record_prefix(self, state_data): + result = json_normalize(state_data[0], 'counties') + expected = DataFrame(state_data[0]['counties']) tm.assert_frame_equal(result, expected) - result = json_normalize(self.state_data, 'counties', + result = json_normalize(state_data, 'counties', meta='state', record_prefix='county_') expected = [] - for rec in self.state_data: + for rec in state_data: expected.extend(rec['counties']) expected = DataFrame(expected) expected = expected.rename(columns=lambda x: 'county_' + x) From 2e646147349bafa7f0b2e9ce26d1e27e3015d3b3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 28 Mar 2017 18:42:31 -0400 Subject: [PATCH 300/353] COMPAT: Fix indent level bug preventing wrapper function rename Original code intends to rename the wrapper function f using the provided name, but this isn't happening because code is incorrectly indented an extra level. from pandas.core.groupby import GroupBy GroupBy.sum.__name__ Should be 'sum'. Author: Jeff Reback Author: Matt Hagy Author: Matt Hagy Closes #14620 from matthagy/patch-1 and squashes the following commits: db3c6e4 [Jeff Reback] clean/reorg tests 205489b [Jeff Reback] doc 8b185b4 [Jeff Reback] PEP 781b9b3 [Jeff Reback] Move _groupby_function inside GroupBy 68013bf [Matt Hagy] Added a test for known inconsistent attribute/method names 3bf8993 [Matt Hagy] Revise attribute/method consistency check to skip known inconsistencies 033e42d [Matt Hagy] Test for consistency of attribute and method names 2a54b77 [Matt Hagy] Test renaming of _groupby_function wrapper function a492b5a [Matt Hagy] Fix indent level bug preventing wrapper function rename --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/groupby.py | 143 ++++++------ pandas/tests/groupby/common.py | 36 +-- pandas/tests/groupby/test_groupby.py | 223 ------------------ pandas/tests/groupby/test_whitelist.py | 301 +++++++++++++++++++++++++ 5 files changed, 400 insertions(+), 304 deletions(-) create mode 100644 pandas/tests/groupby/test_whitelist.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 638044cee67bb..787857095044a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -887,6 +887,7 @@ Bug Fixes - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) - Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. 
(:issue:`12747`, :issue:`15765`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2cc68bcabdd22..fe764a099bb63 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -12,8 +12,8 @@ ) from pandas import compat -from pandas.compat.numpy import function as nv -from pandas.compat.numpy import _np_version_under1p8 +from pandas.compat.numpy import function as nv, _np_version_under1p8 +from pandas.compat import set_function_name from pandas.types.common import (is_numeric_dtype, is_timedelta64_dtype, is_datetime64_dtype, @@ -172,64 +172,6 @@ 'cummin', 'cummax']) -def _groupby_function(name, alias, npfunc, numeric_only=True, - _convert=False): - - _local_template = "Compute %(f)s of group values" - - @Substitution(name='groupby', f=name) - @Appender(_doc_template) - @Appender(_local_template) - def f(self, **kwargs): - if 'numeric_only' not in kwargs: - kwargs['numeric_only'] = numeric_only - self._set_group_selection() - try: - return self._cython_agg_general(alias, alt=npfunc, **kwargs) - except AssertionError as e: - raise SpecificationError(str(e)) - except Exception: - result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) - if _convert: - result = result._convert(datetime=True) - return result - - f.__name__ = name - - return f - - -def _first_compat(x, axis=0): - - def _first(x): - - x = np.asarray(x) - x = x[notnull(x)] - if len(x) == 0: - return np.nan - return x[0] - - if isinstance(x, DataFrame): - return x.apply(_first, axis=axis) - else: - return _first(x) - - -def _last_compat(x, axis=0): - def _last(x): - - x = np.asarray(x) - x = x[notnull(x)] - if len(x) == 0: - return np.nan - return x[-1] - - if isinstance(x, DataFrame): - return x.apply(_last, axis=axis) - else: - return _last(x) - - class Grouper(object): """ A Grouper allows the user to specify a groupby instruction for a target @@ -1184,14 +1126,76 @@ def size(self): result.name = getattr(self, 'name', None) return result - sum = _groupby_function('sum', 'add', np.sum) - prod = _groupby_function('prod', 'prod', np.prod) - min = _groupby_function('min', 'min', np.min, numeric_only=False) - max = _groupby_function('max', 'max', np.max, numeric_only=False) - first = _groupby_function('first', 'first', _first_compat, - numeric_only=False, _convert=True) - last = _groupby_function('last', 'last', _last_compat, numeric_only=False, - _convert=True) + @classmethod + def _add_numeric_operations(cls): + """ add numeric operations to the GroupBy generically """ + + def groupby_function(name, alias, npfunc, + numeric_only=True, _convert=False): + + _local_template = "Compute %(f)s of group values" + + @Substitution(name='groupby', f=name) + @Appender(_doc_template) + @Appender(_local_template) + def f(self, **kwargs): + if 'numeric_only' not in kwargs: + kwargs['numeric_only'] = numeric_only + self._set_group_selection() + try: + return self._cython_agg_general( + alias, alt=npfunc, **kwargs) + except AssertionError as e: + raise SpecificationError(str(e)) + except Exception: + result = self.aggregate( + lambda x: npfunc(x, axis=self.axis)) + if _convert: + result = result._convert(datetime=True) + return result + + set_function_name(f, name, cls) + + return f + + def first_compat(x, axis=0): + + def first(x): + + x = np.asarray(x) + x = x[notnull(x)] + if len(x) == 0: + return np.nan + return x[0] + + if isinstance(x, DataFrame): + return x.apply(first, axis=axis) + else: + return first(x) + + def last_compat(x, axis=0): + + def last(x): + + x = np.asarray(x) + x = x[notnull(x)] + if len(x) == 0: + 
return np.nan + return x[-1] + + if isinstance(x, DataFrame): + return x.apply(last, axis=axis) + else: + return last(x) + + cls.sum = groupby_function('sum', 'add', np.sum) + cls.prod = groupby_function('prod', 'prod', np.prod) + cls.min = groupby_function('min', 'min', np.min, numeric_only=False) + cls.max = groupby_function('max', 'max', np.max, numeric_only=False) + cls.first = groupby_function('first', 'first', first_compat, + numeric_only=False, _convert=True) + cls.last = groupby_function('last', 'last', last_compat, + numeric_only=False, _convert=True) @Substitution(name='groupby') @Appender(_doc_template) @@ -1604,6 +1608,9 @@ def tail(self, n=5): return self._selected_obj[mask] +GroupBy._add_numeric_operations() + + @Appender(GroupBy.__doc__) def groupby(obj, by, **kwds): if isinstance(obj, Series): diff --git a/pandas/tests/groupby/common.py b/pandas/tests/groupby/common.py index 8a70777d08682..f3dccf473f53a 100644 --- a/pandas/tests/groupby/common.py +++ b/pandas/tests/groupby/common.py @@ -1,10 +1,31 @@ """ Base setup """ +import pytest import numpy as np from pandas.util import testing as tm from pandas import DataFrame, MultiIndex +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + class MixIn(object): def setUp(self): @@ -15,12 +36,7 @@ def setUp(self): self.frame = DataFrame(self.seriesd) self.tsframe = DataFrame(self.tsd) - self.df = DataFrame( - {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - + self.df = df() self.df_mixed_floats = DataFrame( {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], @@ -28,13 +44,7 @@ def setUp(self): 'D': np.array( np.random.randn(8), dtype='float32')}) - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - self.mframe = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) + self.mframe = mframe() self.three_group = DataFrame( {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 9f5a7f404e2be..83502434e6053 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3706,229 +3706,6 @@ def test_index_label_overlaps_location(self): expected = ser.take([1, 3, 4]) assert_series_equal(actual, expected) - def test_groupby_selection_with_methods(self): - # some methods which require DatetimeIndex - rng = pd.date_range('2014', periods=len(self.df)) - self.df.index = rng - - g = self.df.groupby(['A'])[['C']] - g_exp = self.df[['C']].groupby(self.df['A']) - # TODO check groupby with > 1 col ? 
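# A minimal, hedged check, not from the patch itself, of the rename fix this
# commit describes: once GroupBy._add_numeric_operations() has run at class
# creation, the generated aggregation wrappers report their own names rather
# than the generic factory name 'f'. Assumes a pandas build with this patch.
from pandas.core.groupby import GroupBy

assert GroupBy.sum.__name__ == 'sum'    # stayed 'f' before the indentation fix
assert GroupBy.prod.__name__ == 'prod'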
- - # methods which are called as .foo() - methods = ['count', - 'corr', - 'cummax', - 'cummin', - 'cumprod', - 'describe', - 'rank', - 'quantile', - 'diff', - 'shift', - 'all', - 'any', - 'idxmin', - 'idxmax', - 'ffill', - 'bfill', - 'pct_change', - 'tshift'] - - for m in methods: - res = getattr(g, m)() - exp = getattr(g_exp, m)() - assert_frame_equal(res, exp) # should always be frames! - - # methods which aren't just .foo() - assert_frame_equal(g.fillna(0), g_exp.fillna(0)) - assert_frame_equal(g.dtypes, g_exp.dtypes) - assert_frame_equal(g.apply(lambda x: x.sum()), - g_exp.apply(lambda x: x.sum())) - - assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean()) - assert_frame_equal(g.resample('D').ohlc(), - g_exp.resample('D').ohlc()) - - assert_frame_equal(g.filter(lambda x: len(x) == 3), - g_exp.filter(lambda x: len(x) == 3)) - - def test_groupby_whitelist(self): - from string import ascii_lowercase - letters = np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - s = df.floats - - df_whitelist = frozenset([ - 'last', - 'first', - 'mean', - 'sum', - 'min', - 'max', - 'head', - 'tail', - 'cumcount', - 'resample', - 'rank', - 'quantile', - 'fillna', - 'mad', - 'any', - 'all', - 'take', - 'idxmax', - 'idxmin', - 'shift', - 'tshift', - 'ffill', - 'bfill', - 'pct_change', - 'skew', - 'plot', - 'boxplot', - 'hist', - 'median', - 'dtypes', - 'corrwith', - 'corr', - 'cov', - 'diff', - ]) - s_whitelist = frozenset([ - 'last', - 'first', - 'mean', - 'sum', - 'min', - 'max', - 'head', - 'tail', - 'cumcount', - 'resample', - 'rank', - 'quantile', - 'fillna', - 'mad', - 'any', - 'all', - 'take', - 'idxmax', - 'idxmin', - 'shift', - 'tshift', - 'ffill', - 'bfill', - 'pct_change', - 'skew', - 'plot', - 'hist', - 'median', - 'dtype', - 'corr', - 'cov', - 'diff', - 'unique', - 'nlargest', - 'nsmallest', - ]) - - for obj, whitelist in zip((df, s), (df_whitelist, s_whitelist)): - gb = obj.groupby(df.letters) - self.assertEqual(whitelist, gb._apply_whitelist) - for m in whitelist: - getattr(type(gb), m) - - AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', - 'mad', 'std', 'var', 'sem'] - AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] - - def test_regression_whitelist_methods(self): - - # GH6944 - # explicity test the whitelest methods - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', - 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - raw_frame = DataFrame(np.random.randn(10, 3), index=index, - columns=Index(['A', 'B', 'C'], name='exp')) - raw_frame.iloc[1, [1, 2]] = np.nan - raw_frame.iloc[7, [0, 1]] = np.nan - - for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - lrange(2), lrange(2), - [True, False]): - - if axis == 0: - frame = raw_frame - else: - frame = raw_frame.T - - if op in self.AGG_FUNCTIONS_WITH_SKIPNA: - grouped = frame.groupby(level=level, axis=axis) - result = getattr(grouped, op)(skipna=skipna) - expected = getattr(frame, op)(level=level, axis=axis, - skipna=skipna) - assert_frame_equal(result, expected) - else: - grouped = frame.groupby(level=level, axis=axis) - result = getattr(grouped, op)() - expected = getattr(frame, op)(level=level, axis=axis) - assert_frame_equal(result, expected) - - def test_groupby_blacklist(self): - from string import ascii_lowercase - letters = 
np.array(list(ascii_lowercase)) - N = 10 - random_letters = letters.take(np.random.randint(0, 26, N)) - df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), - 'letters': Series(random_letters)}) - s = df.floats - - blacklist = [ - 'eval', 'query', 'abs', 'where', - 'mask', 'align', 'groupby', 'clip', 'astype', - 'at', 'combine', 'consolidate', 'convert_objects', - ] - to_methods = [method for method in dir(df) if method.startswith('to_')] - - blacklist.extend(to_methods) - - # e.g., to_csv - defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the " - "'apply' method$)") - - # e.g., query, eval - not_defined = "(?:^{1!r} object has no attribute {0!r}$)" - fmt = defined_but_not_allowed + '|' + not_defined - for bl in blacklist: - for obj in (df, s): - gb = obj.groupby(df.letters) - msg = fmt.format(bl, type(gb).__name__) - with tm.assertRaisesRegexp(AttributeError, msg): - getattr(gb, bl) - - def test_tab_completion(self): - grp = self.mframe.groupby(level='second') - results = set([v for v in dir(grp) if not v.startswith('_')]) - expected = set( - ['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter', - 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', - 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', - 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', - 'nunique', 'head', 'describe', 'cummax', 'quantile', - 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', - 'cumsum', 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', - 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', - 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', - 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) - self.assertEqual(results, expected) - def test_lower_int_prec_count(self): df = DataFrame({'a': np.array( [0, 1, 2, 100], np.int8), diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py new file mode 100644 index 0000000000000..d566f34b7eae8 --- /dev/null +++ b/pandas/tests/groupby/test_whitelist.py @@ -0,0 +1,301 @@ +""" +test methods relating to generic function evaluation +the so-called white/black lists +""" + +import pytest +from string import ascii_lowercase +import numpy as np +from pandas import DataFrame, Series, compat, date_range, Index, MultiIndex +from pandas.util import testing as tm +from pandas.compat import lrange, product + +AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', + 'mad', 'std', 'var', 'sem'] +AGG_FUNCTIONS_WITH_SKIPNA = ['skew', 'mad'] + +df_whitelist = frozenset([ + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumcount', + 'resample', + 'rank', + 'quantile', + 'fillna', + 'mad', + 'any', + 'all', + 'take', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'boxplot', + 'hist', + 'median', + 'dtypes', + 'corrwith', + 'corr', + 'cov', + 'diff', +]) + +s_whitelist = frozenset([ + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumcount', + 'resample', + 'rank', + 'quantile', + 'fillna', + 'mad', + 'any', + 'all', + 'take', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'hist', + 'median', + 'dtype', + 'corr', + 'cov', + 'diff', + 'unique', + 'nlargest', + 'nsmallest', +]) + + +@pytest.fixture +def mframe(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 
3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + return DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + + +@pytest.fixture +def df(): + return DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + +@pytest.fixture +def df_letters(): + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), + 'letters': Series(random_letters)}) + return df + + +@pytest.mark.parametrize( + "obj, whitelist", zip((df_letters(), df_letters().floats), + (df_whitelist, s_whitelist))) +def test_groupby_whitelist(df_letters, obj, whitelist): + df = df_letters + + # these are aliases so ok to have the alias __name__ + alias = {'bfill': 'backfill', + 'ffill': 'pad', + 'boxplot': None} + + gb = obj.groupby(df.letters) + + assert whitelist == gb._apply_whitelist + for m in whitelist: + + m = alias.get(m, m) + if m is None: + continue + + f = getattr(type(gb), m) + + # name + try: + n = f.__name__ + except AttributeError: + continue + assert n == m + + # qualname + if compat.PY3: + try: + n = f.__qualname__ + except AttributeError: + continue + assert n.endswith(m) + + +@pytest.fixture +def raw_frame(): + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + raw_frame = DataFrame(np.random.randn(10, 3), index=index, + columns=Index(['A', 'B', 'C'], name='exp')) + raw_frame.iloc[1, [1, 2]] = np.nan + raw_frame.iloc[7, [0, 1]] = np.nan + return raw_frame + + +@pytest.mark.parametrize( + "op, level, axis, skipna", + product(AGG_FUNCTIONS, + lrange(2), lrange(2), + [True, False])) +def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna): + # GH6944 + # explicity test the whitelest methods + + if axis == 0: + frame = raw_frame + else: + frame = raw_frame.T + + if op in AGG_FUNCTIONS_WITH_SKIPNA: + grouped = frame.groupby(level=level, axis=axis) + result = getattr(grouped, op)(skipna=skipna) + expected = getattr(frame, op)(level=level, axis=axis, + skipna=skipna) + tm.assert_frame_equal(result, expected) + else: + grouped = frame.groupby(level=level, axis=axis) + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) + tm.assert_frame_equal(result, expected) + + +def test_groupby_blacklist(df_letters): + df = df_letters + s = df_letters.floats + + blacklist = [ + 'eval', 'query', 'abs', 'where', + 'mask', 'align', 'groupby', 'clip', 'astype', + 'at', 'combine', 'consolidate', 'convert_objects', + ] + to_methods = [method for method in dir(df) if method.startswith('to_')] + + blacklist.extend(to_methods) + + # e.g., to_csv + defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the " + "'apply' method$)") + + # e.g., query, eval + not_defined = "(?:^{1!r} object has no attribute {0!r}$)" + fmt = defined_but_not_allowed + '|' + not_defined + for bl in blacklist: + for obj in (df, s): + gb = obj.groupby(df.letters) + msg = fmt.format(bl, type(gb).__name__) + with tm.assertRaisesRegexp(AttributeError, msg): + getattr(gb, bl) + + +def test_tab_completion(mframe): + grp = mframe.groupby(level='second') + results = set([v for v in dir(grp) if not v.startswith('_')]) + expected = set( + ['A', 'B', 'C', 'agg', 'aggregate', 
'apply', 'boxplot', 'filter', + 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', + 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', + 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', + 'nunique', 'head', 'describe', 'cummax', 'quantile', + 'rank', 'cumprod', 'tail', 'resample', 'cummin', 'fillna', + 'cumsum', 'cumcount', 'all', 'shift', 'skew', + 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', + 'cov', 'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin', + 'ffill', 'bfill', 'pad', 'backfill', 'rolling', 'expanding']) + assert results == expected + + +def test_groupby_function_rename(mframe): + grp = mframe.groupby(level='second') + for name in ['sum', 'prod', 'min', 'max', 'first', 'last']: + f = getattr(grp, name) + assert f.__name__ == name + + +def test_groupby_selection_with_methods(df): + # some methods which require DatetimeIndex + rng = date_range('2014', periods=len(df)) + df.index = rng + + g = df.groupby(['A'])[['C']] + g_exp = df[['C']].groupby(df['A']) + # TODO check groupby with > 1 col ? + + # methods which are called as .foo() + methods = ['count', + 'corr', + 'cummax', + 'cummin', + 'cumprod', + 'describe', + 'rank', + 'quantile', + 'diff', + 'shift', + 'all', + 'any', + 'idxmin', + 'idxmax', + 'ffill', + 'bfill', + 'pct_change', + 'tshift'] + + for m in methods: + res = getattr(g, m)() + exp = getattr(g_exp, m)() + + # should always be frames! + tm.assert_frame_equal(res, exp) + + # methods which aren't just .foo() + tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0)) + tm.assert_frame_equal(g.dtypes, g_exp.dtypes) + tm.assert_frame_equal(g.apply(lambda x: x.sum()), + g_exp.apply(lambda x: x.sum())) + + tm.assert_frame_equal(g.resample('D').mean(), g_exp.resample('D').mean()) + tm.assert_frame_equal(g.resample('D').ohlc(), + g_exp.resample('D').ohlc()) + + tm.assert_frame_equal(g.filter(lambda x: len(x) == 3), + g_exp.filter(lambda x: len(x) == 3)) From bd169dc0a91f50031f6c2240075ff84d6b296576 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Tue, 28 Mar 2017 20:17:41 -0400 Subject: [PATCH 301/353] BUG: Fix index order for Index.intersection() closes #15582 Author: Albert Villanova del Moral Author: Jeff Reback Closes #15583 from albertvillanova/fix-15582 and squashes the following commits: 2d4e143 [Albert Villanova del Moral] Fix pytest fixture name collision 64e86a4 [Albert Villanova del Moral] Fix test on right join 73df69e [Albert Villanova del Moral] Address requested changes 8d2e9cc [Albert Villanova del Moral] Address requested changes 968c7f1 [Jeff Reback] DOC/TST: change to use parameterization 9e39794 [Albert Villanova del Moral] Address requested changes 5bf1508 [Albert Villanova del Moral] Address requested changes 654288b [Albert Villanova del Moral] Fix Travis errors 33eb740 [Albert Villanova del Moral] Address requested changes 3c200fe [Albert Villanova del Moral] Add new tests ef2581e [Albert Villanova del Moral] Fix Travis error f0d9d03 [Albert Villanova del Moral] Add whatsnew c96306d [Albert Villanova del Moral] Add sort argument to Index.join 047b513 [Albert Villanova del Moral] Address requested changes ec836bd [Albert Villanova del Moral] Fix Travis errors b977278 [Albert Villanova del Moral] Address requested changes 784fe75 [Albert Villanova del Moral] Fix error: line too long 1197b99 [Albert Villanova del Moral] Fix DataFrame column order when read from HDF file d9e29f8 [Albert Villanova del Moral] Create new DatetimeIndex from the Index.intersection result e7bcd28 [Albert 
Villanova del Moral] Fix typo in documentation a4ead99 [Albert Villanova del Moral] Fix typo c2a8dc3 [Albert Villanova del Moral] Implement tests c12bb3f [Albert Villanova del Moral] BUG: Fix index order for Index.intersection() --- doc/source/whatsnew/v0.20.0.txt | 57 +++++++++++ pandas/core/frame.py | 23 +++-- pandas/indexes/base.py | 27 ++++-- pandas/indexes/range.py | 27 ++---- pandas/io/pytables.py | 2 +- pandas/tests/frame/test_join.py | 140 ++++++++++++++++++++++++++++ pandas/tests/frame/test_misc_api.py | 86 ----------------- pandas/tests/indexes/test_base.py | 19 ++-- pandas/tests/tools/test_merge.py | 48 ++++++++++ pandas/tools/merge.py | 3 +- pandas/tseries/index.py | 14 +-- 11 files changed, 309 insertions(+), 137 deletions(-) create mode 100644 pandas/tests/frame/test_join.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 787857095044a..2e822729873ad 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -750,6 +750,62 @@ New Behavior: TypeError: Cannot compare 2014-01-01 00:00:00 of type to string column +.. _whatsnew_0200.api_breaking.index_order: + +Index.intersection and inner join now preserve the order of the left Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +`:meth:Index.intersection` now preserves the order of the calling ``Index`` (left) +instead of the other ``Index`` (right) (:issue:`15582`). This affects the inner +joins (`:meth:DataFrame.join` and `:func:merge`) and the ``.align`` methods. + +- ``Index.intersection`` + + .. ipython:: python + + left = pd.Index([2, 1, 0]) + left + right = pd.Index([1, 2, 3]) + right + + Previous Behavior: + + .. code-block:: ipython + + In [4]: left.intersection(right) + Out[4]: Int64Index([1, 2], dtype='int64') + + New Behavior: + + .. ipython:: python + + left.intersection(right) + +- ``DataFrame.join`` and ``pd.merge`` + + .. ipython:: python + + left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + left + right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) + right + + Previous Behavior: + + .. code-block:: ipython + + In [4]: left.join(right, how='inner') + Out[4]: + a b + 1 10 100 + 2 20 200 + + New Behavior: + + .. ipython:: python + + left.join(right, how='inner') + .. 
_whatsnew_0200.api: @@ -984,6 +1040,7 @@ Bug Fixes - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 90baa1aff4857..03f93f1e53cc8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -124,10 +124,14 @@ ----------%s right : DataFrame how : {'left', 'right', 'outer', 'inner'}, default 'inner' - * left: use only keys from left frame (SQL: left outer join) - * right: use only keys from right frame (SQL: right outer join) - * outer: use union of keys from both frames (SQL: full outer join) - * inner: use intersection of keys from both frames (SQL: inner join) + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order + * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically + * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys on : label or list Field names to join on. Must be found in both DataFrames. If on is None and not merging on indexes, then it merges on the intersection of @@ -147,7 +151,8 @@ Use the index from the right DataFrame as the join key. Same caveats as left_index sort : boolean, default False - Sort the join keys lexicographically in the result DataFrame + Sort the join keys lexicographically in the result DataFrame. If False, + the order of the join keys depends on the join type (how keyword) suffixes : 2-length sequence (tuple, list, ...) Suffix to apply to overlapping column names in the left and right side, respectively @@ -4472,16 +4477,18 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='', * left: use calling frame's index (or column if on is specified) * right: use other frame's index * outer: form union of calling frame's index (or column if on is - specified) with other frame's index + specified) with other frame's index, and sort it + lexicographically * inner: form intersection of calling frame's index (or column if - on is specified) with other frame's index + on is specified) with other frame's index, preserving the order + of the calling's one lsuffix : string Suffix to use from left frame's overlapping columns rsuffix : string Suffix to use from right frame's overlapping columns sort : boolean, default False Order result DataFrame lexicographically by the join key. If False, - preserves the index order of the calling (left) DataFrame + the order of the join key depends on the join type (how keyword) Notes ----- diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 54f73a2466286..7f0de963e5c56 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -2089,8 +2089,8 @@ def intersection(self, other): """ Form the intersection of two Index objects. - This returns a new Index with elements common to the index and `other`. 
- Sortedness of the result is not guaranteed. + This returns a new Index with elements common to the index and `other`, + preserving the order of the calling index. Parameters ---------- @@ -2128,15 +2128,15 @@ def intersection(self, other): pass try: - indexer = Index(self._values).get_indexer(other._values) + indexer = Index(other._values).get_indexer(self._values) indexer = indexer.take((indexer != -1).nonzero()[0]) except: # duplicates - indexer = Index(self._values).get_indexer_non_unique( - other._values)[0].unique() + indexer = Index(other._values).get_indexer_non_unique( + self._values)[0].unique() indexer = indexer[indexer != -1] - taken = self.take(indexer) + taken = other.take(indexer) if self.name != other.name: taken.name = None return taken @@ -2831,8 +2831,7 @@ def _reindex_non_unique(self, target): new_index = self._shallow_copy_with_infer(new_labels, freq=None) return new_index, indexer, new_indexer - def join(self, other, how='left', level=None, return_indexers=False): - """ + _index_shared_docs['join'] = """ *this is an internal non-public method* Compute join_index and indexers to conform data @@ -2844,11 +2843,20 @@ def join(self, other, how='left', level=None, return_indexers=False): how : {'left', 'right', 'inner', 'outer'} level : int or level name, default None return_indexers : boolean, default False + sort : boolean, default False + Sort the join keys lexicographically in the result Index. If False, + the order of the join keys depends on the join type (how keyword) + + .. versionadded:: 0.20.0 Returns ------- join_index, (left_indexer, right_indexer) """ + + @Appender(_index_shared_docs['join']) + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): from .multi import MultiIndex self_is_mi = isinstance(self, MultiIndex) other_is_mi = isinstance(other, MultiIndex) @@ -2929,6 +2937,9 @@ def join(self, other, how='left', level=None, return_indexers=False): elif how == 'outer': join_index = self.union(other) + if sort: + join_index = join_index.sort_values() + if return_indexers: if join_index is self: lindexer = None diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 103a3ac2fd5f4..be68c97fb7890 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -431,29 +431,16 @@ def union(self, other): return self._int64index.union(other) - def join(self, other, how='left', level=None, return_indexers=False): - """ - *this is an internal non-public method* - - Compute join_index and indexers to conform data - structures to the new index. 
- - Parameters - ---------- - other : Index - how : {'left', 'right', 'inner', 'outer'} - level : int or level name, default None - return_indexers : boolean, default False - - Returns - ------- - join_index, (left_indexer, right_indexer) - """ + @Appender(_index_shared_docs['join']) + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): if how == 'outer' and self is not other: # note: could return RangeIndex in more circumstances - return self._int64index.join(other, how, level, return_indexers) + return self._int64index.join(other, how, level, return_indexers, + sort) - return super(RangeIndex, self).join(other, how, level, return_indexers) + return super(RangeIndex, self).join(other, how, level, return_indexers, + sort) def __len__(self): """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b3b253f151541..f75a4761e0948 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4321,7 +4321,7 @@ def _reindex_axis(obj, axis, labels, other=None): labels = _ensure_index(labels.unique()) if other is not None: - labels = labels & _ensure_index(other.unique()) + labels = _ensure_index(other.unique()) & labels if not labels.equals(ax): slicer = [slice(None, None)] * obj.ndim slicer[axis] = labels diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py new file mode 100644 index 0000000000000..f7a510023ca07 --- /dev/null +++ b/pandas/tests/frame/test_join.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- + +import pytest +import numpy as np + +from pandas import DataFrame, Index +from pandas.tests.frame.common import TestData +import pandas.util.testing as tm + + +@pytest.fixture +def frame(): + return TestData().frame + + +@pytest.fixture +def left(): + return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right(): + return DataFrame({'b': [300, 100, 200]}, index=[3, 1, 2]) + + +@pytest.mark.parametrize( + "how, sort, expected", + [('inner', False, DataFrame({'a': [20, 10], + 'b': [200, 100]}, + index=[2, 1])), + ('inner', True, DataFrame({'a': [10, 20], + 'b': [100, 200]}, + index=[1, 2])), + ('left', False, DataFrame({'a': [20, 10, 0], + 'b': [200, 100, np.nan]}, + index=[2, 1, 0])), + ('left', True, DataFrame({'a': [0, 10, 20], + 'b': [np.nan, 100, 200]}, + index=[0, 1, 2])), + ('right', False, DataFrame({'a': [np.nan, 10, 20], + 'b': [300, 100, 200]}, + index=[3, 1, 2])), + ('right', True, DataFrame({'a': [10, 20, np.nan], + 'b': [100, 200, 300]}, + index=[1, 2, 3])), + ('outer', False, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3])), + ('outer', True, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3]))]) +def test_join(left, right, how, sort, expected): + + result = left.join(right, how=how, sort=sort) + tm.assert_frame_equal(result, expected) + + +def test_join_index(frame): + # left / right + + f = frame.loc[frame.index[:10], ['A', 'B']] + f2 = frame.loc[frame.index[5:], ['C', 'D']].iloc[::-1] + + joined = f.join(f2) + tm.assert_index_equal(f.index, joined.index) + expected_columns = Index(['A', 'B', 'C', 'D']) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how='left') + tm.assert_index_equal(joined.index, f.index) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how='right') + tm.assert_index_equal(joined.index, f2.index) + tm.assert_index_equal(joined.columns, expected_columns) + + # inner + + joined = f.join(f2, how='inner') + 
tm.assert_index_equal(joined.index, f.index[5:10]) + tm.assert_index_equal(joined.columns, expected_columns) + + # outer + + joined = f.join(f2, how='outer') + tm.assert_index_equal(joined.index, frame.index.sort_values()) + tm.assert_index_equal(joined.columns, expected_columns) + + tm.assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') + + # corner case - overlapping columns + for how in ('outer', 'left', 'inner'): + with tm.assertRaisesRegexp(ValueError, 'columns overlap but ' + 'no suffix'): + frame.join(frame, how=how) + + +def test_join_index_more(frame): + af = frame.loc[:, ['A', 'B']] + bf = frame.loc[::2, ['C', 'D']] + + expected = af.copy() + expected['C'] = frame['C'][::2] + expected['D'] = frame['D'][::2] + + result = af.join(bf) + tm.assert_frame_equal(result, expected) + + result = af.join(bf, how='right') + tm.assert_frame_equal(result, expected[::2]) + + result = bf.join(af, how='right') + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + +def test_join_index_series(frame): + df = frame.copy() + s = df.pop(frame.columns[-1]) + joined = df.join(s) + + # TODO should this check_names ? + tm.assert_frame_equal(joined, frame, check_names=False) + + s.name = None + tm.assertRaisesRegexp(ValueError, 'must have a name', df.join, s) + + +def test_join_overlap(frame): + df1 = frame.loc[:, ['A', 'B', 'C']] + df2 = frame.loc[:, ['B', 'C', 'D']] + + joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2') + df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1') + df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2') + + no_overlap = frame.loc[:, ['A', 'D']] + expected = df1_suf.join(df2_suf).join(no_overlap) + + # column order not necessarily sorted + tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 321d46739b24c..42427df90401d 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -57,92 +57,6 @@ def test_get_value(self): expected = self.frame[col][idx] tm.assert_almost_equal(result, expected) - def test_join_index(self): - # left / right - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2) - self.assert_index_equal(f.index, joined.index) - self.assertEqual(len(joined.columns), 4) - - joined = f.join(f2, how='left') - self.assert_index_equal(joined.index, f.index) - self.assertEqual(len(joined.columns), 4) - - joined = f.join(f2, how='right') - self.assert_index_equal(joined.index, f2.index) - self.assertEqual(len(joined.columns), 4) - - # inner - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2, how='inner') - self.assert_index_equal(joined.index, f.index.intersection(f2.index)) - self.assertEqual(len(joined.columns), 4) - - # outer - - f = self.frame.reindex(columns=['A', 'B'])[:10] - f2 = self.frame.reindex(columns=['C', 'D']) - - joined = f.join(f2, how='outer') - self.assertTrue(tm.equalContents(self.frame.index, joined.index)) - self.assertEqual(len(joined.columns), 4) - - assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') - - # corner case - overlapping columns - for how in ('outer', 'left', 'inner'): - with assertRaisesRegexp(ValueError, 'columns overlap but ' - 'no suffix'): - self.frame.join(self.frame, how=how) - - def test_join_index_more(self): - af = self.frame.loc[:, ['A', 'B']] - bf = self.frame.loc[::2, ['C', 'D']] - - expected = af.copy() - 
expected['C'] = self.frame['C'][::2] - expected['D'] = self.frame['D'][::2] - - result = af.join(bf) - assert_frame_equal(result, expected) - - result = af.join(bf, how='right') - assert_frame_equal(result, expected[::2]) - - result = bf.join(af, how='right') - assert_frame_equal(result, expected.loc[:, result.columns]) - - def test_join_index_series(self): - df = self.frame.copy() - s = df.pop(self.frame.columns[-1]) - joined = df.join(s) - - # TODO should this check_names ? - assert_frame_equal(joined, self.frame, check_names=False) - - s.name = None - assertRaisesRegexp(ValueError, 'must have a name', df.join, s) - - def test_join_overlap(self): - df1 = self.frame.loc[:, ['A', 'B', 'C']] - df2 = self.frame.loc[:, ['B', 'C', 'D']] - - joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2') - df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1') - df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2') - - no_overlap = self.frame.loc[:, ['A', 'D']] - expected = df1_suf.join(df2_suf).join(no_overlap) - - # column order not necessarily sorted - assert_frame_equal(joined, expected.loc[:, joined.columns]) - def test_add_prefix_suffix(self): with_prefix = self.frame.add_prefix('foo#') expected = pd.Index(['foo#%s' % c for c in self.frame.columns]) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c4dc10d8174cc..a8197b070b032 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -626,14 +626,14 @@ def test_intersection(self): # non monotonic idx1 = Index([5, 3, 2, 4, 1], name='idx') idx2 = Index([4, 7, 6, 5, 3], name='idx') - result2 = idx1.intersection(idx2) - self.assertTrue(tm.equalContents(result2, expected2)) - self.assertEqual(result2.name, expected2.name) + expected = Index([5, 3, 4], name='idx') + result = idx1.intersection(idx2) + self.assert_index_equal(result, expected) - idx3 = Index([4, 7, 6, 5, 3], name='other') - result3 = idx1.intersection(idx3) - self.assertTrue(tm.equalContents(result3, expected3)) - self.assertEqual(result3.name, expected3.name) + idx2 = Index([4, 7, 6, 5, 3], name='other') + expected = Index([5, 3, 4], name=None) + result = idx1.intersection(idx2) + self.assert_index_equal(result, expected) # non-monotonic non-unique idx1 = Index(['A', 'B', 'A', 'C']) @@ -642,6 +642,11 @@ def test_intersection(self): result = idx1.intersection(idx2) self.assert_index_equal(result, expected) + idx2 = Index(['B', 'D', 'A']) + expected = Index(['A', 'B', 'A'], dtype='object') + result = idx1.intersection(idx2) + self.assert_index_equal(result, expected) + # preserve names first = self.strIndex[5:20] second = self.strIndex[:10] diff --git a/pandas/tests/tools/test_merge.py b/pandas/tests/tools/test_merge.py index ff27500355998..8011bc4a1cfc2 100644 --- a/pandas/tests/tools/test_merge.py +++ b/pandas/tests/tools/test_merge.py @@ -1355,3 +1355,51 @@ def test_dtype_on_merged_different(self, change, how, left, right): np.dtype('int64')], index=['X', 'Y', 'Z']) assert_series_equal(result, expected) + + +@pytest.fixture +def left_df(): + return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right_df(): + return DataFrame({'b': [300, 100, 200]}, index=[3, 1, 2]) + + +class TestMergeOnIndexes(object): + + @pytest.mark.parametrize( + "how, sort, expected", + [('inner', False, DataFrame({'a': [20, 10], + 'b': [200, 100]}, + index=[2, 1])), + ('inner', True, DataFrame({'a': [10, 20], + 'b': [100, 200]}, + index=[1, 2])), + ('left', False, DataFrame({'a': [20, 10, 0], + 'b': [200, 100, np.nan]}, + 
index=[2, 1, 0])), + ('left', True, DataFrame({'a': [0, 10, 20], + 'b': [np.nan, 100, 200]}, + index=[0, 1, 2])), + ('right', False, DataFrame({'a': [np.nan, 10, 20], + 'b': [300, 100, 200]}, + index=[3, 1, 2])), + ('right', True, DataFrame({'a': [10, 20, np.nan], + 'b': [100, 200, 300]}, + index=[1, 2, 3])), + ('outer', False, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3])), + ('outer', True, DataFrame({'a': [0, 10, 20, np.nan], + 'b': [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3]))]) + def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): + + result = pd.merge(left_df, right_df, + left_index=True, + right_index=True, + how=how, + sort=sort) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 60d523a8ea539..7de2549cadfc7 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -733,7 +733,8 @@ def _get_join_info(self): if self.left_index and self.right_index and self.how != 'asof': join_index, left_indexer, right_indexer = \ - left_ax.join(right_ax, how=self.how, return_indexers=True) + left_ax.join(right_ax, how=self.how, return_indexers=True, + sort=self.sort) elif self.right_index and self.how == 'left': join_index, left_indexer, right_indexer = \ _left_join_on_index(left_ax, right_ax, self.left_join_keys, diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 1992e177556cc..9123131a6dccf 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1048,7 +1048,8 @@ def union_many(self, others): this.offset = to_offset(this.inferred_freq) return this - def join(self, other, how='left', level=None, return_indexers=False): + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): """ See Index.join """ @@ -1062,7 +1063,7 @@ def join(self, other, how='left', level=None, return_indexers=False): this, other = self._maybe_utc_convert(other) return Index.join(this, other, how=how, level=level, - return_indexers=return_indexers) + return_indexers=return_indexers, sort=sort) def _maybe_utc_convert(self, other): this = self @@ -1214,9 +1215,10 @@ def intersection(self, other): not other.offset.isAnchored() or (not self.is_monotonic or not other.is_monotonic)): result = Index.intersection(self, other) - if isinstance(result, DatetimeIndex): - if result.freq is None: - result.offset = to_offset(result.inferred_freq) + result = self._shallow_copy(result._values, name=result.name, + tz=result.tz, freq=None) + if result.freq is None: + result.offset = to_offset(result.inferred_freq) return result if len(self) == 0: @@ -1539,7 +1541,7 @@ def _get_freq(self): def _set_freq(self, value): self.offset = value freq = property(fget=_get_freq, fset=_set_freq, - doc="get/set the frequncy of the Index") + doc="get/set the frequency of the Index") year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', From f3e3cfeddac934fdb60ef9f54ea4e06af687014f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 29 Mar 2017 12:21:02 -0400 Subject: [PATCH 302/353] CI: upload coverage with flags --- .travis.yml | 5 +---- ci/install_travis.sh | 2 +- ci/upload_coverage.sh | 11 +++++++++++ 3 files changed, 13 insertions(+), 5 deletions(-) create mode 100755 ci/upload_coverage.sh diff --git a/.travis.yml b/.travis.yml index 1053f8925ebd7..d864b755541de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -108,7 +108,6 @@ install: - echo "install done" before_script: - - source activate pandas && pip install 
codecov - ci/install_db_travis.sh script: @@ -120,9 +119,7 @@ script: - echo "script done" after_success: - - if [ "$COVERAGE" ]; then - source activate pandas && codecov --file /tmp/cov-single.xml /tmp/cov-multiple.xml; - fi + - ci/upload_coverage.sh after_script: - echo "after_script start" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index f71df979c9df0..10556ccffa55d 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -115,7 +115,7 @@ if [ "$LINT" ]; then fi if [ "$COVERAGE" ]; then - pip install coverage pytest-cov + pip install coverage pytest-cov codecov fi echo diff --git a/ci/upload_coverage.sh b/ci/upload_coverage.sh new file mode 100755 index 0000000000000..0da8e46a15de1 --- /dev/null +++ b/ci/upload_coverage.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +if [ -z "$COVERAGE" ]; then + echo "no upload of coverage is needed" + exit 0 +fi + +source activate pandas + +codecov --file -c -F single /tmp/cov-single.xml +codecov --file -c -F multiple /tmp/cov-multiple.xml From abf16978cfcb1188ca856819204e6bc7500af179 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 29 Mar 2017 13:02:46 -0400 Subject: [PATCH 303/353] CI: reconfig coverage uploading --- ci/install_travis.sh | 2 +- ci/upload_coverage.sh | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 10556ccffa55d..f71df979c9df0 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -115,7 +115,7 @@ if [ "$LINT" ]; then fi if [ "$COVERAGE" ]; then - pip install coverage pytest-cov codecov + pip install coverage pytest-cov fi echo diff --git a/ci/upload_coverage.sh b/ci/upload_coverage.sh index 0da8e46a15de1..a7ef2fa908079 100755 --- a/ci/upload_coverage.sh +++ b/ci/upload_coverage.sh @@ -1,11 +1,12 @@ #!/bin/bash if [ -z "$COVERAGE" ]; then - echo "no upload of coverage is needed" + echo "coverage is not selected for this build" exit 0 fi source activate pandas -codecov --file -c -F single /tmp/cov-single.xml -codecov --file -c -F multiple /tmp/cov-multiple.xml +echo "uploading coverage" +bash <(curl -s https://codecov.io/bash) -Z -c -F single -f /tmp/cov-single.xml +bash <(curl -s https://codecov.io/bash) -Z -c -F multiple -f /tmp/cov-multiple.xml From ecaeea17f2cba89f41630527c8bf8801447fa09e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 29 Mar 2017 18:15:43 -0400 Subject: [PATCH 304/353] DOC: update io.rst Author: Jeff Reback Closes #15840 from jreback/io and squashes the following commits: b4ee5dd [Jeff Reback] DOC: update io.rst --- doc/source/io.rst | 50 +++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index e72224c6fa1fe..90167e7c6183f 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -29,36 +29,26 @@ IO Tools (Text, CSV, HDF5, ...) =============================== The pandas I/O API is a set of top level ``reader`` functions accessed like ``pd.read_csv()`` that generally return a ``pandas`` -object. 
- - * :ref:`read_csv` - * :ref:`read_excel` - * :ref:`read_hdf` - * :ref:`read_feather` - * :ref:`read_sql` - * :ref:`read_json` - * :ref:`read_msgpack` - * :ref:`read_html` - * :ref:`read_gbq` - * :ref:`read_stata` - * :ref:`read_sas` - * :ref:`read_clipboard` - * :ref:`read_pickle` - -The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` - - * :ref:`to_csv` - * :ref:`to_excel` - * :ref:`to_hdf` - * :ref:`to_feather` - * :ref:`to_sql` - * :ref:`to_json` - * :ref:`to_msgpack` - * :ref:`to_html` - * :ref:`to_gbq` - * :ref:`to_stata` - * :ref:`to_clipboard` - * :ref:`to_pickle` +object. The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` + +.. csv-table:: + :header: "Format Type", "Data Description", "Reader", "Writer" + :widths: 30, 100, 60, 60 + :delim: ; + + text;`CSV `__;:ref:`read_csv`;:ref:`to_csv` + text;`JSON `__;:ref:`read_json`;:ref:`to_json` + text;`HTML `__;:ref:`read_html`;:ref:`to_html` + text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` + binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` + binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` + binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` + binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` + binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` + binary;`SAS `__;:ref:`read_sas`; + binary;`Python Pickle Format `__;:ref:`read_pickle`;:ref:`to_pickle` + SQL;`SQL `__;:ref:`read_sql`;:ref:`to_sql` + SQL;`Google Big Query `__;:ref:`read_gbq`;:ref:`to_gbq` :ref:`Here ` is an informal performance comparison for some of these IO methods. From 0ab081345eb191937fd4152eba48b8c9692b02bf Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 29 Mar 2017 19:24:40 -0400 Subject: [PATCH 305/353] ENH: read_html() handles tables with multiple header rows #13434 closes #13434 Author: Brian Author: S. Brian Huey Closes #15242 from brianhuey/thead-improvement and squashes the following commits: fc1c80e [S. Brian Huey] Merge branch 'master' into thead-improvement b54aa0c [Brian] removed duplicate test case 6ae2860 [Brian] updated docstring and io.rst 41fe8cd [Brian] review changes 873ea58 [Brian] switched from range to lrange cd70225 [Brian] ENH:read_html() handles tables with multiple header rows #13434 --- doc/source/io.rst | 7 ++++--- doc/source/whatsnew/v0.20.0.txt | 13 +++++++------ pandas/io/html.py | 31 ++++++++++++++++++++----------- pandas/tests/io/test_html.py | 12 ++++++++++++ 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 90167e7c6183f..5cec27c329a7f 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2222,9 +2222,10 @@ Read a URL and match a table that contains specific text match = 'Metcalf Bank' df_list = pd.read_html(url, match=match) -Specify a header row (by default ```` are used to form the column index, if multiple rows are contained within +```` then a multiindex is created); if specified, the header row is taken +from the data minus the parsed header elements (``
Conda - conda downloads + conda default downloads + +
Conda-forge + + conda-forge downloads
- - appveyor build status + + appveyor build status
`` elements are used to form the column -index); if specified, the header row is taken from the data minus the parsed -header elements (```` elements). +Specify a header row (by default ```` or ```` elements located within a +``
`` elements). .. code-block:: python diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2e822729873ad..65635edb82163 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -283,7 +283,7 @@ Other Enhancements - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). - ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). -- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) +- ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Multiple offset aliases with decimal points are now supported (e.g. '0.5min' is parsed as '30s') (:issue:`8419`) - ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) @@ -294,8 +294,8 @@ Other Enhancements - ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`, :issue:`14798`) - ``pd.qcut`` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`7751`) - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`) -- The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`) -- The ``skiprows`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`10882`) +- The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`) +- The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) - The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) @@ -309,6 +309,7 @@ Other Enhancements - ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) +- ``pd.read_html()`` will parse multiple header rows, creating a multiindex header. (:issue:`13434`). - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`) @@ -813,7 +814,7 @@ Other API Changes ^^^^^^^^^^^^^^^^^ - ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). 
-- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) +- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv()`` and will be removed in the future (:issue:`12665`) - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) - ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) @@ -926,7 +927,7 @@ Bug Fixes - Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) - Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`) - Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) -- Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) +- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) - Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) @@ -1054,4 +1055,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) -- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) diff --git a/pandas/io/html.py b/pandas/io/html.py index 53595b94eb94d..8a3709dba2176 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -355,9 +355,12 @@ def _parse_raw_thead(self, table): thead = self._parse_thead(table) res = [] if thead: - res = lmap(self._text_getter, self._parse_th(thead[0])) - return np.atleast_1d( - np.array(res).squeeze()) if res and len(res) == 1 else res + trs = self._parse_tr(thead[0]) + for tr in trs: + cols = lmap(self._text_getter, self._parse_td(tr)) + if any([col != '' for col in cols]): + res.append(cols) + return res def _parse_raw_tfoot(self, table): tfoot = self._parse_tfoot(table) @@ -591,9 +594,17 @@ def _parse_tfoot(self, table): return table.xpath('.//tfoot') def _parse_raw_thead(self, table): - expr = './/thead//th' - return [_remove_whitespace(x.text_content()) for x in - table.xpath(expr)] + expr = './/thead' + thead = table.xpath(expr) + res = [] + if thead: + trs = self._parse_tr(thead[0]) + for tr in trs: + cols = [_remove_whitespace(x.text_content()) for x in + self._parse_td(tr)] + if any([col != '' for col in cols]): + res.append(cols) + return res def _parse_raw_tfoot(self, table): expr = './/tfoot//th|//tfoot//td' @@ -615,19 +626,17 @@ def _data_to_frame(**kwargs): head, body, foot = 
kwargs.pop('data') header = kwargs.pop('header') kwargs['skiprows'] = _get_skiprows(kwargs['skiprows']) - if head: - body = [head] + body - + rows = lrange(len(head)) + body = head + body if header is None: # special case when a table has elements - header = 0 + header = 0 if rows == [0] else rows if foot: body += [foot] # fill out elements of body that are "ragged" _expand_elements(body) - tp = TextParser(body, header=header, **kwargs) df = tp.read() return df diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index c1a2a4545a6f9..4aa85c0f63a68 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -760,6 +760,18 @@ def test_keep_default_na(self): html_df = read_html(html_data, keep_default_na=True)[0] tm.assert_frame_equal(expected_df, html_df) + def test_multiple_header_rows(self): + # Issue #13434 + expected_df = DataFrame(data=[("Hillary", 68, "D"), + ("Bernie", 74, "D"), + ("Donald", 69, "R")]) + expected_df.columns = [["Unnamed: 0_level_0", "Age", "Party"], + ["Name", "Unnamed: 1_level_1", + "Unnamed: 2_level_1"]] + html = expected_df.to_html(index=False) + html_df = read_html(html, )[0] + tm.assert_frame_equal(expected_df, html_df) + def _lang_enc(filename): return os.path.splitext(os.path.basename(filename))[0].split('_') From de589c23e0ae79d9cae59674259dc2707513795f Mon Sep 17 00:00:00 2001 From: Robin Date: Wed, 29 Mar 2017 19:45:05 -0400 Subject: [PATCH 306/353] BUG: Return mode even if single value (#15714) Author: Robin This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #15744 from buyology/issue-15714-fix-mode and squashes the following commits: 8c08cd5 [Robin] Added multi-test and whatsnew note 5f36395 [Robin] Fixed flake issues, removed duplicate test, inserted GH issue number reference 5f829e1 [Robin] Merge conflict 0e2dec0 [Robin] Fixed tests 26db131 [Robin] Return mode even if single value (#15714) 44dbbb2 [Robin] Return mode even if single value (#15714) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/hashtable_func_helper.pxi.in | 2 +- pandas/core/categorical.py | 3 +- pandas/core/frame.py | 5 ++-- pandas/core/series.py | 3 +- pandas/tests/frame/test_analytics.py | 35 +++++++++++------------ pandas/tests/series/test_analytics.py | 13 +++++---- pandas/tests/test_algos.py | 33 ++++++++++++++++----- pandas/tests/test_categorical.py | 6 ++-- 9 files changed, 58 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 65635edb82163..023d41763baee 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1050,6 +1050,7 @@ Bug Fixes - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``.mode()`` where ``mode`` was not returned if was only a single value (:issue:`15714`) - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 0608af8f8504b..c97639481f12c 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -309,7 +309,7 @@ def mode_{{dtype}}(ndarray[{{ctype}}] values): def 
mode_{{dtype}}({{ctype}}[:] values): {{endif}} cdef: - int count, max_count = 2 + int count, max_count = 1 int j = -1 # so you can do += Py_ssize_t k kh_{{table_type}}_t *table diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 632c24c33feb7..0fcf8664e755d 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1868,8 +1868,7 @@ def mode(self): """ Returns the mode(s) of the Categorical. - Empty if nothing occurs at least 2 times. Always returns `Categorical` - even if only one value. + Always returns `Categorical` even if only one value. Returns ------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 03f93f1e53cc8..ffae22447cc65 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5127,9 +5127,8 @@ def _get_agg_axis(self, axis_num): def mode(self, axis=0, numeric_only=False): """ - Gets the mode(s) of each element along the axis selected. Empty if - nothing has 2+ occurrences. Adds a row for each mode per label, fills - in gaps with nan. + Gets the mode(s) of each element along the axis selected. Adds a row + for each mode per label, fills in gaps with nan. Note that there could be multiple values returned for the selected axis (when more than one item share the maximum frequency), which is diff --git a/pandas/core/series.py b/pandas/core/series.py index 0913592e055cd..bcc1ed272b081 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1192,8 +1192,7 @@ def count(self, level=None): def mode(self): """Return the mode(s) of the dataset. - Empty if nothing occurs at least 2 times. Always returns Series even - if only one value is returned. + Always returns Series even if only one value is returned. Returns ------- diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 735d3786e6a54..aa15e9fbab4cc 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -789,18 +789,23 @@ def test_mode(self): "E": [8, 8, 1, 1, 3, 3]}) tm.assert_frame_equal(df[["A"]].mode(), pd.DataFrame({"A": [12]})) - expected = pd.Series([], dtype='int64', name='D').to_frame() + expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ + to_frame() tm.assert_frame_equal(df[["D"]].mode(), expected) expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() tm.assert_frame_equal(df[["E"]].mode(), expected) tm.assert_frame_equal(df[["A", "B"]].mode(), pd.DataFrame({"A": [12], "B": [10.]})) tm.assert_frame_equal(df.mode(), - pd.DataFrame({"A": [12, np.nan, np.nan], - "B": [10, np.nan, np.nan], - "C": [8, 9, np.nan], - "D": [np.nan, np.nan, np.nan], - "E": [1, 3, 8]})) + pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, + np.nan, np.nan], + "B": [10, np.nan, np.nan, np.nan, + np.nan, np.nan], + "C": [8, 9, np.nan, np.nan, np.nan, + np.nan], + "D": [0, 1, 2, 3, 4, 5], + "E": [1, 3, 8, np.nan, np.nan, + np.nan]})) # outputs in sorted order df["C"] = list(reversed(df["C"])) @@ -817,20 +822,12 @@ def test_mode(self): df = pd.DataFrame({"A": np.arange(6, dtype='int64'), "B": pd.date_range('2011', periods=6), "C": list('abcdef')}) - exp = pd.DataFrame({"A": pd.Series([], dtype=df["A"].dtype), - "B": pd.Series([], dtype=df["B"].dtype), - "C": pd.Series([], dtype=df["C"].dtype)}) - tm.assert_frame_equal(df.mode(), exp) - - # and also when not empty - df.loc[1, "A"] = 0 - df.loc[4, "B"] = df.loc[3, "B"] - df.loc[5, "C"] = 'e' - exp = pd.DataFrame({"A": pd.Series([0], dtype=df["A"].dtype), - "B": pd.Series([df.loc[3, "B"]], + exp = pd.DataFrame({"A": 
pd.Series(np.arange(6, dtype='int64'), + dtype=df["A"].dtype), + "B": pd.Series(pd.date_range('2011', periods=6), dtype=df["B"].dtype), - "C": pd.Series(['e'], dtype=df["C"].dtype)}) - + "C": pd.Series(list('abcdef'), + dtype=df["C"].dtype)}) tm.assert_frame_equal(df.mode(), exp) def test_operators_timedelta64(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index dc71fafb1094f..b747a680c17dd 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -130,10 +130,10 @@ def test_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(Series([]).mode(), exp) - exp = Series([], dtype=np.int64) + exp = Series([1], dtype=np.int64) tm.assert_series_equal(Series([1]).mode(), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp) # Test numerical data types. @@ -169,7 +169,8 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # Test datetime types. - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype='M8[ns]') s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(s.mode(), exp) @@ -180,7 +181,7 @@ def test_mode(self): tm.assert_series_equal(s.mode(), exp) # gh-5986: Test timedelta types. - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(s.mode(), exp) @@ -200,13 +201,13 @@ def test_mode(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(s.mode(), exp) # Test category dtype. 
c = Categorical([1, 2]) - exp = Categorical([], categories=[1, 2]) + exp = Categorical([1, 2], categories=[1, 2]) exp = Series(exp, dtype='category') tm.assert_series_equal(Series(c).mode(), exp) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 5d69746034346..ac3a42c3cf122 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1261,10 +1261,27 @@ def test_no_mode(self): exp = Series([], dtype=np.float64) tm.assert_series_equal(algos.mode([]), exp) - exp = Series([], dtype=np.int) + # GH 15714 + def test_mode_single(self): + exp_single = [1] + data_single = [1] + + exp_multi = [1] + data_multi = [1, 1] + + for dt in np.typecodes['AllInteger'] + np.typecodes['Float']: + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series([1], dtype=np.int) tm.assert_series_equal(algos.mode([1]), exp) - exp = Series([], dtype=np.object) + exp = Series(['a', 'b', 'c'], dtype=np.object) tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp) def test_number_mode(self): @@ -1300,7 +1317,8 @@ def test_strobj_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_datelike_mode(self): - exp = Series([], dtype="M8[ns]") + exp = Series(['1900-05-03', '2011-01-03', + '2013-01-02'], dtype="M8[ns]") s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1311,7 +1329,8 @@ def test_datelike_mode(self): tm.assert_series_equal(algos.mode(s), exp) def test_timedelta_mode(self): - exp = Series([], dtype='timedelta64[ns]') + exp = Series(['-1 days', '0 days', '1 days'], + dtype='timedelta64[ns]') s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]') tm.assert_series_equal(algos.mode(s), exp) @@ -1331,13 +1350,13 @@ def test_uint64_overflow(self): s = Series([1, 2**63, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) - exp = Series([], dtype=np.uint64) + exp = Series([1, 2**63], dtype=np.uint64) s = Series([1, 2**63], dtype=np.uint64) tm.assert_series_equal(algos.mode(s), exp) def test_categorical(self): c = Categorical([1, 2]) - exp = Series([], dtype=np.int64) + exp = Series([1, 2], dtype=np.int64) tm.assert_series_equal(algos.mode(c), exp) c = Categorical([1, 'a', 'a']) @@ -1350,7 +1369,7 @@ def test_categorical(self): def test_index(self): idx = Index([1, 2, 3]) - exp = Series([], dtype=np.int64) + exp = Series([1, 2, 3], dtype=np.int64) tm.assert_series_equal(algos.mode(idx), exp) idx = Index([1, 'a', 'a']) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index b4072d04dfd81..ea2697ec19df3 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1279,13 +1279,13 @@ def test_mode(self): s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) # NaN should not become the mode! 
s = Categorical([np.nan, np.nan, np.nan, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4], categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4], categories=[5, 4, 3, 2, 1], ordered=True) @@ -2833,7 +2833,7 @@ def test_mode(self): s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True)) res = s.mode() - exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)) + exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)) tm.assert_series_equal(res, exp) def test_value_counts(self): From 046d3be54970bb7ff99d7ebfd307d93e41eeb7ee Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 30 Mar 2017 07:53:11 -0400 Subject: [PATCH 307/353] CLN: Remove "flake8: noqa" from files Just some minor house-cleaning to cut down on the number of search results found here. Author: gfyoung Closes #15842 from gfyoung/flake8-noqa-clean and squashes the following commits: 5d1edeb [gfyoung] CLN: Make test_compat.py flake8-able f9079ff [gfyoung] CLN: Make exceptions.py flake8-able 0e236f5 [gfyoung] CLN: Make test_format.py flake8-able --- pandas/tests/computation/test_compat.py | 6 +- pandas/tests/formats/test_format.py | 255 ++++++++++++++++-------- pandas/util/clipboard/exceptions.py | 1 - 3 files changed, 177 insertions(+), 85 deletions(-) diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index 59bdde83aedd8..56a7cab730f1f 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -1,8 +1,4 @@ - -# flake8: noqa - import pytest -from itertools import product from distutils.version import LooseVersion import pandas as pd @@ -32,7 +28,7 @@ def test_compat(): @pytest.mark.parametrize('parser', expr._parsers) def test_invalid_numexpr_version(engine, parser): def testit(): - a, b = 1, 2 + a, b = 1, 2 # noqa res = pd.eval('a + b', engine=engine, parser=parser) tm.assert_equal(res, 3) diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 44a7f2b45e759..83458c82a3d7c 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -1,13 +1,9 @@ # -*- coding: utf-8 -*- """ -test output formatting for Series/DataFrame -including to_string & reprs +Test output formatting for Series/DataFrame, including to_string & reprs """ -# TODO(wesm): lots of issues making flake8 hard -# flake8: noqa - from __future__ import print_function import re @@ -57,8 +53,14 @@ def has_info_repr(df): def has_non_verbose_info_repr(df): has_info = has_info_repr(df) r = repr(df) - nv = len(r.split( - '\n')) == 6 # 1. , 2. Index, 3. Columns, 4. dtype, 5. memory usage, 6. trailing newline + + # 1. + # 2. Index + # 3. Columns + # 4. dtype + # 5. memory usage + # 6. 
trailing newline + nv = len(r.split('\n')) == 6 return has_info and nv @@ -477,7 +479,7 @@ def test_east_asian_unicode_frame(self): if PY3: _rep = repr else: - _rep = unicode + _rep = unicode # noqa # not alighned properly because of east asian width @@ -529,27 +531,39 @@ def test_east_asian_unicode_frame(self): # index name df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) - expected = (u" a b\nおおおお \nあ あああああ あ\n" - u"い い いいい\nうう う う\nえ えええ ええええええ" - ) + index=pd.Index([u'あ', u'い', u'うう', u'え'], + name=u'おおおお')) + expected = (u" a b\n" + u"おおおお \n" + u"あ あああああ あ\n" + u"い い いいい\n" + u"うう う う\n" + u"え えええ ええええええ") self.assertEqual(_rep(df), expected) # all df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']}, - index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お')) - expected = (u" あああ いいいいい\nお \nあ あああ あ\n" - u"いいい い いいい\nうう う う\nえ えええええ ええ") + index=pd.Index([u'あ', u'いいい', u'うう', u'え'], + name=u'お')) + expected = (u" あああ いいいいい\n" + u"お \n" + u"あ あああ あ\n" + u"いいい い いいい\n" + u"うう う う\n" + u"え えええええ ええ") self.assertEqual(_rep(df), expected) # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) - expected = (u" a b\nあ いい あああああ あ\n" - u"う え い いいい\nおおお かかかか う う\n" + 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + index=idx) + expected = (u" a b\n" + u"あ いい あああああ あ\n" + u"う え い いいい\n" + u"おおお かかかか う う\n" u"き くく えええ ええええええ") self.assertEqual(_rep(df), expected) @@ -597,18 +611,21 @@ def test_east_asian_unicode_frame(self): df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=['a', 'bb', 'c', 'ddd']) - expected = (u" a b\na あああああ あ\n" - u"bb い いいい\nc う う\n" - u"ddd えええ ええええええ" - "") + expected = (u" a b\n" + u"a あああああ あ\n" + u"bb い いいい\n" + u"c う う\n" + u"ddd えええ ええええええ") self.assertEqual(_rep(df), expected) # column name df = DataFrame({u'あああああ': [1, 222, 33333, 4], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=['a', 'bb', 'c', 'ddd']) - expected = (u" b あああああ\na あ 1\n" - u"bb いいい 222\nc う 33333\n" + expected = (u" b あああああ\n" + u"a あ 1\n" + u"bb いいい 222\n" + u"c う 33333\n" u"ddd ええええええ 4") self.assertEqual(_rep(df), expected) @@ -616,37 +633,49 @@ def test_east_asian_unicode_frame(self): df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=[u'あああ', u'いいいいいい', u'うう', u'え']) - expected = (u" a b\nあああ あああああ あ\n" - u"いいいいいい い いいい\nうう う う\n" + expected = (u" a b\n" + u"あああ あああああ あ\n" + u"いいいいいい い いいい\n" + u"うう う う\n" u"え えええ ええええええ") self.assertEqual(_rep(df), expected) # index name df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) - expected = (u" a b\nおおおお \n" - u"あ あああああ あ\nい い いいい\n" - u"うう う う\nえ えええ ええええええ" - ) + index=pd.Index([u'あ', u'い', u'うう', u'え'], + name=u'おおおお')) + expected = (u" a b\n" + u"おおおお \n" + u"あ あああああ あ\n" + u"い い いいい\n" + u"うう う う\n" + u"え えええ ええええええ") self.assertEqual(_rep(df), expected) # all df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'], u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']}, - index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お')) - expected = (u" あああ いいいいい\nお \n" - u"あ あああ あ\nいいい い いいい\n" - u"うう う う\nえ えええええ ええ") + index=pd.Index([u'あ', u'いいい', u'うう', u'え'], + name=u'お')) + expected 
= (u" あああ いいいいい\n" + u"お \n" + u"あ あああ あ\n" + u"いいい い いいい\n" + u"うう う う\n" + u"え えええええ ええ") self.assertEqual(_rep(df), expected) # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) - expected = (u" a b\nあ いい あああああ あ\n" - u"う え い いいい\nおおお かかかか う う\n" + 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + index=idx) + expected = (u" a b\n" + u"あ いい あああああ あ\n" + u"う え い いいい\n" + u"おおお かかかか う う\n" u"き くく えええ ええええええ") self.assertEqual(_rep(df), expected) @@ -660,14 +689,18 @@ def test_east_asian_unicode_frame(self): u'ああああ': [u'さ', u'し', u'す', u'せ']}, columns=['a', 'b', 'c', u'ああああ']) - expected = (u" a ... ああああ\n0 あああああ ... さ\n" - u".. ... ... ...\n3 えええ ... せ\n" + expected = (u" a ... ああああ\n" + u"0 あああああ ... さ\n" + u".. ... ... ...\n" + u"3 えええ ... せ\n" u"\n[4 rows x 4 columns]") self.assertEqual(_rep(df), expected) df.index = [u'あああ', u'いいいい', u'う', 'aaa'] - expected = (u" a ... ああああ\nあああ あああああ ... さ\n" - u"... ... ... ...\naaa えええ ... せ\n" + expected = (u" a ... ああああ\n" + u"あああ あああああ ... さ\n" + u"... ... ... ...\n" + u"aaa えええ ... せ\n" u"\n[4 rows x 4 columns]") self.assertEqual(_rep(df), expected) @@ -675,8 +708,10 @@ def test_east_asian_unicode_frame(self): df = DataFrame({u'あああああ': [1, 222, 33333, 4], 'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']}, index=['a', 'bb', 'c', '¡¡¡']) - expected = (u" b あああああ\na あ 1\n" - u"bb いいい 222\nc ¡¡ 33333\n" + expected = (u" b あああああ\n" + u"a あ 1\n" + u"bb いいい 222\n" + u"c ¡¡ 33333\n" u"¡¡¡ ええええええ 4") self.assertEqual(_rep(df), expected) @@ -753,7 +788,8 @@ def test_truncate_with_different_dtypes(self): # 11594 import datetime s = Series([datetime.datetime(2012, 1, 1)] * 10 + - [datetime.datetime(1012, 1, 2)] + [datetime.datetime(2012, 1, 3)] * 10) + [datetime.datetime(1012, 1, 2)] + [ + datetime.datetime(2012, 1, 3)] * 10) with pd.option_context('display.max_rows', 8): result = str(s) @@ -762,7 +798,8 @@ def test_truncate_with_different_dtypes(self): # 12045 df = DataFrame({'text': ['some words'] + [None] * 9}) - with pd.option_context('display.max_rows', 8, 'display.max_columns', 3): + with pd.option_context('display.max_rows', 8, + 'display.max_columns', 3): result = str(df) self.assertTrue('None' in result) self.assertFalse('NaN' in result) @@ -771,7 +808,8 @@ def test_datetimelike_frame(self): # GH 12211 df = DataFrame( - {'date': [pd.Timestamp('20130101').tz_localize('UTC')] + [pd.NaT] * 5}) + {'date': [pd.Timestamp('20130101').tz_localize('UTC')] + + [pd.NaT] * 5}) with option_context("display.max_rows", 5): result = str(df) @@ -1019,21 +1057,24 @@ def test_index_with_nan(self): y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() expected = u( - ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') + ' value\nid1 id2 id3 \n' + '1a3 NaN 78d 123\n9h4 d67 79d 64') self.assertEqual(result, expected) # index y = df.set_index('id2') result = y.to_string() expected = u( - ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') + ' id1 id3 value\nid2 \n' + 'NaN 1a3 78d 123\nd67 9h4 79d 64') self.assertEqual(result, expected) # with append (this failed in 0.12) y = df.set_index(['id1', 'id2']).set_index('id3', append=True) result = y.to_string() expected = u( - ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') + ' value\nid1 id2 id3 \n' + '1a3 NaN 78d 123\n9h4 d67 79d 64') self.assertEqual(result, expected) # all-nan in mi @@ -1042,7 +1083,8 @@ def test_index_with_nan(self): y = 
df2.set_index('id2') result = y.to_string() expected = u( - ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') + ' id1 id3 value\nid2 \n' + 'NaN 1a3 78d 123\nNaN 9h4 79d 64') self.assertEqual(result, expected) # partial nan in mi @@ -1051,7 +1093,8 @@ def test_index_with_nan(self): y = df2.set_index(['id2', 'id3']) result = y.to_string() expected = u( - ' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') + ' id1 value\nid2 id3 \n' + 'NaN 78d 1a3 123\n 79d 9h4 64') self.assertEqual(result, expected) df = DataFrame({'id1': {0: np.nan, @@ -1066,7 +1109,8 @@ def test_index_with_nan(self): y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() expected = u( - ' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') + ' value\nid1 id2 id3 \n' + 'NaN NaN NaN 123\n9h4 d67 79d 64') self.assertEqual(result, expected) def test_to_string(self): @@ -1660,8 +1704,8 @@ def test_east_asian_unicode_series(self): if PY3: _rep = repr else: - _rep = unicode - # not alighned properly because of east asian width + _rep = unicode # noqa + # not aligned properly because of east asian width # unicode index s = Series(['a', 'bb', 'CCC', 'D'], @@ -1686,7 +1730,8 @@ def test_east_asian_unicode_series(self): # unicode footer s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ'], name=u'おおおおおおお') + index=[u'ああ', u'いいいい', u'う', u'えええ'], + name=u'おおおおおおお') expected = (u"ああ あ\nいいいい いい\nう ううう\n" u"えええ ええええ\nName: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) @@ -1695,7 +1740,9 @@ def test_east_asian_unicode_series(self): idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) s = Series([1, 22, 3333, 44444], index=idx) - expected = (u"あ いい 1\nう え 22\nおおお かかかか 3333\n" + expected = (u"あ いい 1\n" + u"う え 22\n" + u"おおお かかかか 3333\n" u"き くく 44444\ndtype: int64") self.assertEqual(_rep(s), expected) @@ -1708,14 +1755,16 @@ def test_east_asian_unicode_series(self): # object dtype, longer than unicode repr s = Series([1, 22, 3333, 44444], index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ']) - expected = (u"1 1\nAB 22\n" - u"2011-01-01 00:00:00 3333\nあああ 44444\ndtype: int64" - ) + expected = (u"1 1\n" + u"AB 22\n" + u"2011-01-01 00:00:00 3333\n" + u"あああ 44444\ndtype: int64") self.assertEqual(_rep(s), expected) # truncate with option_context('display.max_rows', 3): - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], name=u'おおおおおおお') + s = Series([u'あ', u'いい', u'ううう', u'ええええ'], + name=u'おおおおおおお') expected = (u"0 あ\n ... 
\n" u"3 ええええ\nName: おおおおおおお, dtype: object") @@ -1746,23 +1795,32 @@ def test_east_asian_unicode_series(self): # both s = Series([u'あ', u'いい', u'ううう', u'ええええ'], index=[u'ああ', u'いいいい', u'う', u'えええ']) - expected = (u"ああ あ\nいいいい いい\nう ううう\n" + expected = (u"ああ あ\n" + u"いいいい いい\n" + u"う ううう\n" u"えええ ええええ\ndtype: object") self.assertEqual(_rep(s), expected) # unicode footer s = Series([u'あ', u'いい', u'ううう', u'ええええ'], - index=[u'ああ', u'いいいい', u'う', u'えええ'], name=u'おおおおおおお') - expected = (u"ああ あ\nいいいい いい\nう ううう\n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") + index=[u'ああ', u'いいいい', u'う', u'えええ'], + name=u'おおおおおおお') + expected = (u"ああ あ\n" + u"いいいい いい\n" + u"う ううう\n" + u"えええ ええええ\n" + u"Name: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) # MultiIndex idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( u'おおお', u'かかかか'), (u'き', u'くく')]) s = Series([1, 22, 3333, 44444], index=idx) - expected = (u"あ いい 1\nう え 22\nおおお かかかか 3333\n" - u"き くく 44444\ndtype: int64") + expected = (u"あ いい 1\n" + u"う え 22\n" + u"おおお かかかか 3333\n" + u"き くく 44444\n" + u"dtype: int64") self.assertEqual(_rep(s), expected) # object dtype, shorter than unicode repr @@ -1774,27 +1832,33 @@ def test_east_asian_unicode_series(self): # object dtype, longer than unicode repr s = Series([1, 22, 3333, 44444], index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ']) - expected = (u"1 1\nAB 22\n" - u"2011-01-01 00:00:00 3333\nあああ 44444\ndtype: int64" - ) + expected = (u"1 1\n" + u"AB 22\n" + u"2011-01-01 00:00:00 3333\n" + u"あああ 44444\ndtype: int64") self.assertEqual(_rep(s), expected) # truncate with option_context('display.max_rows', 3): - s = Series([u'あ', u'いい', u'ううう', u'ええええ'], name=u'おおおおおおお') + s = Series([u'あ', u'いい', u'ううう', u'ええええ'], + name=u'おおおおおおお') expected = (u"0 あ\n ... \n" u"3 ええええ\nName: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) s.index = [u'ああ', u'いいいい', u'う', u'えええ'] - expected = (u"ああ あ\n ... \n" - u"えええ ええええ\nName: おおおおおおお, dtype: object") + expected = (u"ああ あ\n" + u" ... 
\n" + u"えええ ええええ\n" + u"Name: おおおおおおお, dtype: object") self.assertEqual(_rep(s), expected) # ambiguous unicode s = Series([u'¡¡', u'い¡¡', u'ううう', u'ええええ'], index=[u'ああ', u'¡¡¡¡いい', u'¡¡', u'えええ']) - expected = (u"ああ ¡¡\n¡¡¡¡いい い¡¡\n¡¡ ううう\n" + expected = (u"ああ ¡¡\n" + u"¡¡¡¡いい い¡¡\n" + u"¡¡ ううう\n" u"えええ ええええ\ndtype: object") self.assertEqual(_rep(s), expected) @@ -2099,15 +2163,48 @@ def test_output_significant_digits(self): expected_output = { (0, 6): - ' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + ' col1\n' + '0 9.999000e-08\n' + '1 1.000000e-07\n' + '2 1.000100e-07\n' + '3 2.000000e-07\n' + '4 4.999000e-07\n' + '5 5.000000e-07', (1, 6): - ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + ' col1\n' + '1 1.000000e-07\n' + '2 1.000100e-07\n' + '3 2.000000e-07\n' + '4 4.999000e-07\n' + '5 5.000000e-07', (1, 8): - ' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07', + ' col1\n' + '1 1.000000e-07\n' + '2 1.000100e-07\n' + '3 2.000000e-07\n' + '4 4.999000e-07\n' + '5 5.000000e-07\n' + '6 5.000100e-07\n' + '7 6.000000e-07', (8, 16): - ' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06', + ' col1\n' + '8 9.999000e-07\n' + '9 1.000000e-06\n' + '10 1.000100e-06\n' + '11 2.000000e-06\n' + '12 4.999000e-06\n' + '13 5.000000e-06\n' + '14 5.000100e-06\n' + '15 6.000000e-06', (9, 16): - ' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006' + ' col1\n' + '9 0.000001\n' + '10 0.000001\n' + '11 0.000002\n' + '12 0.000005\n' + '13 0.000005\n' + '14 0.000005\n' + '15 0.000006' } for (start, stop), v in expected_output.items(): diff --git a/pandas/util/clipboard/exceptions.py b/pandas/util/clipboard/exceptions.py index f42d263a02993..413518e53660a 100644 --- a/pandas/util/clipboard/exceptions.py +++ b/pandas/util/clipboard/exceptions.py @@ -1,4 +1,3 @@ -# flake8: noqa import ctypes From 3d6c5a8723ae69c55a43d444dcb64fc532358714 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 30 Mar 2017 04:55:17 -0700 Subject: [PATCH 308/353] DOC: Fix up _DeprecatedModule parameters doc (#15843) [ci skip] --- pandas/util/depr_module.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index b181c4627b1e1..af7faf9dd96c8 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -13,8 +13,9 @@ class _DeprecatedModule(object): Parameters ---------- deprmod : name of module to be deprecated. - deprmodto : name of module as a replacement, optional - if not givent will __module__ + deprmodto : name of module as a replacement, optional. + If not given, the __module__ attribute will + be used when needed. removals : objects or methods in module that will no longer be accessible once module is removed. 
""" From 48749ce4a774fba73ea38501cd99820537549d5a Mon Sep 17 00:00:00 2001 From: Baurzhan Muftakhidinov Date: Thu, 30 Mar 2017 16:57:00 +0500 Subject: [PATCH 309/353] Fix a typo (#15844) --- pandas/sparse/sparse.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/sparse/sparse.pyx b/pandas/sparse/sparse.pyx index 7ab29414499fc..00d317c42b18d 100644 --- a/pandas/sparse/sparse.pyx +++ b/pandas/sparse/sparse.pyx @@ -71,7 +71,7 @@ cdef class IntIndex(SparseIndex): def check_integrity(self): """ Only need be strictly ascending and nothing less than 0 or greater than - totall ength + total length """ pass From 1e0fbd2b86985e11d1869ceff688214f3ca64055 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 30 Mar 2017 08:03:45 -0400 Subject: [PATCH 310/353] BUG: SparseDataFrame construction with lists not coercing to dtype (GH 15682) closes #15682 Author: Carlos Souza Closes #15834 from ucals/bug-fix-15682 and squashes the following commits: 04fba8d [Carlos Souza] Adding test_rename test cases (were missing) 483bb2c [Carlos Souza] Doing adjustments as per @jreback requests cc4c15b [Carlos Souza] Fixing coersion bug at SparseDataFrame construction faa5c5c [Carlos Souza] Merge remote-tracking branch 'upstream/master' 43456a5 [Carlos Souza] Merge remote-tracking branch 'upstream/master' 8b463cb [Carlos Souza] Merge remote-tracking branch 'upstream/master' 9fc617b [Carlos Souza] Merge remote-tracking branch 'upstream/master' e12bca7 [Carlos Souza] Sync fork 676a4e5 [Carlos Souza] Test --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/sparse/frame.py | 2 +- pandas/tests/sparse/test_frame.py | 36 +++++++++++++++++++++++-------- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 023d41763baee..1ecdd6dd8fbef 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1046,7 +1046,7 @@ Bug Fixes - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 41f301f263374..455d120cca640 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -142,7 +142,7 @@ def _init_dict(self, data, index, columns, dtype=None): sp_maker = lambda x: SparseArray(x, kind=self._default_kind, fill_value=self._default_fill_value, - copy=True) + copy=True, dtype=dtype) sdict = DataFrame() for k, v in compat.iteritems(data): if isinstance(v, Series): diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index c0c678c184ee8..ae1a1e35f1859 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -28,7 +28,6 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): - klass = SparseDataFrame def setUp(self): @@ -237,6 +236,18 @@ def test_constructor_nan_dataframe(self): dtype=float) tm.assert_sp_frame_equal(result, expected) + def test_type_coercion_at_construction(self): + # GH 15682 + result = pd.SparseDataFrame( + {'a': [1, 0, 0], 'b': [0, 1, 0], 
'c': [0, 0, 1]}, dtype='uint8', + default_fill_value=0) + expected = pd.SparseDataFrame( + {'a': pd.SparseSeries([1, 0, 0], dtype='uint8'), + 'b': pd.SparseSeries([0, 1, 0], dtype='uint8'), + 'c': pd.SparseSeries([0, 0, 1], dtype='uint8')}, + default_fill_value=0) + tm.assert_sp_frame_equal(result, expected) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan @@ -756,9 +767,18 @@ def test_sparse_frame_fillna_limit(self): tm.assert_frame_equal(result, expected) def test_rename(self): - # just check this works - renamed = self.frame.rename(index=str) # noqa - renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa + result = self.frame.rename(index=str) + expected = SparseDataFrame(self.data, index=self.dates.strftime( + "%Y-%m-%d %H:%M:%S")) + tm.assert_sp_frame_equal(result, expected) + + result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C1': np.arange(10, dtype=np.float64), + 'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + expected = SparseDataFrame(data, index=self.dates) + tm.assert_sp_frame_equal(result, expected) def test_corr(self): res = self.frame.corr() @@ -967,7 +987,6 @@ def _check(frame, orig): def test_shift(self): def _check(frame, orig): - shifted = frame.shift(0) exp = orig.shift(0) tm.assert_frame_equal(shifted.to_dense(), exp) @@ -1060,7 +1079,7 @@ def test_sparse_pow_issue(self): df = SparseDataFrame({'A': [nan, 0, 1]}) # note that 2 ** df works fine, also df ** 1 - result = 1**df + result = 1 ** df r1 = result.take([0], 1)['A'] r2 = result['A'] @@ -1126,7 +1145,7 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) -@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 +@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 @pytest.mark.parametrize('columns', [None, list('cd')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) @@ -1180,7 +1199,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): tm.assert_equal(sdf.to_coo().dtype, np.object_) -@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 def test_from_to_scipy_object(spmatrix, fill_value): # GH 4343 dtype = object @@ -1255,7 +1274,6 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(tm.TestCase): - def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], From 9c98e13172dd5decd99496f7f381568c547f6ba3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 30 Mar 2017 08:46:29 -0400 Subject: [PATCH 311/353] DOC: prettify bug fixes section (#15846) --- doc/source/whatsnew/v0.20.0.txt | 196 ++++++++++++++++---------------- 1 file changed, 95 insertions(+), 101 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1ecdd6dd8fbef..399f91fc60810 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -913,147 +913,141 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +Conversion +^^^^^^^^^^ + - Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`) +- Bug in ``Timestamp.replace`` with compat for passing long integers (:issue:`15030`) - Bug in ``Timestamp`` returning UTC based 
time/date attributes when a timezone was provided (:issue:`13303`) -- Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) +- Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`) - Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue:`14440`, :issue:`15578`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) -- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) -- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) -- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) - Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`) -- Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) -- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) -- Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) -- Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) -- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) -- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) -- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. 
(:issue:`15021`) - Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) -- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) -- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) -- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) -- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) -- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) -- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) -- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) - -- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) - -- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) -- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) -- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``Series.replace`` which replaced a numeric by string (:issue:`15743`) - +- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) +- Bug in ``Series`` construction with a datetimetz (:issue:`14928`) +- Bug in ``Series.dt.round()`` inconsistent behaviour on ``NaT`` 's with different arguments (:issue:`14940`) +- Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) +- Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`) +- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`) +- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) -- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) -- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) -- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) -- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) +Indexing +^^^^^^^^ +- Bug in ``Index`` power operations with reversed operands (:issue:`14973`) +- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) +- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) +- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) +- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) - Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) - Bug in ``Series.where()`` where TZ-aware data was converted to float representation (:issue:`15701`) -- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) -- Bug in ``Series`` construction with a datetimetz (:issue:`14928`) +- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) - Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) +- Bug in ``Categorical.searchsorted()`` where alphabetical instead of the provided categorical order was used (:issue:`14522`) +- Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) +- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) +- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) +- Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) +- Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) -- Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) -- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) -- Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) -- Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) +I/O +^^^ +- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`) +- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) +- Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) +- Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) +- Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) +- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) +- Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) +- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) +- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) +- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) +- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) +- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) +- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) +- Bug in ``pd.read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) - Bug in ``DataFrame.to_records()`` with converting a ``DatetimeIndex`` with a timezone (:issue:`13937`) +- Bug in ``DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`) +- Bug in ``.to_sql()`` when writing a DataFrame with numeric index names (:issue:`15404`). 
+- Bug in ``DataFrame.to_html()`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`) +- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) +- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) +- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +Plotting +^^^^^^^^ -- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) - - -- Bug in ``to_sql`` when writing a DataFrame with numeric index names (:issue:`15404`). -- Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. (:issue:`14580`) -- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) -- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) +- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) +- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) +- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) +- Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) +- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) - Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) - Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) - - -- Bug in ``DataFrame.to_html`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`) - -- Bug in ``Categorical.searchsorted()`` where alphabetical instead of the provided categorical order was used (:issue:`14522`) - - - -- Bug in ``resample``, where a non-string ```loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) - - - -- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) -- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) - - -- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) -- Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - -- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) -- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) -- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) +- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) +- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) +- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bins counts were incorrect (:issue:`13453`) +- Bug in ``groupby.transform()`` that would 
coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`) - Bug in ``.rolling()`` where ``pd.Timedelta`` or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) +- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) - Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) -- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) -- Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) -- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bins counts were incorrect (:issue:`13453`) +Sparse +^^^^^^ -- Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`) -- Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) +- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +Reshaping +^^^^^^^^^ - Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` was specified (:issue:`15676`) - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) - Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) - - -- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - - -- Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) -- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) -- Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) 
against a constant for an empty ``DataFrame`` (:issue:`15077`) -- Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) -- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) -- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) - -- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) -- Bug in ``pd.DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`) - - -- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) -- Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) - -- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) -- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) -- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) - +- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) +- Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) -- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) +- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) -- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +Numeric +^^^^^^^ -- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) -- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``.rank()`` which incorrectly ranks ordered categories (:issue:`15420`) +- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) - Bug in ``.mode()`` where ``mode`` was not returned if was only a single value (:issue:`15714`) - -- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) -- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) -- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) +- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) +- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) +- Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) -- Bug in ``pd.read_msgpack()`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) + +Other +^^^^^ + +- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Require at least 
0.23 version of cython to avoid problems with character encodings (:issue:`14699`) +- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) +- Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) From 1f8906078c723cc9b7000cdd552c03769cc4c5ca Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Thu, 30 Mar 2017 21:20:51 +0800 Subject: [PATCH 312/353] CLN: Fix a typo in comment (#15847) --- pandas/_libs/src/ujson/lib/ultrajson.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index d0588348baa44..4f51fa8b3eb38 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -233,7 +233,7 @@ typedef struct __JSONObjectEncoder { int recursionMax; /* - Configuration for max decimals of double floating poiunt numbers to encode (0-9) */ + Configuration for max decimals of double floating point numbers to encode (0-9) */ int doublePrecision; /* From b6d405d695249980aa2f93d58998412b4b81dcf3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 30 Mar 2017 16:42:23 -0400 Subject: [PATCH 313/353] TST: incorrect localization in append testing and when ``pytz`` version changes our tests break because of this incorrect (old) method, which works when you *dont'* have a tz change, but fails when the tz's actually change. Author: Jeff Reback Closes #15849 from jreback/localize and squashes the following commits: d43d088 [Jeff Reback] TST: incorrect localization in append testing --- pandas/tests/test_multilevel.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index fd5421abc89ad..5584c1ac6a239 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -83,9 +83,9 @@ def test_append_index(self): # GH 7112 import pytz tz = pytz.timezone('Asia/Tokyo') - expected_tuples = [(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz)), - (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz)), - (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz))] + expected_tuples = [(1.1, tz.localize(datetime.datetime(2011, 1, 1))), + (1.2, tz.localize(datetime.datetime(2011, 1, 2))), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)))] expected = Index([1.1, 1.2, 1.3] + expected_tuples) tm.assert_index_equal(result, expected) @@ -103,9 +103,9 @@ def test_append_index(self): result = midx_lv3.append(midx_lv2) expected = Index._simple_new( - np.array([(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz), 'A'), - (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz), 'B'), - (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz), 'C')] + + np.array([(1.1, tz.localize(datetime.datetime(2011, 1, 1)), 'A'), + (1.2, tz.localize(datetime.datetime(2011, 1, 2)), 'B'), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)), 'C')] + expected_tuples), None) tm.assert_index_equal(result, expected) From a1086517818039dc4461a526b19c4b7c917b9afe Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 30 Mar 2017 17:02:14 -0400 Subject: [PATCH 314/353] COMPAT: add 0.19.2 msgpack/pickle files (#15848) * COMPAT: add 0.19.2 msgpack/pickle files * show error * add in 2.7 pickles --- .../0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack | Bin 0 -> 12325 bytes .../0.19.2/0.19.2_x86_64_darwin_3.6.1.msgpack | Bin 0 -> 119196 bytes .../0.19.2/0.19.2_x86_64_darwin_2.7.12.pickle | Bin 0 -> 127525 bytes .../0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle | Bin 0 -> 125349 
bytes pandas/tests/io/test_pickle.py | 3 ++- 5 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack create mode 100644 pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_3.6.1.msgpack create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.12.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle diff --git a/pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack b/pandas/tests/io/data/legacy_msgpack/0.19.2/0.19.2_x86_64_darwin_2.7.12.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f2dc38766025e8b5d2d24d2587783c127686973c GIT binary patch literal 12325 zcmeHN3w%@689z7aBc*`p%N9YC7Ni}ku$J0Yx7oB5l<}}gahr^l+vc`Jng_Y5w$-gL zkk+XPDNM$6(l%*n3+e_Nn|oNbLna9B0Vp!9dpM@s=p0Jj9QAzXk)*dtn!X0?*ZcdO z@4K&@obP=9?{U6+>fI)bQL5VLGKvnVeov{@Y_m(U>@KOW%B4+?nojrH3Q?B#72wAX zlUZ5_Z@Z0-YMb;F`~>c0XRWlzt6XJvX&MbYy2KL<>A7GB6}ihE$BJ84qJTobd?8BN^8) z9>qAFzV?MM1TuFpKXlGpY?9q%HP-o#wT<3exnxy{;|*g23}-xo@kqusj7Kp(f$?a@ zCo(>XaXQ|PLNYWmV5-+8Fz&r{bX2RYa;aD$+T{&xfC1Sdnr)i^Fy#GCc-{NJeUDgC zDIJeJ5pnwLA%t3F+`S~%gR1K2WH61nWQWxj?#8{@9SC1YHi6CH9ipY2q7S<6u)`&T z7gamH%lcJJ9qpiHIr&dtNKg#q5L~8VeL>4%dIQEWz;7w9o6IIYdfIJ^wd8K8)S>u) z@;5u2g4!vBIaO+>3e#{V)so5zhslGB4l`U*soWry>}G`ApavhjgAPtwOBpHj+#p%!n6InWV(LE@;UQG3~ak) zRY1k)#WOc0V*jUq#(Ycy-gmL&0Pv+-FQ~Bp(?5~?@UoM!O%9of^nc;FhXHZw+UY#{ zi!T()+-{kRL&NsvW>b}9yd)@Mo`t%SBUCKxpayfeF`ITBJ+=j*<}$1G2s3EZY^=2P z7h-Vl)tJ|q-#D+apfQg+F1N>LQTIIz-HncIYYk1tOA|vpNe)YYNRs0(n1q((xXbL= z;nzV(+|+`zuhO;hw#~lLN*Pt~D>9CnMw3|x_8J+Mp2RpU#tk$*Zv5c3;g;8kHffs* zf@`bADBA5}HNA&}RSOB)lZ<<-&-5}xz<3R z;{Z!m3GE9Y_OX`D-uQex3vzWCaYlUHwrFNqqA_b=pVC%1#H0s`k1d>Xjzao57Dom2q7Rm{Ny z7_2uriusaeG_tHTl4U`51r3UK{7{O+w5#@$b*ViVt|#49;Xa_%^S(Ts7sQeb^i~Sv zX^iu%j3>p@*!>xd&te=g9?!U*@tKUL56t3*hLi^4x&kcJ$JBAPQLp8r@9}iSCeb35 z(m9iL`M~JMp&QsCIYU_}@NN)0l-JatJz4d5x-#rS3U`?vR~bSfWf!C)72WBkk#6{$|`yXPVgNT*@n1i zD-|6Z!K~+I*11cqMrmLnE+gBY!5>yl$5BR5>6hXv9B$Jwdg8H(=3x8GMS@Bl79J6) ziJA~Sanj_gv@x-9@d;NaCQZ3!>a=UG)9LidDbr_MKQlG$Q?ovue#7iJpUKFan{{LM zXLD}4`E#HD!Y#QvLtg&8g82(>y=~za7cE}0^!CDKcPuYjap#x5{FS>_es$H?igl~S zl2W5oR=&n`_u2}x#cErJSE7~o+*?&$bKm`6|3=`Etya5H zvP;GnXt7;^FJ}7laK>D2wVUv_S@`W9j8wR;Zf7U>vn#XV!P3RU>TxHndxy_DWqI7qy)L5C2ugX22&mpO#0)5%`1 zb%uJRvAYw*x?R}1bNm@Md7y~!belsWhC!SF+s&XQ;>Pku2Ghtf2Wfz+FQny4pcX!wWt`e(b4eVuT&y!8!oFePmYOBw-Gw+*hA6d-# zy0rQGS0nO{cMk;JeCX$G*OMP-AcXrT|KZ(%6J)De%=rTs&mGmHW{c?908BLnDg`x-8Ru8RcySR>t7A(a25Z$zZfGcM$>}<1#K~I5HzxIOUCJSJJR(a zjW#}Mnm%R5%(PkQv->YUP_Cgn3TX`dV5nDo5pL+0d=88SD_KQzEUY)DM2_Q5{kgQLQ`hQO|vcI|z_!sgcg{`cr0 zpFscZF#$n-{#^#e9keLaC#bc5^Y1(2k6c(QZcoLyBjD(;aG%g{EIKSaIE0G*_7A~h z{9^yrf8O_N<;xT8=M(O;F9Y_kpXbG4yF|P!_`dgi-{8QIQ2(&7?NI?i|3_(!&2sp! zQ;I_p6Zqx!kCes)26XZF`@gxbbAwy9_4f_`ep@FT`YCM#;>v45ctD_kY=H!Z#B}m$ z-_ie=Hel-EqoXh^x>cxuhwXF1yKRqZ6&!pBZZ`HK2Xn&1{#5dZ#9d}-Y?1oMUFzpG zV@Vm<`SZ&0y(av0c)mQ1gTk7|SBM(1Dc^hlB7H{3FsoZ|-8lUOGCIyx(JIzlm@#8#EejP*pd~V`BusXC4XG(pGx!`zyG5yUnmhFRm+cgjL3G!*}9~xgL7Q}WBKIP($jP~ggTeH76fj?hqBH}8| zk8A#o(P{4Y#D$F=6djw$u;{|5QedzbsguSy<=EH z&+#KcbHDeuVf%jcF7XHd_Bi+Ze^*Py|NVXM_(a9F@PBP0FdUbQ8_-L|-R>W0i5oZn zR}S&_??+kv(RRWpkWjY2-2O#5#Mg`XQkY*c=$B;>9})bwN@09L7y6^n|9*=Y|JyAB z|BV)LF5#yBERslQ34cD~gFB1=`5OLLQkqb%KW^EtQ<`v0LbiUn{fm_TK^^!jsZ5AL z{(UMR&HgaP)G(}2u2I+G9J(u2HY?Wib=LKWGyUDlhb-+EsrYvh#P=VuVO-pgZV7w3#RYcX_r!#^3HA5?+3kxT zq9XoEVI)-DpPdbcra#*t)e^rborEX3-x+fLN=g$N*YBk?;h2Ofme4#BLiAsx^bZIt? 
zfC;foLTr)*_NM_eff#6lDZq4KmV^q1{(O(W;QJGH^Z#N2PDmJ2fBsnV_Y3iVqv}sJ z5ra=cqfXfV&l>Sx>1qB4eu8A-|Aui?J)h9CMe+%@Sh)Fe|lz4cy|8x#^2zv(EI()LwWp& zyDVX5txls~1Vw+PEhMzQU)86$Ioj>Rn|tq&@kjn^23GxIR-ts+I!*s54fhE6x|1%Q@|7imLb;jbqJMAz# zp)Um!hUx?phU5ejhWZ2(hV=Kn<4=&_fBOFk=Sdj)6V8(`jWLd(h(AA%@q45F->4e@ zWT^I60t6y{ndFaa#6J?{J^ziW@yn|4%c4t&0u!RV6eZ$MlnP9U`VykLgeWl~3d{n> zWdn17Il)|DZZHp+7t9Cd2Md4&!9rkRun1TbqRVmUKd5SdBE0+?74GMK(LeKq@Q?IG z|M2+tTT`F^X&>^dJ_G*`3i6*F5r1SUVbn~J!2b1(kyRG_3EBHQn!l^^Z+h;?>X47w_-=`4VsJR9WBXQ2*As;s>X{Ka({%egf-{X0l=* zN8;zU;(mOe+v?D=dVjRX}0w!V-CSwYwVj8An24-RwW@8TKVjh+VON=GKl48lQ4p7tRPkhD~uJviekmE;#di+ zBvuM5jg`U5V&$;%SOu&iRtc+&Rl%xa)v)SV4Xh?s3#*OQ!Rli5u=-d7tRdD2Ym7C) znqtkc=2#2N2WyG>Vt$xE)(UHlwZQ_gwpcr?Jr;-sVZm4k)&UE}!mx0xBi0G)jCH}f zV%@OrSP!fx)(h*6^}+gL{jmPn0Bj&O2pfzI!G>bPu;JJUY$P@c8;y;@#$plJIBYyN z0h@?T!X{%=u&LNIY&tdrn~BZBW@C}q94rcp#$vF!*gR}Lwg6j*Ey5OKOR%NbGHf}v z0$Yi#!d7Ezu(jAaY(2IC+lXz#He*|`t=KkfJGKMciS5F6V|%c@*gkAOb^tqw9l{P{ zN3f&VG3+>Y0y~MF!cJpnu(Q}X>^yb>yNF%FE@M})tJpQ{I(7rQiQU3(V|TE-*gfn% z_5gc`J;EMiPq3%hGweC`0(*(Q!d_!^=4Y`-pwQK4V|7uh_TP;>U3UM>vU7 zIE^zni*q=S3%H0&xQr{fifg!z8@P#ExQ#owi+gw?JTaaGPl_kQljAAylz1vUHJ%1f zi>JfW;~DUbcqTkEo(0c}XT!7OIq;l#E<87$2hWS=!}H?>@Pc?Dyf9t_FNzn#i{mBm zl6Wb+G+qWTi@P>FJyfNMc zZ;Cg=o8v8TAG{^*i~HgJcq_a$-UbiA+v4r;_IMy3ga_jxcn3Tb55vRpj(8`$Gu{R7 zig&}i<2~@6crUy+-UsiC_rv?+1Mq?PAbc=B1RshI!-wM|@R9f^d^A1=AB#uekOuj1G6>-Y`)CVmUQjo-oV;`i|T z_yhbQ{s@1JKf#~k&+zB?3;ZSi3V)5i!QbNV@b~x!{3HGe|BQdZzvACw8$V7E1R}`T z=P8b9Cvp%uiCjc(A`g+5$VcQS3J?W}LPTMr2vL+MMieJX z5G9FHL}{W7QI;r2lqV_>6^TkjWugjEm8eEkCu$HiiCRQ$q7G4)s7KT%8W0VMMnq$x z3DJ~jMl>f{5I#gp!k6$P{E1dXYoZMiK(r;=5$%aUB8UhkLWmATC=o`46CH_8L}#K4 z(Us^%bSHWcJ&9gKZ=w&;m*_|ICk7A$i9y6*VhAym7)A^yMi3*3QN(Cs3^A67AjT2n zi3!9+ViGZ#m_ke?rV-PL8N^It7BQQMB<2uNL^KgY%q8X#^N9t-LShlIm{>wAC6*D( zi50|3VimEPSVOEO))DK84a7!b6S0}tLTn|r5!;C!#7<%tv76XK>?QUQ`-ua@LE;c` zm^eZlC5{oti4(+0;uLY3I76Hz&JpK{3&cg@5^5EVj&Q4v%W6+^{Q2~-l5LZwj| zR2G#(`Z zB46Z({81~^8nrVQH~7z#%nQ76VKs`|})Eo6d zeNjKu9}PeQ(I7M!4M9WEFf<&EKqJv8G#ZUTV^IVehsL7`Xd;?~CZj26Dw>9-qZw!> znuTVgNHhmUp=cC?=AwCMK3aelqD5#iT7s6MWoS8CfmWhbXf;}c)}nQ2J=%aaqD^Qs z+Jd&CZD>2%fp(%@XgAt}_M&}gKRSR8qC@B~I)aX(W9T?Kfli`R=rlTm&Z2YZJi34` zqD$y9x`M8vYv?+NA&P}G0GoD{QHxa`02Uip9=YTIHLfHpajaG z0;*u_di3{?4jP~dTA&R&V8RlX2iu9j#9$IIDVPjQ4yFK8f~mmNU>Yzjm<~)2#w`iN zeKN)q8yCfYGJ{#btY9`UJD3B^3FZQGgL%NbU_LNESO6>t76J={MZlt9F|asT0xSuZ z0!xEsz_MUDusm1+tO!;DD}z9BcvlfGt5^&=2$nTY;^?HedkQ7HkK$2Lr(%Fc=I0JAk2J7#I$A1UrG9!7gA| zup8JN>;d)!dx5>dK44$4AJ`up01gBPfrG&z;81WFI2;@Sjs!=6qroxYSTF({2aX3P zfD^$<;AC(LI2D`*P6uazGr?KlY%mg>14e<-U<^1HoCnSa7k~@FMc`s^3Ahwo1}+Cz zfGfdO;A(IUxE5Rot_L@O8^KNBW^fC*72F1H2X}xw!Cl~Pa1Xc_+z0Ll4}b^3L*QZX z2zV4c1|A1bfG5FI;A!v-cosYdo(C_07r{&5W$+4k6}$#s2XBBk!CT;M@D6wvya(O~ zAAk?RN8n@d3HTIz20jO0fG@#U;A`*=_!fKzz6U>mAHh%HXYdR775oM!theKM{0;&{ zAPG_+4Kg4Lav%>1pa@E!3@V@sYM>4ppb1)_4LYC;dSD_jF_;8Q3MK=SgDJq2U@9;* zmrUTQ18NiHSCNMLY1w^uzhF~MG zG1vrb3N{0qgDpTGuqEgV`hosnE3h@#1`Gh(g6+WeU?3O-27@7B2QU;21H-|NU?;FM z*ahqgb_2VEJ;0t|FR(Y*2kZ;>1N(ymz=7Z(a4DtBG&lwv3r2wB z!13S&a3VMfoD5C@r-IYK>EH}-CO8Y64Mu`pi~;9@^T7Gw0&pR?2wV&<0hfZy zz~$fya3#13Tn(-P*MjT7_233@Be)6N3~m9pg4@9D;0|ynxC`73?g96L`@sF+0q`Jr z2s{iP0gr;mz~kTv@FaK&JPn=!&w}T`^WX*WB6tbB3|;}Rg4e+7;0^F5cniD@-U07| z_rUw$1MngE2z(4a0iS};z~|r#@Fn;Pd=0(<--7SJ_uvQcBlrpY41NK>g5SW{g@NxM z93(&l{?lt#GMrZdRZs(U&;U))0&UO%UC;v)fr-H+U{Wv{m>f(2rUX-gslha0S}+}$ z9?Sq{1T%q|!7N}_FdLX1%mL;EbAh?RJYZfhADAC302TxbfrY^$U{SCbSR5f9;^UX1S^4+!75-?uo_q$tO3>pYk{@FI$&L}9#|i205$|0fsMf?U{kOe z*c@yD`hYD#U(gTq2U~%y!8Tw3*cNODwg&^jATSsV0Xu-9U>FzdGO7&sgp0gePmfuq4O;8-vM90!gECx8>d 
zN#JB~3OE&<22KZOfHT2a;A}7woC8LI(O?WX7n}#q2N!?~!A0O=a0$2+Tm~)&SAZ+Q zRp4rH4Y(Fu2d)PS;!9(C-@CbMm zJO&;IPk<-EQ{ZXv40ski2c8EnfEU3_;AQX%con<`UI%Z0H^E!rZSW3w7rY1F2OoeB z!AIa@@Co=7d#4fqy(2fhbCfFHq6;Aij)_!ayH#=aI1KmLORh(Hpg zKpJE~7UV!46hIM_Kp9j(71Tf-G(Z!yKpS*G7xchHU}7){m=sI~CI?f1DZx}=YA_9$ z7EA}G2Qz>f!AxLgFbkL!%m!u$bAUO)Twrc6511Fs2j&M0fCa%qU}3NbSQIP<76(g! zCBaf)X|N1f7Ayyr2P=RT!Af9dunJfetOiyGYk)PuT3~Ik4pCf2YY}$!Cqi*un*W5><9J-2Y>^?LEvC;2sjiR1`Y>DfFr?C;An6RI2Mcm$ARO) z3E)I<5;z&00!{^|fz!bm;7o89I2()v=YUaQG#CTU1?PeD!3E$#a1po|Tmmiymx0T{ z72ry66}TE)1Fi+vf$PBy;6`v0xEb66ZUwi2+rb^+PH-2v8{7l#1^0pb!2{qy@DO+y zJOUmCkAcU*6W~ek6nGju1D*xXf#<;s;6?Bfcp1C`UInj#*TEa$P4E_Y8@vPF1@D1B z*5ZHT<-Xjx{`Sj#>DAwQxvyG^*ugvY{T}RpFZccL<-Y&D-1oni`~LTG-~V3j``^ob z|1ZDXcON}K578s^7(GEx(KGZMy+AL~EA$$@L2uDJ^d5acAJHfD8GS)t(YM%vA14VC zkt9ixG|7-G$&oxMkRmCOGO3U%sgXKqkS1x7HtCQq>5+-Z#AFgODVdB+PNpDJlBvkl zWEwIpnT||PW*{??naIp!7BVZDjm%EwAajzr$lPQeGB25r%uf~|3zCJ%!ekM$C|Qgw zPL?1`lBLMfWErw7S&l4ERv;^qmB`9u6|yQ>jjT@AAZwDf$l7EbvMyPVtWP!|8GKQQ>&LiiO3&@4!B62aggj`B4 zBbSpa$d%+Oay7YzTuZJa*OME_jpQbBGr5J_N^T>!lRL%5N`50T3a1DP zQ6xoCG{sOX#Zf#ZP$DH!GNn)|rBOO%P$p$jHsw$*K#8eV0DV2;$PNkqyQmLra zR2nKRm5xeJWuP)rnW)TE7Ah;1jml2tpmI{VsN7T@Dle6f%1;%b3Q~or!c-BeC{>Iq zPL-faQl+TUR2ix)RgNl8RiG+Tm8i;86{;##jjB%7plVXJsM=H=sxDQJs!uhb8d8m@ z##9rkDbPPL$XsFsv39j(jnpP;Gqr`NItRI!m3S&Qlkti_|6RGIfQzN?oI_Q#Yua)Gg{Zb%(l3 z-J|YP52%OKBkD2rgnCLnqn=YQsF&0$>NWL-dP}{d-cui_kJKmXGxde~N`0d+8m9>w z(IidLG|kW~&Cxt9&>}6-GOf@mtrmDV>Z?PN$$#(y8dw zbQ(G>osLdVXP`6Endr=P7CI}Pjm}QzpmWl>=-hN3Ixn4%&QBMh3(|$?!gLY3C|!&$ zPM4rd(xvFqbQ!uVU5+kKSD-7>mFUWJ6}l>2jjm4Dpli~#=-PB0x-MOhu1`0h8`6#F z#&i?9Dcy{2PPd?a=$5oE?MM65t?1Tt8#;h)OShxj(}8pl9ZZMN9q3Rxj1H$e(w*qe zbQiiS-Hq-}_n>>yz3AR_AG$BykM2(opa;@}=)v?5dMG`N9!`&-N7AF{(exO4EFD3Q zqsP+|=!x_sdNMtQo=Q)nr_(d≠4rHXTXNp`++%I)v&$^e%cgy@%dQ@1ytA2k3+J zA^I?Vgg#0iqmR=k=#%s*`ZRrpK1-jY&(jy^i}WS>GJS=Lgw^ey@}eTTkF z-=pu-59o*VBl`ZfKAeoMcj-_sxHkMt+{GyR4BN`Ip<24@Hc zF(gAVG{Z0~!!bM~Fd`!{GNUjmqcJ*TFeYO$HsdfZ<1vYt#7q(7A7l`jmggBU~)3KnA}VrCNGnZ$A#9hguij0tBtGM$*t zOc$mr(~arQ^k8~2y_nuiAEqzUkLk}0UE{naWIKrZY2`nanI^HWSIrVWOC5CWe{I%wy&=3z&t>B4#nOgjvcg zW0o^3n3c>bW;L^hS<9?r)-xNJjm#!yGqZ)+%4}n{Gdq}_%r0g(vxnKs>|^#b2bhD* zA?7f1ggMF_V~#T?n3K#Y<}`DLIm?`5&NCO7i_9hFGINEw%3NcvGdGx<%q`|NbBDRh z++*%D515C{Bjz#lgn7z5W1cfFn3v2e<~8$%dCR#>R0#B35aDVvN<&Zb~fvZ>hA zY#KH#n~qJ-W?(b2nb^#17B(xJjm^&HU~{s$*xYO$HZPlx&CeEK3$lgS!fX+?C|isz z&X!>&N=Dt=QIV8#aJ#%eG_Nvw>_78_b5V9oSGdj16ZyvYpt@ zY!|jG+l}qc_F#Lmz1ZGtAGR;skL}M6U@IdUyNBJ&?qm0}2iSw` zA@(qPggwe0V~?{Z*puuj_B4BjJ@D^-dxyQt z-ed2x57>w7Bla=-gni0BW1q7x*q7`p_BH#4eapUM-?JaskL)M*Gy8@8%6?-p4(A9C zaU@4^G{`)8!mur%eCX$bAenC7tDol9k@^~j0@*Fa-F!& zTooj z@Fs8ZHt+B*@9~NF#C#GyDW8l_&Zpp0@~QaL zd>TG2pN>z@XW%pPnfT0n7CtMVjnB^K;B)f1_}qLRJ};k-&(9a&3-X2d!h8|FC|`^( z&X?dz@}>CFd>OthUyd)&SKur1mH5hh6}~E8jjztv;A`@=_}Y9OzAj&nug^E&8}g0# z#(WdLDc_85&bQ!w_?Emc@5lS|t@ze_8$N(<%eUj(^MQO2AIyjF9r#c_j1T8K@}2n3 zd>6he-;M9i_uzZ-z4+dIAHFZ&kMGY9;0N-9_`&=TekebTAI^{9NAjci(fk;GEFZy- z_U(2uK*Yg|rjr=BlGrxu3%5USh^E>#R{4RbszlYz;@8kFL2l#{h zA^tFbgg?q3=r8{xpAvKg*xv&+`}fi~J@2GJl1?%3tHJ^EddL{4M@Ae}})z z-{bG|5BP`tBmOb}gn!CE|gzzMt{2%;bfvY-g6pb5HQ2&P~Ow%`b^;0cL@#6l7wsgO)aE~F4r3aNzD zLK-2hkWNT1WDqh6nS{(j79p#UO~@|f5ONB+gxo?NA+L~6$S)KS3JQgU!a@R1hi(m4wPd6``t7O{gx^5NZmwgxW$Kp{`I*s4p}S8VZes z#zGUJsnASlF0>GQgqDJ@;3xPCt%TM>8zDewE3^~Z3xPtA5G;fU9fVLJOb8b`3Y~<` zLKmT{&`szr^bmRqy@cLEAEB?%Pv|cU5C#f^gu%iPVW==n7%q$uMhc^Z(ZU#EtPmlL z6UGY@go(l=VX`nqm?}&YrVBHKnZhh#wh$@I5u$`>Ax4-h%oFAd3xtKjB4M$xL|7^; z6P61rgq6Z7VYRSESSzd()(abijlw2jv#>?jDr^(B3p<3J!Y*OAut(S{>=X722ZV#d zA>puaL^vuO6OIcfgpeY8;Xs@ z#$pq(sn|?xF18ST#FnD3=qLJ%t;E)18!}L|iH^ 
z6PJrC#FgSIakaQcTq~{<*NYp(jp8P8v$#dvDsB_Ei#x=f;x2KwxJTS8?i2Tm2gHNo zA@Q(yL_8`U6OW50#FOGF@w9kGJS(0P&x;qti{d5mvUo+jDqa(>i#NoZ;w|yEct^Y| z-V^VO55$M!Bk{5LM0_ef6Q7GO#FyeL@wNCyd@H^a--{o_kK!lsv-m~)Dt;3&36}^7 zNu)$cw8TiP#7Vp)NTMW3vZP3=q)EDDNTy^-w&X~zeNR8A@{Rgfx5m88m26{)IJO{y-{kZMY`q}oy)sjgH{sxLK=8cL0% z#!?fhsnkqrF13(+q?VGey`0ibX|gm$nkr3`rb{!VnbIt2wiGGNk)ot%DMp$r&6DO!3#5h8B5AR-L|Q5> zla@;>q?OVtX|=RQS}U!S)=L|tjnXD*v$RFpDs7XtOFN{U(k^MYv`5-2?UVLP2c(12 zA?dJmL^>)Rla5O#q?6Jq>9ll4IxC%%&Px}hi_#_OvUEkdDqWMVOE;vO(kbVs@? z-IMN152T0EBk8g9M0zSclb%a2q?ghw>9zDmdMmw?-b){(kJ2aUv-Cy!Dt(hM8J7te z$)rrlw9Lq?%*nhg$f7LCvaHCetjW4;$fj(`w(Q8R?8%Ab#BvfjshmtsE~k)F%Bkek zavC|UoK8+JXOJ_>ndHoJ7CEb&P0lXokaNnp?ixnt>o5n8#zF3E4P!|%Ykx`94v>(9pq3sOb(Yj%AMrS zau>O)+)eH-_mF$az2x3>Dyfvz zN*X1tl1@plWKc3HnUu^*7A32aP06n0P;x4{l-x=lC9jfC$*&Yp3Mz$^!b%aPs8UQR zu9Q$pDy5XtN*SfBQcfwaR8T4^m6Xa#6{V_DO{uQbP--f*l-f!irLIy>sjoCp8Y+#H z#!3^VsnSeouC!2ml$MIG;-~m4t(4YE8zn$#tF%+vD}hRo60C$M9h6WdObJ&yDxH+h zN*AT8(oN~E^iX;#y_DWcAEmF-PwB4=PzEZ4l)=gnWvDVt8Lo^_Mk=F}(aIQQtP-J& zQ^qS3l!?kDWwJ6wnW{`vrYkd)naV6>wi2n#QKFP+B}SR6%v0to3zUV*B4x3%L|LjV zQ{IqD2b6=# zA?2`gL^-M)Q;sVql#|LS<+O4}Ijfvg&MOy`i^?VCvT{Yas$5g9D>syz$}Q!#a!0wV z+*9r=50r<>BjvI3M0u(_Q=Tg?l$XjY<+bugd8@or-YXxJkIEZys;#A*^XshUhpuBK2^s;Sh} zY8o}InodoxW>7P#nbgc`7B#DyP0g<6P;;ue)ZA(wHLsda&94?v3#x_G!fFw)@e z#%dF_soG3!uC`Ep)RwBR>Zkgvt<=_P8#O>}tF}|ytAT2e8mxw>9n?@YObu5%s-4u% zY8SPu+D+}Q_E3AOz0}@nAGNRAPwlS`PzS1m)WPZyb*MT_9j=a0N2;UL(drm=tQw(? zQ^%_l)QRdOb+S4|ovKb#r>is6nd&Tcwi>CMnJ+x<}os?o;=x2h@Y= zA@#6&L_Mk=Q;(}B)RXEd^|X3MJ*%Em&#M>Ii|QryvU)|ms$NsCt2fk}>MixQdPlvh z-c#?Z57dY1BlWTRM1870Q=h9Z)R*cj^|ks&eXG7x->V34c7<_ zX{1JJw8m(x#%a7JXrd-*vZiRNrfIrnXr^Xqw&rNA=4pwv#99(9sg_JjuBFgYYN@o; zS{f~_mQG8rWzaHenY7GW7A>omP0Oz3&~j?IwA@-AEw7eO%dZvC3TlP4!delns8&oX zu9eVAYNfQ&S{bdZR!%FgRnRJGm9)xQ6|JgPO{=ce&}wS6wAxx7t*%y2tFJZC8fuNS z##$4tsn$$uuC>s7w3eE$=BN2KG&C}*<3$%sWB5kp@L|dvY z)0S&1w3XT_ZMC*WTdS?p)@vKIjoKz{v$jRss%_J@Ydf@^+AeLkwny8m?bG&a2egCQ zA?>hsL_4Y-(~fH=w3FH??X-4AJFA`3&TAL6i`pgavUWwgs$J8rYd5r;+AZz2c1OFb z-P7)C544BcBki&FM0=_|)1GTDw3pf|?X~tsd#k7-8Sw9e?P&gr}^=%Oy^vaaZ=uIajN=%#Mzw(jVz?&*p2#Cj4vsh&(vuBXsb>Z$b9 zdKx{go=#7%XV5e1ne@zh7Coz;P0z09&~xg!^xS$LJ+GcmI3+jdR!g>+Cs9sDj zu9whD>ZSD3dKtZ}UQREsSI{f!mGsJb6}_rnO|P!k&}-_o^xAqIy{=wQudg@I8|sbp z#(ERIsoqR)uD8&A^p?7>?x*|ft@PG<8$Cd8tGCnJ>w$WZ9;}Dx9rRE=Ob^#P>Yen? zdKbN`-c9eW_t1Olz4YFCAHA>MPw%e}&W(fSyDtRA6{ z)5q%*^ojZ;eX>49pQ=yOr|UEHnffe!wjQa^(WCTeJw~6a&(r7Y3-pEhB7L#GL|>{e z)0gWj^p*N5eYL(uU#qXv*XtYfjrt~iv%W>&s&CV`>pS$F`YwI9zDM7y@6-3|2lRvb zA^os^L_ew@(~s*X^ppB2{j`2YKdYb9&+8ZTi~1$~vVKLss$bKu>o@e9`Yrvoen-En z-_!5w5A=unBmJ@dM1QJ3)1T`v^q2Z8{k8r^f2+UK-|HXrkNPM5v;IZ@s(;fl12+f* z8KglOw80px!5O?E7@{E=vY{BNp&7bi7^Yzvw&57A;TegH#6}V$sgcY`Zlo|$8mWxb zMj9inkX&B$)#Fmf8XjNC>ZBd?Lq$Zr%d3L1rs!bTCJs8P%) zZj>-e8l{ZVMj4~5QO+oDR4^(Um5j?^e}oFy^P*QAEU3)&**OqFa{cfjKRhbW2iCA7;cO(MjE4x(Z(2KtPx?1 zGsYVejETl1W3n;Dm}*QjrW-SinZ_(*wh?K}F`|rUBgU9(%roX23yg)vB4e?!#8_%9 zGnN}GjFrYJW3{oySZk~^)*Bm)jm9Qpv$4h4YHTyM8#|1h#x7&GvB%hJ>@)To2aJQp zA>*)d#5igkGmaZ4jFZMG3FYJ4*=6E_JH znWRaXw8@yP$(g(6wYl#AXsRshP}7Zl*9(nyJjx zW*RfCna)gaW-v3Fnas>)7Bj1v&CG7*Fmsx@%-m)kGq0J?%x@Mj3z~(@!e$Y(s9DS` zZk8}hnx)LrW*M`rS1Xnw`wf zW*4)o+0E>3_Aq;zz0BTbAG5F7&+KmwFbA50%)#ambErAY9Bz&);%<}P!$xyRgV?lbqB2h4-! 
zA@i_##5`&qGmo1m%#-FR^R#)!JZqja&zl#_i{>TsvU$b4YF;z1n>Wmx<}LHKdB?nK z-ZSr;56p+=BlEHO#C&Q#GoPC;%$MdX^R@ZLd~3cl-NoA9js6*%nG+UTAi%U zRu`+Q)y?W|^{{$cy{z6=AFHp`&+2atum)O#tije0Yp6BM8g7lSMp~n+(bgDitQBF6 zv&LH!tclhnYqB-Pnrcn6rdu|8_jn*b>v$e(AYHhQ&TRW_s)-G$ewa40P?X&h<2dsnE zA?vVp#5!snvyNLQtdrI$>$G*oI%}P?&RZ9(i`FIUvUSC}YF)FgTQ{tm)-CI{b;r7E z-Lvjn53GmQBkQsC#CmEyvz}Wote4g+>$Ua9dTYJ2-di86kJcyav-QRLYJIaX8@CA? z*`!U`w9VM8&Dp#y*rF}jvaQ&vt=YP5*rsjSw(Z!i?b(U!#C8%psh!MDZl|zQ+Ntc+ zb{adaoz6~gXRtHcne5DV7CWn*&CYJ;uyfkE?A&%9JFlJ3&Tkj63)+S3!gdk6s9nr1 zZkMo2+NJE$b{V^@UCu6VSFkJEmF&uP6}zfk&8}|Ouxr}2?Amr6yRKc&u5UN68`_QR z#Csol(OZnv;~?3T8#?PvSjt?brz8#};mYqzu8+ktkF9c+i#9qdp$%nr9Z+MVpq zb{D&=-OcW9_pp1~z3kq0AG@#J&+cyzum{?M?7{XBd#F9k9&V4YN7|$8(e@a7tQ}#G zv&Y*L?1}ayd$K*no@!6Cr`t2^nf5GuwjF8Dv7_v0JI0=C&$H*-3+#pVB73pD#9nGI zvzOZ|?3MN^d$qmBUTd$j*V`NHjrJycv%SUMYHzc*+dJ%?_AYz3y~o~b@3Z&Y2ke9P zA^Wg>#6D^tvya;+?34B>`?P(=K5L(|&)XO5i}oe^vVFzAYG1Rj+c)f+_AUFieaF6Q z-?Q)A5A28bBm1%a#C~c&v!B~9?3eZ{`?dYXervz8-`gMTkM<|~v;D>XYJama2X_bu zIiy25w8J>8!#TVoIHDstvZFYvqdB@`IHqGcw&OUi<2i|(#7+_?sguk}?xb*1I;ou0 zP8uhzlg>%+WN6{o6G&8hCxaB4cWoZ3zur>;}asqZv!8aj=f z#!eHbsng79?zC`xoR*HS$G#)JAqD+6YPXI9h^`n%n5foI-Q)( zP8X-E)6MDb^l*AQy`0`oAE&R=&*|?Ba0WVqoWafzXQ(sG8Sad5MmnRM(asoWtP|mk zbH+OpoQcjPXRTGkiJ3E}6&Ms%Sv&Y%%>~r=z2b_b> zA?L7j#5w95bB;SFoRiKe=d^RiIqRHr&N~;Ji_RtIvUA0`>RfZKJ2#x0&MoJ*bH};s z+;i?b51fb2Bj>U6#ChsGbDlddoR`ik=e6_3dF#A$-a8+hkIpCOv-8FI>U?uB7k3F4 zxui?Ew9B}x%elNOxS}h$va7hNtGT*sxTb5lw(GdA>$!>C#BLHdshiAA?xt{4x~bgM zZW=eOo6b$|W^gmQncU277B{P#&CTxSaC5r3+}v&+H?Nz|&F>a)3%Z5e!fp|_s9Ve} z?v`*%x~1IGZW*_%Th1--R&Xo2mE6j16}PHe&8_a%aBI4?+}ds(x2{{yt?xE)8@i3$ z#%>e0soTtL?zV7!+?KAd>*xBrt=!gb8#lmh>$Y>-yMbTYwlyF1*S?k;z?yT{$@?sNCM2i$}1 zA@{I*#69XBbC0_x+>`Dp_q2P)J?oxx&$}1gi|!@&vU|n7>RxlNyEojM?k)GWd&j-& z-gED}58Q|DBlofU#C_^MbDz5}+?Vbv_qF@Ree1q+-@6~&kM1Y;v-`#U>V9)E5BCTU zd89{qw8wa?$9cRbc%mnHvZr{er+K<(c&2B0w&!@R=Xr^|#9k6Fsh7-4?xpZjda1nB zUK%g0m(EM?W$-e3nY_$i7B8!p&CBlP@N#;&yxd+MFRz!+%kLHN3VMaS!d?-ts8`G@ z?v?OLdZoP5UKy{fSI#T%Rq!f$mAuMc6|bsS&8zO!@M?OsyxLwJudY|mtM4`N8hVYq z#$FSzsn^VF?zQlIyq2D?=jZu*t-RJ=8!y0X>$UURdx2h%7wm<29lTI4%nSEAdY!z^ zUKg*c*Ujth_3(Ony}aIDAFr?1&+G3E@CJH=yuscOZ>TrS8}5zpMtY;X(cTzutQX;p z^TvATUD3dpo?H-Y##qx5wM-?eq3~2fTyc zA@8tv#5?L8^NxEbyp!H3@3eQuJL{eE&U+WUi{2&gvUkP1>Rt1$dpEqB-YxI8cgMTy z-Sh5y54?xoBk!^I#Cz&J^PYPzyqEtUY4_A6Mi;j2I#1F;M;#|Ui?eFh?AW$#+qP}n zwr$(CZQJI0$9m5-))?RJckdiO;jF5StHM8pe-8f={x$qt`1kN1;XlKFh5ru!6aF{+ zUw9<(pNK4?h!7D~L=(|P3=t|~idZ7Hh$G^PFcDA07YRf{kw_#KNkmeSOoR&|3}FgO z*uoL6kirwbP$CdoM2O@fg-9tvWe^>hsY^%iQFQO z$Sd-R{Gxy;C<=+fqKGIeiizT)geWOWiPEBsC@ac|@}h#MC@P7{qKc?0s)_2NhNvlO ziQ1x$s4MD;`l5koC>n{zqKRlKnu+G3g=i^SiPoZxXe-)@_M(I6C_0JGqKoJ%x{2U~m@DRq`C@@sC>DvuVu@HPmWkzJg;*(8iPd6_SS!|v^5U9VMkAAv*~nsKHL@AmjT}Z!BbSle$YbO+@)`M!0!BfjkWttuViYxs z8O4nfMoFWTQQ9bDlr_p3<&6qPMWd2Y*{EVvHL4lajT%Nxqn1(IsAJSM>KXNo21Y}p zkSk+IlVVk|Y58Ox0o#!6$AvD#Q;tTomd>x~V@Mq`t)+1O%i zHMSYsjUC2LW0$ep*kkN9_8I$)1I9t)ka5^JVjMM&8OMzi#!2IpaoRXzoHfoF=Zy=- zMdOlj*|=g{HLe-gjT^>IncW-2qanZ`_OrZdx<8O)4kCNr~{#ms7EGqamH%$#N}Gq;(?%xmT|^P2_Cf@UGJ zuvx?`Y8Eq#nzfVChGrwPvDw6IYBn>Qn=Q}&Qj`R`By+Mk#hhwR zGpCy~%$epabGA9hoNLZA=bH=6h2|o2vAM)tYA!REn=8zf<|=cwxyD>;t~1x08_bR7 zCUdj7#oTIcGq;;N%$?>gbGNz2+-vSL_nQaIgXSUguzAEhY92F>nGq0OB%$w#d^R{`%yldVw@0$O*BU%4hk*z3Jh!xd} zW<|GRSfN%-E0z`8ietsK!mM~!d@F&K&`M+_wvt#$tz=fXB`m`-Ez7bk$8s%cd6sV} zE3mW`VI{XxSShVkR%$DamDWmUrMEIz8LdoKW-E)8)yigNw{loHtz1@aE02}e%4g-b z3RnfLLRMj`h*i`oW)-(eSS77eR%xq@Rn{tJmA5Kb6|G8EWvhx+)v9Jyw`y25ty)%X ztBzIIs%O==8dwdjMpk31iPh9Sy)023P~FLDpbvh&9w2W(~JSSR<`b)@W;tHP#wujkhLP6RkyCBTx@XyP!<`e#M5|Fa|8 
zQS1;qsvXUaZpW}g?U;5fJGLFij%$b6@$C3^0z09d$WCl0v6I@#>~LGyhHcuGZQG9R z+S2xH-&S^DYdgYDZl|zQ+Ntc+b{adaoz6~gXRtHcne5DV7CWn*&CYJ;uyfkE?A&%9 zJFlJ3&Tkj63)+S3!gdk6s9nr1ZkMo2+NJE$b{V^@UCu6VSFkJEmF&uP6}zfk&8}|O zuxr}2?Amr6yRKc&u5UN68`_QR#Csol(OZnv;o+O6!?b{o5`-Og@rcd$F!o$Stb z7rU$7&F*gZuzT9Q?A~@CyRY5P?r#sU2ik+|!S)b)s6EUcZjZ1>+N12z_85DtJn zZlADE+NbQ(_8I%Eea=2_U$8IQm+Z^-75l1v&Ax8muy5M8?A!Jo`>uV@zHdLUAKH)X z$MzHZsr}4;ZojZ!+OO=__8a@H{my=Gf3QE=pX|@}7yGOI&Hirxuz%XW?BDhu`>*}a zj^zC3M0TP$Ax=~$niJiL;e33K8(@tp)tLMM@v*h%6fb&@&Zj&Kae zbS%eq9LIH}<2k;goWRjegp=G!;iPm@IjNmAPFg3OlitbTWOOn)nVl?7RwtX2-O1tP zbaFYlojgunC!dqwDc}@z3OR+HB2H1Km{Z&-;gobrIi;O4PFbg%Q{JiIRCFpim7OY1 zRi~O$-KpW!bZR-ZojOikr=C;aY2Y+;8aa)fCQehQnbX{9;k0yGIjx;GPFts))86Ue zbaXm7ot-XDSErlP-Ra@P;m7CDQZCC*Z3nX}wk z;jDC4IjfyD&RS=kv)j*Ip7?04mpRN zBhFFhm~-4Y;hc0%Ij5a7&ROT2bKbe&Ty!oumz^ulRp**>-MQi1bZ$AfojcB5=bm%l zdEh*B9yyPlC(cvnne*Iv;kq^&ieOI}GtKA4UxtqdG>85g1yJ_6CZaO!;o59WKW^yyTS=_8{ zHaEMQ!_DdDa&x6UU!yJg(6ZaKHSTfwd9 zR&p!5Rotp>HMhE3!>#Goa%;PF+`4W(x4zrJZRj>~8@o;1rfxI0x!b~R>9%rPyKUUI zZacTV+rjPVc5*wrUEHp2H@Can!|mzza(lad+`eu5g(oyJOt3?l^b6JHegkPI4!^Q{1WUG8^5DyKCIF?mBn9yTRS)ZgMxfTimVgHg~(b!`HxO>7q>7H^=yJy_9?m73od%?ZvUUDzHSKO=aHTSxE!@cR=a&NnL z+`H~Q_rCkUeds=NAG=T7r|vWNx%>ArGbyKmgL?mPFr`@#L_esVv%U)-R+u$>=hM43#luEE!wIk#S|1j3?vE1TvvaBooUd zGO0`^!=;dhG^HhN=}1>f=}BKI8AvT7WOA88rj)586BWN}$SmXxJrX<0^=mE~l4SwU8mm1JdE zMOKy7WOZ3X)|9nmZCOXwmGxwO*+4dwjbvllL^hSpWOLaMQ)Yb-L>`sL{fH7sU(lqI%K1 z=w1vj)QjoG@?v{&ytrPN7tf3DCGZk@iM+&K5-+Kj%nSE~XLzP(dA8?xt|vXu^F8GS zp7tWV7P zub@}RE9@2Vih9Mo;$8`_q*ux-?UnJ$dgZ+GUInkBSIMjFRq?8N)x7Fn4X>tG%d73x z@#=c@y!u`Puc6n-YwR`gntIK==3Wc0rPs=9?X~gRdhNXSUI(wE*U9Vbb@94--MsEz z53i@!%j@m+@%noGy#C$*Z=g5G8|)47hI+%i;ob;uq&LbN?TzuqdgHwD-UM%=H_4mq zP4T9B)4b{43~#14%bV@Z@#cE-y!qY&Z=tuyTkI|ImU_#)<=zT!rMJpk?XB_Fdh5LP z-Ue@@x5?Y=ZSl5x+q~`G4sWNo%iHbk@%DQAy#3w*@1S?cJM10tj(W$ulC-f8fiTxygQa_m=?hD`WP2ciu-|<~v z`kwFm$`5?)NBGJ86n;uSm7m&A?!e~>@eAL0-7hxx<(5&lSjlt0=Zn`Q!Zw z{zQM0KiQw+PxYty)BPF#On;U?+n?jl_2>EX{RRF)f04h~U*a$Im-);675++pmA~3w zPJfrb+u!5w_4oPv{R93%|B!#!KjI(tkNL;_ z6aGp6lz-Ykgff(=EM+T4 zxk@Te`AVrkX%(T8s}w4wN~Kb(G%Br1r_!qoDx=DzGOH{qtIDRbs~jq)%B6CvJSwls zr}C=;s-P;Q3acWjs4Aw4s}icDDy2%RGODa9r^>4es-mi-Dyu50s;Z`{s~W1Ns-^r`oFys-xZN+CKB}+kr~0b_YM>gV2CE@zs2Zk*s}X9X8l^_7F>0(Dr^c%ZYNDE?CaWoGs+y*z zs~Kvhnx$r|Iclz&r{=2#YN1-B7ON#{samF%s}*XcTBTO2HEOL|r`D?tYNOhuHmfaa ztJYzHL4yz;Ts5+*Os}t&^I;BplGwQ55r_QSj>Y}=& zE~_i*s=B7Gs~hU3x}|QbJL;~wr|zo<>Y;k19;+wnsd}cKs~75}dZk{gH|ni=r{1d% z>ZAIkKC3V4tNNzCs~_s8`lWuWKkBdgry>Ra1(Aa&K}Zlah!#W-Vg#W<%pg_}JBSm+ z4Z?zWLHr;=kT6ITBo2}UNrPlTcpw5JFas;F11E3;8F+yos2~V*5D_E~QUoc3R6*(> zO^`N77o-m|1Q~-&LFOP!kTu8_WDjx#IfGn5?jTQ)H^>*{4+;bYgF->!ph!?OC>9hC zN(3c?QbFmUOi(r`7nBbw1QmlyLFJ%IP&KF)R1azdHG^6~?VwIjH>elX4;lmwgGNE) zph?g)XcjaNS_CbFRzd5aP0%)I7qkyL1RaA;LFb@L&^725bPsw2J%e6B@1Rf6H|Q7i z4+aDSgF(UIU`Q}D7#0i|jnXH<%a94;BOqgGIsOU`envSQab~Rs<`9Rl(|DO|Ujt7pxC91RH}*!RBB~ur=5g zY!7w>JA+-p?qE-_H`o{K4-NzegG0gL;7D*ZI2IfaP6Q`|Q^D!rOmH?h7n~0+1Q&x# z!R6pea5cCVTn}ypH-lTj?ch#uH@Fwv4;};$gGa&R;7RZ_cosYlUIZ_LSHbJxP4G5& z7rYNX1RsM>!RO#h@HO}rd=GvEKZ9Sv@8D1HH~1Gs(*NnmI*Ja_QFSyOUB}R&I;M`L zW9v9Nt`5`jbbOsaC)9~_Vx2@M)yZ_Y7TVCJwzRDs?P{q#?Q5k2t#yP>u2blgI+aeX z)9AE1oldVa=!`m(&aAWOtU8;{u5;*|I+xC^^XR-fpU$rf=z_YCF06~_qPmzau1n~W zx|A-h%jmMYoGz~`=!&|MuB@x*s=Au4u50L;x|Xi3>*%_=p02MO=!UwHZmgT=rn;GK zu3PAqx|MFN+vv8soo=r?=#ILR?yS4$uDYA*=z)5W9;}Dx zp?a7eu1DyRdXyfm$LO(ooF1~ulk$*u7BvC`j`H# z|LDK^pNsJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf#u^Lv#8dwu+VQs8~ zb+I1S#|GFC8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jqu^V>B9@rCmVQ=h% zeX$?*#{oDH2jO5GfxDhwuX54~XaT{*O9k>&B;cnc6 
zdvPD`#{+l}58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKw zckv$H#|QWjAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtd zfAJsw&)Dq$d;ei%jDjH;6{BHvjDevT6Jud)jDvA84C7&ZOn?b75hlhYm=u#?I0`h- zLqLqPRxb5F%Ra& ze3%~#U_mT|g|P@0#bQ_-OJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7 zdRQMDU_)$#jj;(f#b($XTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cme ze%K!e;6NONgK-EB#bG!cN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPK zc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!gr zeYhVF;6Xfuhw%s=#bbCJPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_ zdw3ro;6r?bkMRjU#b@{&U*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elsR zfA~LB^Z(m_jEqq*1fya!jE*rd6k}p6jE!+HE{0(|jE@O0Atu7am;{qzG7Lw72AXK0 zjSjjf(L)~<2B%!rvVGiJf8m<_XI4$O(UFgNDGyqFL3 zV*xCPg|ILd!J=3Ui(?5aiKVbKmcg=E4$ET&tcaDcGFHK=SPiRV4XlZ^ur}7gx>yhE zV*_l6jj%B`!KT;@n_~-XiLJ0Tw!ya84%=e~?1-JPGj_qQ*bTd55A2D(us8O>zSs}@ z;{Y6pgK#ho!J#+|hvNtwiKB2dj=`}w4#(pJoQRWfGETv%I1Q)c44jFxa5m1txi}B! z;{sfWi*PY6!KJtim*WatiK}omuEDjq4%g!b+=!cSGj74HxDB`C4%~^ma5wJ3y|@qe z;{iN~hwv~S!J~K#kK+kEiKp;1p24$t4$tESyoi_ZGG4)}cnz=P4ZMlB@HXDTyLb=p z;{$w%kMJ=*!Ke5PpW_RBiLdZAzQMQn4&UPk{D`0MGk(FZ_zl0~5B!P0@HhU!zxWUT zzYqQYMfu<7fXEmHLog~v!{`_TLop`C!q^xG<6;=b!}yp06JjDvj7cylCc|(PXrPG} z+UTH*5Js)Gh-IairFwb=D?ho3v**0%!~Oj zKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+*1(!r3u|K?tc&%q zJ~qIH*a#bA6KsmjusOECme>kgV;gLX?XW#|z>e4nJ7X8@irug~_Q0Ol3wvW9?2G-d zKMufwI0y&h5FCoba5#>@kvIxR;}{%^<8VAqz==2sC*u^Hiqmj9&cK;C3uogToQv~t zJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18z>T;GH{%xEira8I?!cY63wPrl+>85g zKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpUz>9bZFXI)wir4Tu-oTr93vc5cyo>kn zK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f-z>oL|KjRntir?@%{=lF33xDGu{EPoE zQV8$=7#X8r2u8(d7#(9^D8|HC7#rhYTnxi_7#|a0LQI5-F$pHcWEhSD4K&e08y$2} zqK7^z3{YbPCdU+*5>sJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf#u^Lv#8dwu+VQs8~b+I1S#|GFC z8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jqu^V>B9@rCmVQ=h%eX$?*#{oDH z2jO5GfxDhwuX54~XaT{*O9k>&B;cnc6dvPD`#{+l} z58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKwckv$H#|QWj zAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtdfAJqiic0@6 zGDg7=jEd1PI>x|IjES)@HpaoY7>4mMJ|@6~mjwP@pmcr6l2FqeOERPkiB38o6SOu$MHLQ*`uqM{R+E@qcVm+*n4X`0L!p7JH zn_@F;jxDeyw!+rf2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!ofHM zhvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1* zm*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3 zkK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHao zpW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y;WOjs9a~jDjH; z6{BHvjDevT6Jud)jDvA84C7&ZOn?b75hlhYm=u#?I0`h-LqLqPRxb5F%Ra&e3%~#U_mT|g|P@0#bQ_- zOJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7dRQMDU_)$#jj;(f#b($X zTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cmee%K!e;6NONgK-EB#bG!c zN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0 zSKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJ zPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{& zU*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elsRe;6q`{l~}{1w$|@M#JbB z14A(;#=_Vb2jgNG#>4oS025*&OpHk|DJH{k6lkD{7TV~bixNHbQDJ}@BQQCpz?7H@ zQ)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm;O(V-YNh#jrS*z>-)B zOJf-us$}xhS&%jV-swO&9FJPz?Rqw zTVoq+i|w#IcEFC<2|HsK?26s6JNCey*b94OAMA_$us;sKfj9^U;}9H*!*Do`z>zo# zN8=bAi{o%SPQZyc2`A$eoQl(MI?lkEI16Xv9Gr{ua6T@;g}4Y8;}Tqo%Wyfaz?HZP zSK}I7i|cSbZorMW2{+>w+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH$M86wz>|0i zPvaRpi|6n>UcifZ2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*}&+s|Ez?b+6 zU*j8mi|_C~e!!3T2|wc({EFZ3JO03*_zQpIAN-5|Fj5TqkC8D7hG0~ThS4zwhGI;N zg|RUX#>Fsc!U~)`>DKQnM#x$4~ z(_wndfEh6pX2vX-6|-S>%z-&E7v{!1m>2V5ek_0mu@Dxmq=6{}%&tbsML7S_f(SQqPIeQbaYu@N@LCfF34VRLMOEwL50#x~d% 
z+hKd`fE}?DcE&E)6}w?~?14S87xu^NPR1!X6{q2JoPjfO7S6^wI2Y&Pd|ZGFaS<-YCAbuq;c{GoD{&RB#x=MW z*Wr5HfE#fWZpJOR6}RDb+<`lB7w*PAxEJ@~emsB&@em%yBX|^#;c+~HC-D@X#xr;p z&*6EzfEV!+UdAhU6|doSyn#3I7T(4?co*;CeSClq@ew}8C-@Yf;d6X}FYy(=#y9vD z-{E`wfFJP_e#S5O6~Ezk{DD957yiaS_!s|Sq)_^gkueH}U{s8T(J=;wVoZ#Mu`v$D z#W0MA@i74=#6*}FlVDOzhT$mCKoc#r(Lom_dg!CV05wKna!i3KF%_o9G?*6CVS3Df z88H)P#w?f>vtf43fjKc3=Egjj7xQ6$EPw^E5EjNFSQLw4aV&u)u@siZGFTSNVR@{8 z6|oXl#wu79t6_DlfiY z6LAtw#wj=zr{Q#*firOy&c-=77w6%8T!0I45iZ6hxD=P+a$JEcaTTt{HMkbn;d@fE(tH~1Fc;d}gm zAMq1@#xM94zu|ZMfj{vV{>DG}7yn_TnDiebV-yU*s2B~SV+;(%m>3IVV;qc&VHgkN zV**Twi7+uH!K9cB!%?7tCR%8tgDy(+&_{&F!wSOQC8DJ+d;uq>9t@>l^YVkNAM zRj?{n!|GTAYho>|jdidt*2DVP02^W>Y>Z8?DK^9A*aBN(D{PHzur0R3_SgYCVkhj3 zU9c;5!|vDvdtxu_jeW2$_QU=-00-hA9E?M7C=SEnI08rFC>)Jra4e3)@i+k|;v}4m zQ*bIy!|6B!XW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdT zTW~9G!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{ zSMVxc!|QkhZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu! zU+^n_!|(V5f8sCvjeqbj{=-PI=s!lrC>VlKF&ak47#NB%F&4(gI2aehFdoLo1eg#L zVPZ^zNii9Qqd)^qw9rNeU6kmdj|v0S7=g(#1*XJQm>SbyT1i(0EQZCg1eU~7SQ^Vw}aN>~}IU{$P! z)v*TF#9CMz>tJ21hxM@mHpE8Q7@J^IY=+IT1-8Ui*c#hlTWp8zu>*F*PS_c{U{~yh z-LVJu#9r7N`(R(}hy8H?4#Yt?7>D3c9EQVj1dhZ}I2y;`SR9AraRN@nNjMp&;8dK3 z({TpQ#925S=ipqNhx2g(F2qH+7?_uyXKhx_pW9>ha<7?0plJch^d1fIlGcpA^(Sv-g5@d94NOL!Tt;8nba z*YO74#9Me9@8Dg$hxhRTKEy}(7@y!%e1^~Q1-`^r_!{5fTYQJ_@dJLuPxu+X;8*;H z-|+|j#9#Ou|KMNzhmm5_e~gS#Fa)DwG>nchFcf2AER2nDFfN8+JdBSCFd-(w#Fzw= zVloUzfd-mrp^Xl@DA7Y76$Yp=0+VA3Oo^#5HKxI|m=4op2F!?=Ff(Sste6e6V-C!T zxiB~8!MvCc^J4)lh=s5)7Qv!e42xq4EQzJCG?u}#SPsi$1+0jburgM`s#p!HV-2i{ zwXinU!Ma!v>th3Kh>fr@Ho>OY44Y#MY>BO~HMYUF*bduc2keNQurqeSuGkH`V-M_! zy|6d-!M@lJ`{Mu{h=Xu24#A-~42R*ZsI1b0-1e}PIa57H8sW=U%;|!dM zvv4-f!MQjO=i>rgh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h;||=3 zyKpz|!M(T-_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s;|;ut zx9~RJ!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW;}86a zzwkHy!N2$qBgLWr7#X8r2u8(d7#(9^D8|HC7#rhYTnxi_7#|a0LQI5-F$pHcWEhSD z4K&e08y$2}qK7^z3{YbPCdU+*5>sJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf#u^Lv#8dwu+VQs8~ zb+I1S#|GFC8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jqu^V>B9@rCmVQ=h% zeX$?*#{oDH2jO5GfxDhwuX54~XaT{*O9k>&B;cnc6 zdvPD`#{+l}58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKw zckv$H#|QWjAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtd zfAJqiic9}7GDg7=jEd1PI>x|IjES)@HpaoY7>4mMJ|@6~mjwP@pmcr6l2FqeOERPkiB38o6SOu$MHLQ*`uqM{R+E@qcVm+*n z4X`0L!p7JHn_@F;jxDeyw!+rf2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU z18^V?!ofHMhvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#> z3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du z2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N z5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y;WO zM*lG~M!^t_iqSAS#=uaFiLo#?#=*E4hVd{yCcuQ42oqxxOp3`c90eL^qJ=g(=%Pdq zeN-5r#t2M~DKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~ z!dL{0Vlga^C9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*SeNC+@=CxCi&*KHQH7@E{(- z!*~Rb;xRmqC-5Ym!qa#L&*C{ej~DPFUc$?G1+U^YypA{UCf>r^cn9y|J-m+(@F70J z$M^)F;xl}XFYqP4!q@l)-{L!bk00&yZK`exYu?QB$ zVptqYU`Z^6rLhc_#d264D_}*egq5)hR>f*q9cy4stcA6)4%WqbSRWf;Lu`bNu?aTC zX4o8CU`uR;t+5TZ#dg>pJ77obgq^VqcExVk9eZF;?1jCt5B9}=*dGVrKpcdFaR?5@ zVK^K|;7A;Wqj3z5#c?Js)Gh-IairFwb=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$ zl2{5$V;L-q<*+*1(!r3u|K?tc&%qJ~qIH*a#bA6KsmjusOEC zme>kgV;gLX?XW#|z>e4nJ7X8@irug~_Q0Ol3wvW9?2G-dKMufwI0y&h5FCoba5#>@ zkvIxR;}{%^<8VAqz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8P zmADF5;~HFx>u^18z>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2p 
zlXwbG;~6}Q=kPpUz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~ zm-q@_;~RX7@9;f-z>oL|KjRntir?@%{=lF33xDGu{EPoEQUdyqkueH}U{s8T(J=;w zVoZ#Mu`v$D#W0MA@i74=#6*}FlVDOzhT$mCKoc#r(Lom_dg!CV05wKna!i3KF%_o9 zG?*6CVS3Df88H)P#w?f>vtf43fjKc3=Egjj7xQ6$EPw^E5EjNFSQLw4aV&u)u@siZ zGFTSNVR@{86|oXl#wu79t6_DlfiY6LAtw#wj=zr{Q#*firOy&c-=77w6%8T!0I45iZ6hxD=P+a$JEcaTTt{ zHMkbn;d0vz>oaI z&-}u#{KoJ6!Jqua-~7YB{Ko*1^q&D4h=Cb|K^cs}8G<1hilG^XVHu9$8G#WQiIEwF zQ5lWV8G|tyi?JDpaT$;CnScqIh>4kmNtukvnSv>qim91~X_=1cnSmLZiJ6&&S(%O5 znS(i*i@BMHd6|#-S%3vuh=o~%MOlo+S%M{5ilteGWm%5pS%DQh8VP1%gi*@7+Eimlm(ZP||P*?}F|iJjSnUD=J@*@HdVi@n*0 zec6xwIe-H>h=VzVLphAYIf5fOilaG(V>yoFIe`;7iIX{nQ#p;(IfFAfi?cb0b2*Rm zxqu6~h>N*|OSz28xq>UXimSPXYq^f=xq%zGiJQ5FTe*$fxq~~oi@Ujpd%2JMd4LCb zh=+NEM|q6Ld4eZ-il=#oXL*k2d4U&siI;hWS9y)sd4o53i??})cX^NZ`G61kh>!V% zPx*|``GPO`im&;GZ~2bz`GFt#iJ$p}U-^yS`GY_Ci@*7YfBBCABI`c`G7tkZ2!k>h zgEIs}G898I48t-U!!rUSG7=**3ZpU_qca9$G8SVq4&yQ&<1+yhG7%Fq36nAzlQRWV zG8I!Z4bw6m(=!7zG7~d13$rpCvoi;CG8c0*5A!k~^Roa8vJeZi2#c~9i?akvvJ^|R z49l_{%d-M2vJxw^3ahdjtFs1cvKDKz4(qZW>$3qHvJo4z37fJRo3jO5vK3pi4coFE z+p_~ZvJ*SA3%jx#yR!#-vKM=^5Bsto`*Q#Xau5e|2#0bQhjRo+aui2%499XD$8!QF zauO$V3a4@!r*j5pau#QE4(DU62#@j@kMjgi@)S?=4A1f$&+`H=@)9re3a|1S zuk!|P@)mFN4)5|F@ACm4@(~~N37_&ApYsJ@@)ck64d3z|-}3`M@)JMv3%~Lkzw-xw z@)v*e5C8HX14Pk(24o-xW)KEtFa~D`hGZy)W*CNLIEH5gMr0&LW)wzcG)89(#$+tU zW*o+4JjQ1NCS)QeW)dc4GA3sVrerFnW*VktI;Lj^W@IL2W)@~;HfCoI=43ABW*+8c zKIUfu7Gxn7W)T);F&1YDmSicGW*L@cIhJPyR%9hsW))UtHCAU0)?_W#W*ydLJ=SLf zHe@3<{6&lIiBYQUgRZS<`rJ$HD2cp-sCOb<{jSUJ>KU7KI9`l<`X{UGd|}FzT_*u z<{Q4{JHF=!e&i>9<`;hDH-6_2{^T$I<{$p$KL&`Z{|v}L49p-5%3uu65Ddvs49zeM z%Ww?O2#m-`jLayE%4m$v7>vnSjLkTV%Xo~>1Wd?8Ow1%q%4AH=6imrfOwBY*%XCc7 z49v((%*-sz%52Qe9L&jF%*{N^%Y4kw0xZZvEX*P-%3>_e5-iD5EX^`3%W^Ew3arRV ztjsE`%4)368m!4$tj#*C%X+NO25iViY|JKX%4TfN7Hr8@Y|S=o%XVzf4(!NI?949g z%5Ln=9_-0p?9D#x%YN+70UXFd9Lymc%3&PN5gf@;9L+Ht%W)jf37p7DoXjbl%4wX= z8Jx*koXt6$%Xys71zgBQT+Ah0%4J;6613bt>Jj^3J%40mv6FkXNJk2va%X2)>3%tlnyv!@S%4@vN8@$O|yv;kj z%X_@f2Ykp!e9R|&%4dAe7ktTAe9bp}%XfUw5B$ha{LC->%5VJ6ANojI73xtN=In3wsOp9NTug;tLmw1_1c$L?9oi})sw|JX(c$fEhpAYzukNB8R_>|B1oG@KzxbPf_?Q0}AiDlDAOkTlgD@zAF*rjoBttPY!!RtvF+3wMA|o*} zqcAF?F*;)~CSx%+<1jAcF+LM8Armn%lQ1chF*#E(B~vjq(=aX5F+DRdBQr5GvoI^O zF*|cGCv!13^Dr;-F+U5iAPccDi?Aq*u{cYxBulY0%djlVu{##2Cu|6BHAsewVo3JUHu{m3?C0nsI+psO$u{}GmBRjD(yRa*}u{(RPCws9s z`>-$ju|EfJAO~?Uhj1u|aX3eCBu8;H$8apiaXcq*A}4V&r*JB#aXM#kCTDRr=Ws6P zaXuGtAs2BmmvAYUaXD9TC0B7Z*KjS@aXmM1BR6p~w{R=BaXWW#CwFl-_i!)waX%06 zAP?~{kMJmu@iV$^He++PU`w`QYqnuq zwqtvCU`KXhXLey%c4K$;U{Cg9Z}wqd_G5nz;6M)IU=HC>4&!i+;7E?*XpZ4nj^lVv z;6zU1WKQ8!PUCdW;7rcqY|i0a&f|P8;6g6qVlLrQF5_~p;7YFIYOdj0uH$-c;6`rZ zW^UnDZsT_D;7;!1Ztme;?&E$Q;6WbZVIJX89^-MI;7Ok1X`bO(p5u95;6+~IWnSS` zUgLG%;7#7*ZQkKs-s62f;6py*V?N{)#nep0v`okJ%)pGy#LUdXtjxyj%)y+@#oWxp zyv)b^EWm;+#KJ7XqAbSZEWwg2#nLRpvMk5)tiXz_#LBF~s;tK9tihVB#oDaHx~#|g zY`}(W#KvsGrfkOMY{8an#nx=Ywrt1t?7)uf#Ln!(uI$F{?7^Pw#op}0zU;^T9KeAb z#K9cGp&Z8H9Kn$s#nBwYu^h+ooWO~k#L1k(shq~?oWYr##o3(0xtz!OT)>4~#Kl~~ zrCi44T)~xG#noKHwOq&b+`x_8#Le8ot=z`#+`*mP#ogS)z1+wBJivoI#KSzoqddmr zJi(JZ#nU{)vpmQ1yugdR#LK+GtGveRyuq8i#oN5YyS&Hye87i%#K(NXr+miee8HD| z#n*hpw|vL<{J@X=#LxV~ul&aE{K236#ozqHzx>AlvGkt-8Hj-ygh3gM!5M-f8H%A9 zhG7|w;TeGu8Hte@g;5!e(HVm=8H=$QhjAH?@tJ@LnTUy*gh`o<$(e#FnTn~IhH06O z>6w8UnTeU1g;|-6*_nemnTxrZhk2Qg`B{JkS%`&Mghg45#aV(SS&F4uhGkifOm zghGRL7<2iv7If;`wg;P0= z(>a4PIg7J7hjTfP^SOWvxrmFogiE=M%ejIpxr(c~hHJTw>$!m&xrv*(g=Xrq_d5M>Kg;#lv*Lj0Cd5gDs zhj)38_xXSi`G}ACgira5&-sEc`HHXkhHv?f@A-ir`H7$TgrGYX?J8ly7?V=@+FGY;c29^*3s z6EYDKGYOM28Iv;wQ!*7(GY!)+9n&)dGcpr1GYhja8?!S9b21lmGY|7JAM>*S3$hRk 
zvj~f_7>lz6OR^M8vkc3!9Luu;E3y(RvkI%S8mqGgYqAz=vkvRB9_zCK8?q4_vk9BB z8Jn{OTe1~fvklv_9ow@5JF*iyvkSYj8@sayd$JdMvk&{SANz9v2XYVxa|nlW7>9EN zM{*QLa}39F9LIA4Cvp-ea|)+&8mDsxXL1&2a}MWn9_Mob7jh97a|xGn8JBYfS8^3s za}C#W9oKUMH*ym8n5#PZ}Jvz^A7Lw9`Ex3AMz0&^9i5w8K3h7U-A`S^9|qf9pCc< zKk^el^9#T78^7}hfASZ9^AG>>9|OeEe+FbA24)ZjWiSS32!>=RhGrOsWjKas1V&^e zMrIU7Wi&=-48~+E#%3JGWjw}b0w!c4CT0>QWilpb3Z`T#re+$ZWjdy324-X?W@Z*< zWj1DK4(4Po=4Kw|Wj^L-0TyH-7G@C^Wib|K36^9jmS!22WjU5-1y*DwR%R7eWi?i3 z4c25W)@B{nWj)qs12$wMHf9qxWivKs3$|n{wq_f)WjnTK2X z9LixF&Ji5RQ5?-N9LsSW z&k3B!Nu10noXTmO&KaD^S)9!|oXdHf&jnn_MO@4!T*_r!&J|qARb0(AT+4M_&kfwj zP29{a+{$g-&K=yzUEIw*+{=C3&jUQjLp;nQJj!D{&J#SzQ#{QxJj-)D&kMZBOT5f0 zyvl35&KtbRTfEIXyvuvM&j)iSA5Mke9L!y&ky{_PyEa;{K{|q z&L8~AU;NEK{L6m~5KsRZkbxMOK^T<57@Q#(lA#!yVHlR-7@iRrk&zggQ5coc7@aW~ zld%|^aTu5J7@rB4kcpU>Ntl$$n4Bq?lBt-QX_%Jjn4TG!k(rp8S(ugCn4LM8lew6i zd6<{^n4bk$kcC*7MOc)@SezwTlBHOhWmuNwSe_MFk(F4PRalkPSe-RkleJizby%16 zSf35pkd4@wP1uyp*qklclC9X9ZP=FW*q$BOk)7C?UD%b~*quGtlfBrReb|@%*q;M9 zkb^jwLpYSfIGiImlA}19V>p)MIGz(Yk&`%?Q#h5=IGr;%le0LRb2yjtIG+o+kc+sO zOSqKFxST7vlB>9yYq*x{xSkuhk(;=gTey|mxScz=le@T^d$^bTxSt1jkcW7fM|hOS zc$_DAlBal@XLy$9c%Bz{k(YRxS9q1zc%3(Rlec)AcX*fgc%KjWkdOG7PxzG2_?$2J zlCSuhZ}^t)_?{p5k)QaPU-*^Z_?Gav&oFoQ5CgE2TmFeF1U zG{Z0~!!bM~Fd`!{GNUjmqcJ*TFeYO$HsdfZ<1s!HFd-8$F_SPUlQB6{FeOtlHPbLH z(=k0WFe5WDGqW%&voSk!Feh^{H}fzr^D#dQupkSuFpID#i?KLMup~>dG|R9o%dtEw zup%q5GOMsEtFbz3uqJD#;r?upt|LM zGrO=WyRkcauqS)5H~X+J`>{U)0*Ks{Ja3eQyGq-Rnw{bgn za3^@Fs8Z zHt+B*@9{n#@F5@ZF`w`$pYb_g@FidIHQ(?p-|;;^@FPF*Gr#aFzwtYN@F#!qH~;W2 z|1m&<0RR2{9|mL~24)ZjWiSS32!>=RhGrOsWjKas1V&^eMrIU7Wi&=-48~+E#%3JG zWjw}b0w!c4CT0>QWilpb3Z`T#re+$ZWjdy324-X?W@Z*9LixF&Ji5RQ5?-N9LsSW&k3B!Nu10noXTmO&KaD^ zS)9!|oXdHf&jnn_MO@4!T*_r!&J|qARb0(AT+4M_&kfwjP29{a+{$g-&K=yzUEIw* z+{=C3&jUQjLp;nQJj!D{&J#SzQ#{QxJj-)D&kMZBOT5f0yvl35&KtbRTfEIXyvuvM z&j)iSA5Mke9L!y&ky{_PyEa;{K{|q&L8~AU;NEK{L6m~kVyX- zkbxMOK^T<57@Q#(lA#!yVHlR-7@iRrk&zggQ5coc7@aW~ld%|^aTu5J7@rB4kcpU> zNtl$$n4Bq?lBt-QX_%Jjn4TG!k(rp8S(ugCn4LM8lew6id6<{^n4bk$kcC*7MOc)@ zSezwTlBHOhWmuNwSe_MFk(F4PRalkPSe-RkleJizby%16Sf35pkd4@wP1uyp*qklc zlC9X9ZP=FW*q$BOk)7C?UD%b~*quGtlfBrReb|@%*q;M9kb^jwLpYSfIGiImlA}19 zV>p)MIGz(Yk&`%?Q#h5=IGr;%le0LRb2yjtIG+o+kc+sOOSqKFxST7vlB>9yYq*x{ zxSkuhk(;=gTey|mxScz=le@T^d$^bTxSt1jkcW7fM|hOSc$_DAlBal@XLy$9c%Bz{ zk(YRxS9q1zc%3(Rlec)AcX*fgc%KjWkdOG7PxzG2_?$2JlCSuhZ}^t)_?{p5k)QaP zU-*^Z_?dG|R9o%dtEwup%q5GOMsEtFbz3uqJD< zHtVo1>#;r?upt|LMGrO=WyRkcauqS)5H~X+J z`>{U)0*Ks{Ja3eQyGq-Rnw{bgna3^@Fs8ZHt+B*@9{n#@F5@ZF`w`$ zpYb_g@FidIHQ(?p-|;;^@FPF*Gr#aFzwtYN@F#!qH~;W2|1m%k{bxW1VqgYgPzGag zhG0mBVrYh8ScYSGMqornVq`{PR7PWT#$ZgwVr<4?T*hO3CSXD)Vqzv?QYK?^reI2@ zVrr&gTBc)qW?)8UVrFJxR%T;%=3q|dVs7SPUgl$d7GOaZVqq3xQ5IuymS9PiVriCP zS(amYR$xU|Vr5ogRaRql)?iK6Vr|x8UDjiLHef?GVq-R8Q#NCBwqQ%PVr#ZxTef3+ zc3?+#VrOdpRbJzD z-r!B%;%(mHUEbq;KHx(>;$uGHQ$FK!zTiu~;%mO)TfXCae&9!b;%9#0SAOGn{@_pk z;&1-pU;bl&r25Z*48*_;!k`Ss;0(c#48_n4!>|m;@QlESjKs){!l;bK=#0UbjK$cD z!?=vc_)NfrOvJ=Y!lX>ba4+1Y{k}W!?tY4_Uyop?8MIO!mjMb?(D&y?8V;f!@lgt{v5!89K^vK z!l4|-;T*w{9L3Qb!?7I4@tnYkoW#kT!l|6b>72otoWfJjBC1!lOLK<2=EW zJjK&I!?Qfc^Sr=|yu{1A!mGT->%766yv5tR!@Io4`+UHMe8k6m!l!)3=X}AJe8ty% z!?%3L_x!+*{KU`v!ms?s@BG1^{Ken=!@vB;0Lk>90U3ya8H7O@jKLX#AsLFH8HQmQ zj>{XhYtz1ClTMxXFKgGaag&Zs8Xx?h(TdJ3+BE6Zxj~!uLzcE}*Rf55)`J&y?$Q2# z#!I?3Xx*jBxd@*Z&-J>xz|WwuWap6T-U-Rkv5|FrtAGdp?^FMnMWlL2zaS*Jn}-Sv!lDu*Js$ech4E2Q zNs-A(pn7Om-~oC2c$icV>lZ}F#V5vw#zc3E^l(Fh$dL4D9zNVBsZS8#yF^6BB!>cz z5C8{20nnhv9x+G`NlA(fNsNpMO^)uAKEWdm3Bo#dj!5#zsX=gD0Gtw*rg5l836D#O zP3Y<$9-kQLADa@B937gN=oQ4fK0H1yDLFAEJQ;Y@^jW}bo3ytcEdUx21c!O|)BU_# 
zg8>;6B9gl%L;{Zygw!wX6$tiDF#Q6sAh>*BU|`)}-e1}!07~^*B`(Rk)ccn_Fga{U zP(uhh`1mSuL53i$TYQj@MljfXA6++n@>?H8{0mmO=%tNz?HT2xvl}+M>7%diJS}j^ zODiVz>h7Z>F!ksqIoYdYTxe{h#|lgt*w|wSq0w>49_P3D(_HIyaad?lWV(klJnQ;% zhSx&gXb1;&;*v`i&zO`c)sxxJt+-Y9-=@ftZVI`=XY~s}|5N)6eWA1;oK7Gl zgQ6J}%b<7$B{C?PL8%N%XVBy@ceeZ$QJe2^q@dNcH6O zbN`QlSwB85*j2E4KcyOWq8-Wc4~3%g_?f zphN~GGboio=?n@oD3d|i49aEDzm(3G5=172hsJ~^CVBFwdJ1?G$d?~U$)T|co`Sz! znw%699R`~IegQoGb&3=#QL;>#(!~q^{R4r4-oP#7&i;E@$iJ)xJcR?@K2sD)^%VUN zQxyA`De8arM8vnOw0U> zX?Q}{N!#&_%*?S@9qAn zV`1-hKCr2j>Z$x6+g173c0OZNbyL5&lqmfVnG1tJw@-+mCWUH*CM8EE#(ApMUA^Vk zFP|7Sykb=Mic!PMpk@#n7nf z8hZ6_lPtR)fDe?S zeuGlj{|HL8Gq5CsysngSQ#qYd8C1-mY6dkks9y%nS2?*>ug_50@3*ia>C%EUOJ%4Y z?$zv{2pZuN7D*E}x_CzYi2NUgO#?vIyNMSB{>ddOgG=O)Aj$`pKB!3c)vERb2Znhq z=yto@UZY3*xxLnQd+qH0+rk|}Q#~Dnh{FCwiu)Jw#H4y+y-6F|B{Inq=WY>1g!$Z@ zfq5n8e8SGUAJuCd^H}?xVYk$@#x}DSu#LS04>-R_^du>91-s#2Y3< zy+QSN9u3RLBX6hje<4ZSe`dn)f5fmi8le9Y<|FKvz`?=%Xz1-d`oRaF9>gP^$|D9;l z+$rv1=?UikS5o=fq_E_qj3k=w?&9_)Qc4izEg(^ezBX}2s%NJA&$^NJDx++8d;8Fu zzNx@7%WKTpsh&B$%wsb$FP+O=_y1HV{#_#Gc{$AY=H&vv|Ev*dEolZsE==_-`m@no z?DGnimRk*eWBQUbnC$WIHoKCy(G2!1^=sklF{`9qtEOG6rCqB>{uS`c(t~CBKZ0dN zs%PcDf@Rgef@Sr81j`zqSm56&zxIEb^4{+M|7^Otd z`;zPL-~Ybg{axU^xeNYPe>VEq!voyj`h)qJ`9I3jrhmNlk8a>ia$}{*ZIqKc@K9@O_V@$-4bdS$Ftm5^44E%C^Th%NvxwkB{Gi{U7_{`nzcV83TL&A>4nFecwM`gEZB= zvhVlG?hV!hUPN$v)BfO}X@4kV(GqV?A5Kr{(7(d`Ncy6qe=a)qk44AR5#aAdC(;+4 z{BzN%e=Itk4i$edI+MQW?4OIy{bSMjv_;kaUUVTc)pOB@-IsjWy}*avm;F*bSAK)H z*U?@JT=jzYhLlv#we(?cgr*H&_YQCI4&O*0Mgy?4;hWy!9p2$v>BC3>nl^mfJG>_) z($`|Sy9Cj`<~<#n?*wrn5s^`$-cyc{j^2LUb2kW2Yv?@pf|S4B{G%zm|CfJL!jgiJ z*l5p#RL{d8EHScuQO~3Q#O<{66HwoGijslek5fHQ{DO#>(6GpuB+t`e&oeI){qBg< zJ|xnxzGh;4x5&7_>KWbp^S`?H7e1R(=?!raZ?EMu(#!vb^xou1%VZcv5EM6GL<0E+Oui}Z>g*~usV z`(V$9jM)76r}IDgw(yoDUu=H%YvIF-u+T)$myEk^p08k{4kFgi1yo?jQl0)gd0g1 zPfU(ajEGE3#JmH(W1vLbdy^D|hKD9665iX?MACarI}A)NRy2|FKK#o)-g%Mj7-rkxv`{ z@8?5Jvj%!?=+zj3P%rf2UW6tg3ZfwZG2U*FgLp`QL`Z^UNP$#HgLKG%Ovr+4$bno? zCMYwM1wstMJCYD0CPy55ez9#kI+gc?8%p+-<+s0q{* z3WAzJ&7l@hOQ;pp8fpUtLv5iDs2vmvg+bv^1QZEHLG2+o6b*HNIzll}EEEUDLkW-v zN`#W2WGDsd1a*eGKwY73PaLit%O!VtD!Z}T4)`#9@+qHgf>B&p)Jr>XdAR0+5zo^c0s$LJeF&Kvln1m^qh5^jLEX=_?EWjcx!7{ACDy+deY``XL!8Yu`E;tjM z8O{P{g|org;T&*II2W87&I9|wdEtC;ez*W!5cY=)!G+->a8bAzTpTU|mxN2frQtGg zS-2cr9C*YItDfl#e20ja)gU`bk;EV7j_%eJ2z6xK1ufsRsoA538Hhc%Z z3*Uq9!w=wx@FVy!`~-dqKZBpcFW{H(EBH1127U{_gWtm+;E(Vp_%r+k{tADCzr#P^ zpYShl@_XSNK@kkW5dt9*3ZW5zFbIoq2#*Meh)9TxD2R$^h>jSDiCBn@IEV|$gk(mt zAX$-YNOmL#k`u{=A@z|!qyf?pX@oRJnjlS)Afy@6 z9BF~HL|P%Okv2#$(iRCp+99Dx7!r;|AdyHE(jIXm(MSiRBNBteB5_DOl7M)SL?j7G zMpBSYNN1!A(iQ23bVqt1sYp+x7t$N)gY-rEA^nj7$UtNeG8h?x3`K?^!;ullNMsZ; z8X1F(MaCiHkqO8|WD+tNnSxA3rXkaj8OTgz7BU-|gUm(dA@h+1$UH$B`4rN#qoA8aacUMb07TkqgL06bB~c2cQGhZii*hKB3aE%msEjJ8ifX8i8mNg{sEsz2XfWCq4ME$Xp=cNyjz*x7XcXEWb)(T}2ecy^gT|t9Xgr#LdeB5P2~9>* z&`xM)v8|{PkMf;)s(E;c{bPzfi9fA%;hoQsK5$H&C6gnCm zgN{YVq2tjB=tOi9IvJgUPDQ7o)6p5|Omr4H8=ZsBMdzXO(FN#2bP>83U4kw}m!Zqi z73fNI6}lQ-gRVu_q3h8N=tguCx*6SqZbi4D+tD59PIMQ#8{LEMMfaim(F5o~^bmR& zJ%S!ZkD5^bz_PeS$tkpP|pu7wAj$75W-|gT6)Iq3_WT=tuMu`WgL#enr2b-_alFPxP0! 
z@WU8_p%{kY7=e)(h0z$m7>va@jK>5_#3W3{6imf5OvenOCl9L$Af!ZKr7u&h`% zEIXD1%ZcT}a$|WgKP)en56h1gzzSmiSRt%1Rs<`G6~l^SC9slMDXcVB1}lq|!^&e7 zu!>kEtTI*wtBO^_s$(^&SQ{)DYm0?o?XXZR3=791ut+QlYmd3HXsiR)5sSfMu{bOqOTau>B9??DV<}iC ztTWaH>xy;5x??@CRIDe~3+s*b!TMtTu>RNpY#=rW8;lLXhGN68;n)alBsK~gjg7&^ zV&ky!*aU1MHVK=IO~IyO)3E8-3~VMg3!9D2!RBJ~u=&^mY$3J?TZ}EimSW4W<=6^r zCAJD%jjh4fV(YN=*amDPwh7yeZNau;+pz7}4s0j33)_wD!S-VNu>IHp>>zdsJB%H{ zj$+5K05p z>>>6DdyGB7o?_3i=hzGECH4w?jlIF%V(+l`*az$*_6hrpeZjtB->~o459}xQ3xjYN zM{pF!a2zLa5~pw)2RMVXIEVANfQz_<%eaE8xQ6Svft$F6+qi?f@Jx7SJPV!`&xU8m zbKp7gTzGCg5AKKO#q;6$@d9{3+#fH57siX=Me$;Ial8ax5-)|9#>?Pk@p5>1yaHYk zuY^~|tKe1fYIt?L23`}dh1bUG;C1l;ydGX355ybb4e>^JW4sC86c56i;mz?DcuTw$ z-WqR%2jgw=5WF28iihFhcmy7aN8#;pHy(|5z&qkGcq|@=$KwgO2T#P4@MJs%?}T^8 zyWm~%Zg_XR2cC-e#Czer@jiH8ydT~lAAk?U2jPS9A^1>y7(N^yfse#T;iK^}_*i@# zJ|3TdPsAtTlkq9|RD2pf9iM^E#Ao5N@j3Whd>%d@Uw|*f7vYQXCHPW&8NM7}fv?0@ z;j8gA_*#4&z8>FzZ^Sp@oAE99R(u=29p8cP#CPGl@jdund>_6aKY$;^58;RLBluDL z7=9c-fuF=r;ivI4_*wiMejdMoU&Jrrm+>q3Rs0%$9lwF!#BbrZ@jLik{2qQEe}F&4 zAK{PjC-_tR8U7r9fxpCG;ji&G_*?uP{vQ8;f5boGpYbpFSNt3P9shy<#DC!s0TT#; z5*UFK1VIuMK@)&r2$tXoo)8F;kO-Mj2$j$XoiGTKun3!Q2p5rw$V_A*vJ%;d>_iSC zCy|TDP2?f`h`dBTB0o`pC`kAdg^0pL5uzwjj3`c&AW9OYh|)wEqAXF4C{I)%DiW25 z%0v~SDp8H7PShZ3619lhL>;0o5kS-<>Jx!P1EL|(h-geSA(|3FL^Gl}(Sm46v?5v) zZHQo^EfGSrBSMKVBAkdIB8e!XJ>e#zi4H_ZB8G@1;)r-6f$$KCL=urqq!68m&O{fY zE76VUPV^vBiJn9+qBqfp=u7k?`V#|)fy5wUFfoJ}N(>{06C;R`#3*7kF@_jRj3dSq z6Nrh#Bw{i#g_ufABc>BGh?&GJVm2{{m`ltf<`WBug~TFaF|mYLN-QIm6Dx?7#42Jn zv4&VntRvPF8;Fg>CSo(Oh1g1LBeoMeh@HePVmGme*h}mq_7ew)gTx`?FmZ%9N*p7O z6DNq1#3|x5afUccoFmQ?7l@0*CE_x1g}6#wBd!xSh?~SM;x=)IxJ%q4?h_A)hr}b| zG4X_WN<1T;6EBFD#4F-8@rHOyyd&NdABc~{C*m{lh4@N*Bfb+qh@ZqS0wQ4&AyE<| zagrcOk|JpmkPOL^9LbXcDUuQ?lM1Pl8mW^8X_6LclMd-3Gm)9eEM!(P8=0NVLFOcL zk-5n{q#v1=%tz)Y3y=j#f3gr+m@GmTC5w^8$r5BqvJ_dGEJKzh%aP^D3S>pH5?Pt7 zLRKZKk=4l>WKFUbS(~gw)+GbTdSrbvkZeFUBpZ>9$tGk|GKg$OHYZz`2Ctv1A+>PbQEaGLcLolgSja6WN*ULUtv) zk=@B2WGdN{>_zq_`;dLfeq?`g06CBxL=Gm0kVDB~?xOkVna5;R7#_C%AicjqHM~cTvR41GnIwPN@b(6Q#q)dR4ytv zm51`9@>2Pz{8Ry|AmvXLq6$+*sG?LcsyJ1GDoK^1N>gR1vQ#;$JXL|JNL8XLQ&p&{ zR5hwPRfDQY)uL)sb*Q>j09B8wPX$s9sD@M{sxj4sYDxuB&8X&73#uj6ifT=@p@ONl zR0!3M3Z=rRa4Ld|q@t+yl$(mCI#3;{7%G;EqvELq%0nemNmMeGLUp1#Q(dU8R5z+S z)q_fT}L+0-0rE;WyuPc5JpQj4g?)DmhbwTxO$t)Ny?tEkn~8fq=I zj#^J`pf*yQsLj+CYAdyk+D`4Dc2c{j-P9gxFSU=_PaU8RQirI+)Dh|^b&NVrouE!q zr>N7^8R{%`jyg|Wpe|CEsLRw9>MC`Ox=!7oZc?|X+teNEE_IK(Pd%U>Qje&|)D!9{ z^^AH>y`WxFuc+758|p3fj(SghpgvNcsL#|F>MQk)`cD0zep0_Eh=yr|Mrn-3X@Vwc zil%8mGc-$cG*1h(NK3R#E3`^$v`!neNn5l{JG6_=L}#Y6&{^qhbapxios-T*=ce<} zeso?sADy2rKo_L_=|Xg2x(HpAE=CupOVB0hQgms$3|*EkN0+B7&=u)QbY;2#-x&hsgZbUbxo6t?^Ai5ddoNht4q+8Lg={9sQ-Ifla z+tHzP7#&VW(2;Z$-JW*S(R2s8BOOD>(s6V=oj`l&L^_F1rc>xnbZ5E?-IeY}cc**M zsdP`e7u}ogL-(co(f#QG^gwzLJ(wOs52c6E!|4(9NO}}KnjS-srN`0Z=?U~idJ;XE zo(evpA^g?Dsx6#|_9rR9m7rmR_L+_>c(fjEG^g;R%eV9H%AEl4c$LSOFN%|Ch znm$9HrO(ml=?nBl`VxJazCvH6uhG}(8}v>37JZw(L*J$E(f8>G^h5d){g{42Kc%11 z&*>NROZpZ4ntnsSrQgx-=@0Zr`V;+`{z8AHztP|6AM{W97YzXzAOHmzzySeBKmi&6 zzyKC-fCmB)fdpir02OFJ2L>>K1#I8|7sv!MgDfB`$Of{593Usi1#*Ktzz^gF`9OY9 z02BoNpb#hwih!b^7$^=(fRdmTC=JShvY;F&4=R9)pc1GIs(`AX8mJCxfSRBds153X zx*!151NA{5XaE|5MxZfh0-Ay#&ZunlYnJHSq`3+x7a zz+SKq><0(HL2w8h21meAa10y=C%{Q?3Y-RKz*%q(oCg=cMQ{mR23NpUa1C4sH^5DB z3)}{Gz+G?;+y@W9L+}VZ22a3K@C-Z$FThLi3cLnyz+3PRyayk^NAL-J24BEe@C|$i zKfq7$3qTCaAPmZ249*Y?$xsZ<0ES^$hGTd}U_?e@WJY0BMq_lwU`)nhY{p?+OeQ8X zlZDC3WMi^3IhdSGE+#jVhw)?bGWnSNOaZ1K~XVrnyWn7T{=Q;(_71TqbnhD;--G1G);$^Nw&gGpt2GQF7IOdqB%(~s%T3}6N_gP6h05N0Sdj2X_1U`8^d zn9HZq%-&CC{NE3=K+&g@`zGP{`F%pPVhvya)&9AFMI 
zhnU065#}g!j5*GnU`{fpnA6M|<}7oLInP{RE;5&x%ghz#Dszpw&fH*bGPju9%pK+~ zbC0>tJYXI&kC?~I6Xq%NjCszyU|uq>nAgl3<}LG%dCz=cJ~E$}&&(I*EAx%{&ir70 zGQSvzg;|6}S&YS5f+bmsrCGo-EX#5%&kC%_O03K(tjcPv&Kj)ATCB}Ftc%UWW@fXn zS=nrCb~Xo_lg-8EX7jLqY+g1Wo1ZPf7G(X|LTq8S2wRjb#ujHwuqD}2Y-zR(Tb3=y zmS-!l71>H`Wwr`im955BXKS!E*;;IEwhmjD4PfiB_1QqS0o#ym#5QJ|uua(@wi(-; zZNau=Td}R#Hf%84mJMOsv7u}j8_q_sk!%#(o^`X)YzMX@8^gx3acn%BzoMXM3=zY)`fq+nepf_GSCA{n-KRKz0y2m>t3nWrwlD*%9nWb`(3B z9m9@g$Fbwt3G7665<8il!cJwUvD4WZ>`ZnRJDZ)u&SmGZ^VtRLLUs|mm|emyWtXwb z*%j`rzUyPMs^?q&C}``H8RLG}=P zm_5QCWskAP*%RzZ_7r=XJ;R=5&#~v(3+zSq5__4w!d_*svDeuf>`nF-dz-z(-evEx z_t^*RL-rB-n0>-NWuLLn*%$0f_7(e@eZ#(G-?8u659~+w6Z@I{!hU7HvESJr>`(R= z3vn=qa43gyI7e_KM{zUjng@UGdYX1Ifrv`nYhec7A`B7 zjmysE;Bs=gxZGSG&X3E><>T^m1-OEoKUat=%oX8^a>cmfTnVluSBfjmmEp>A<+$=( z1+F4jiL1<2;i_`gxawRDt|nKDtIgHn>T&^GJ+3|%$Ti>^a*epgTobM-7sNH=nsY6< zmRu{YHP?m<=Gt;0Tstn53**AM2riO~;@Wd=E}HAWb>w2WST2r>=Mp#%m&he?$y^H8 ziR;XD;kt6&xb9pJE|u%a_2PPSeYn0{KdwJFfE&mS;s$d=xS`xIZa6oB8_A90Mss7h zvD`RrJU4-x$W7uVb5ppf+%#@FH-nqW&EjTrbGW(OJZ?U>fLq8d;udpDxTV}OZaKGt zTgk2BR(wcI*xJ-30|$Zg^_b6dEr+%|4Iw}acs?c#QGd$_&aK5jpEfIG+?;tq31 zxTD-L?l^aXJIS5mPIG6tv)noEJa>V+$X((tb62>l+%@hxcZ0jh-QsR@ceuOUJ?=jD zfP2V2;vREPxToAR?m72@d&#}xUUP4_x7<7KJ@cx+&At!_k;V%{o)`V z<`Ev{F&^g$p5!T><^j*}EYI;gFYqES@iMRQDzEW6Z}28>@iy=9E^S-u=!p0B`H zz8YVhuff;kYw@-DI(%I|fUn2b=L7i$d_%qw-cfy z#kc0$@WFgrK7?<_hw@>3I3K}B@=<(y-pxnz9r%uX3?IwK@$q~D@8J{qBtDr>;XCo2 z`7V4{z8l}2@4=_?J^5aIZ@v%Tm+!~-=Lhfu`9b_(eh5F5AI1;oNAM&0QT%9r3_q41 z$B*YH@Duq-{A7L#Kb4=xPv>XwGx=HkY<>PslG65DE(ZLLs5BP(&yy6cdUIC4`bfDWSAbMkp(k6Uqw}go;8X zp|Vg#s47$wstYxQnnEq1wopf?D+CDjg!)3D&_HM?G!hyMO@yXGkkCwMF0>F@3ay0J zLK`7iXe)#W?SxPvOb8bugh(MuXfL>hXrY79QHT*@g*YKzNDw?iqL3sc3n@Y;p|j9M z=qhv*x(hvoRH3KPOXw~15&8=Kg#N++VW2Qb7%U7Ch6=-k;lc=Eq%cYtEsPPy3gd+F z!USQWFiDs!OcACE(}d~53}L1)OPDRp5#|c>g!#e(VWF@{SS&0NmI}*+<-!VKrLam^ zEvymN3hRXR!UkcZuu0e~Y!S8!+l1}H4q>OTOV};!5%voEg#E$+;h=CxI4m3yjta+w z?C#;yNF%I zZen+_hnOn%6nlxi#Xe$Rv7gvq93T!92Z@8lA>vSRm^fSj5UA!UQ6mN;Q#XI6%@t$~Jd>}p)ABm5} zC*o7_nfP3MA-)t}iLb>s;#={Z_+I=VeiT26pT#fYSMi(pUHl>b6n}}31WSm7N|=O8 zghWb|L`y(oBv#@iUJ@ixk|bGDBvsNRT{0w7vLst_B$t#)$}DA(vP#*c>{1RXr<6;| zE#;B?q`Xo-DZf-eDk%9&g`~n#5vizDOe!vwkV;CWq|#CusjO5^Dlb)#DoT~4%2E}n zs#Hy?F4d4~O0}fgQXQ$T6d=`;>PvxA1F50ZNNOxKk(x?DQZuQ!)Iw@0wUSy(ZKPnS ztrQ})lR~91DO`$>BBdy)z2ugnr4CX@DMpHw;-q*fLGnn6Qj(M`rAVEm&QcettJF>E zF7=R7rJhnRskhWe>MQk=`bz_(fzlvpurx#(Dh-o{OCzL_(kN-PG)5XLjg!Vp6QqgK zBx$lVMVcy2lcq~Eq?ytzX|^;+nk&td=1U8th0-Evv9v^5DlLEfWCTX*@McOKDleSAcq@B_(X}7dT+AHmo_Dct(gVG`CuyjN^Djk!KODCk0 z(kbb*bVfQWos-T>7o>~QCF!zsMY<|oldelQq?^($>9%x7x+~q2?n@7(htebIvGhcG zDm{~)OE09C(ktn;^hSCsy_4QcAEb}cC+V~FMfxgzlfFwoq@U6+36fzMkx?0wahZ@w znUZN4$c)U&oXpFDEXtBB%ZjYZnykx)Y|55w%Z}`lGs&6dEOJ&ko19(FA?K8H$+_h` zvY(t+&L`)W3&;g!f4PucSS}(Lm5a&6C3UWocl3ZD?B3G5G z$<^f=a!t9GTwAUq*Ode0dUAa^P;MYMlpD#7CJW?JdkCw;CW94!3czJ?6QJy4E zmZ!*5ILd-;R>QT`-d{v|^StRM=iU<$4f3aL;E ztpJ5lScOw~MNmXVQe;I@R7F#C#ZXMeQf$RhTuLS-vyw&0s$^5LD>;;$N-ib0l1K4V z@+$e1{7M0(pyIC-QVJ_Yl%h&8rMOZ;DXElFN-JfQvPwCnyi!4_s8muaD^--LN;Rdr zQbVb!)KY3Ib(FeFfKpGXuLLR$l!i(prLodPX{rP%&6MU!3#FyfN@=aMQG%7WN{G@< z301(uhLKHuMAKIDua~4$`EC!GE5n+j8H}@qmek#8dNQG5IMO942RYE0IN~Kkx zGAgTbDz6Hvs7k7=Dyph#s;(NUsamS7I;u;}q-IvLs9Du)YIZe;np4fC=2r8lerjGd zpPFASpcYj9)k11vwTN0&Ev6P%OQbZMBYCR}E0>srA)BwSn4DZKO6y zm>RA|sF7-v+Fo_5(P{^^qZ*^es&Q((nxJ~rL^VlGR#VhYYG<{J+EwkQc2|3-scKKP zm)cwHqxMz%sr}Uf>OggnI#?Z|4poP#!_^V$NOhDtS{WD`8R|@RmO5LVqs~?5sq@tZ>Oysqx>#MJE>)MQ%heU?N_CaGT3w^ARoAKO)eY)K zb(6YT-J)()x2fCJ9qLYXm%3ZsqwZDrsr%If>Ou98dRRT89#xO2$JG<+N%fR^T0Ntl zRnMvC)eGuH^^$s7y`o-Kuc_D78|qE&gquy2TsrS_f>O=LB`dEFUK2@Ko&(#;| 
zOZAodT79FwRo|)a)eq`N^^^Kp{i1$Vzp3BVAL>u_mkMdHhG?jUX}Cscq(*791~f)v zHBRF-K@&AelQl(CHBHksLo+o?vo%L^X_>UlS{5y^E36gKifYBQ;#vuct+m!h3)b3dAzC{vR14F> zwFoUzi_+R_ZY^5tpmo$@v{)@pi`Nn~kCvz1`=K5JjJui7{5yY@r-sr}L*9o7*Y)iE8{37ym_oz{WQ=&a7^ zye{aXF6pwa=&G*ix^C#EZt1q}=q^2zo>|YLXVtUm+4UTHPCb{NThF8W>3Q{hdValt zUQqYf3+aXRB6?B1m|k2jp_kN4>815DdRe`kUS6-DSJW%%mGvrmRlS;CU9X|n)NARr z^*VZ8JwUIg*VhB}26{uik=|HuqBqrp^k#Z6`^+vvf1TRlW?r-$lcdbl2; zN9s{}d)=)^>mBrtdW;^c$LaBUg6`21^&~x6PtiN+o%JqySG}9wUGJf%>OJ*ddT+gt z-dFFZ_tyvL1NA}rV10-_R3D}f*GK3h^-=n0eT+UO+Mv&3WXl}GHS{kj4)-bgS!Mxv2qBpWG4C!@2`#pr5uGrAi+j8vni(aY#<^fCGx z{fz#`0Arvr$QW!4F@_q$jN!%zW27<47;TI(#v0>{@x}yWqA|&sY)mnx8q@oHl`;7g@0pp-?$T(~qF^(F?jN`@$8^Tq|^ zqH)Q%Y+Ny}8rO{L#tq}9am%=E+%fJN_l*0-1LL9b$ari#F`gRFjOWG+zH-T z0JEN1-wZSxm<`QFW@EF7+0+a&o0-kc7G_JcmD$>CV+NaT%@DJl8ES@^;bw#xX-1jt zO}80sb}&1dF=nh8XU3ZerpHV)lgwl@#q4BuHoKTz&2DCQvxk{#_B4B$z0E#mU$dXt z-yC2LGzXc3%^~JcbC@~Y9AS<$N13C|G3HovoH^c{U`{kAnUl>a=2UZSDCBLHRf7#ow?rJU~V)wnVZcm=2ml? zx!v4h?lgCqyUji3UUQ$h-#lO*G!L1F%_HVf^O$+uJYk+RPnoC9Gv-Vl39;EZ!0<(UL6L zQY_WdEZs6J)3Pkvax9mX$;xbHv9em(tn5|}E2ovq%5CMb{H(lIJ}bXfz$$3@TZOE` zRuQYHRm>`Gm9R=$rL59c8LO;S&MI$Juqs-WtjbmutEyGas&3V=YFf3d+EyK_t`%U_ zv+7%cRs*Y{)yQgWHL;pnK~^)Xxz)mIX|=LiTWzdhtF0AcwX;I4Fe}`Oup+G}tG(s6 zqOA^AM=QpPwc@OJE5Y(uiB^)8Y^7M8tj<;!tE<(`>TdP0Qmvj=FRQoJ$LeeKv-(>D ztbx`bYp^xM8fp!*hFc@7k=7_{v^B;WYmKwUTNA8_)+B4PHN~20O|zz3Gpw1`ENiwk z$C_)+v*ue1tcBJhYq7P&T52t`mRl>VmDVb2wYA1tYpt`^TN|v6)+TGSwZ+#%jiI%*xWj$0?Jlh!Hgv~|WhYn`*sTNkX0)+Ot* zb;Y`BU9+xRH>{i1E$g;*$GU6Xv+i3DtcTVk>#_C3dTKqho?9=hm)0xmwe`k&YrV7H zTOX{C)+g(;^~L&XeY3t>KdhhDFAK6^8?jLvvvHfSNt?218`zA^+MLbXf-TyTE!&E% z+M2D~hHcuGZQG9RvNPG4?JRayJDZ)|&SB@YbJ@A=Jhq>m*Uo3>w+q+>ZGXFvUDz&S z7qyGo#qAPyNxPI?+Ad?4waeM%?Fx29yOLemu3}fUtJ&4<8g@;)mR;MfW7o93C9(JnT)9z*Ww)@z9?S6KDdw@OA z9%K);huA~yVfJu)ggw$8WskPU*kkQ+_IP`OJ<*sUSuz}m)J|~W%hD=g}u^VWv{l^*lX=|_Ii7Rz0uxeZ??DCTkUQ3c6*1t z)81w8w)fb3?S1xs`+$AWK4c%ZkJv};WA<_TgniOJWuLas*k|o?_Idk)ebK&TU$(E< zSM6)|b^C^W)4pZjw(rR=A;5Dw{34($Mkaaf0Qct>zVM{;CGaa2ch zbjNT^$8v1Paa>L&C$p2q$?9ZtvO77PoK7w$x0A>5bMiX*ocvA!r=a8S6mkkXMVz8e zF{ijw!YS#La!Na8oU%?ir@T|aspwR4Dmzu2s!lbhx>Lic>C|#+J9V78PJmO-sqX|j z4V;EfBd4*`#A)gTInA8rP79}{)5>Y>v~hx+woZuC&IxtGoNyF*4120DYB z!Ojq8s58tN?u>9oI-{J?&KPH`GtL?BOmHSTlbp%U6lbb4&6)1ZaArEQoY~GCXRb5P zneQxc7CMWZ#m*9Esk6*k?yPWDI;))3&KhT}v(8!XY;ZO@o1D$g7H6xo&DrkkaCSPo zoZZeIXRou*+3y^14myXN!_E=ssB_FY?woK=I;Wh|&Kc*dbIv*MTyQQrmz>Ma73ZpR z&AIN}aBe!coZHSF=dN?lx$iu19y*Vl$IcVysq@Tv?!0hbI9Sn5%W=6}nOvD&SzK9N*<9IOIb1nixm>wjd0c+3ysmt%{H_A7f-ZkoAy;8n z5m!-HF;{U{30Fy1DOYJ%8CO|XIahgC1y@B^C0Auv6<1YPHCJ_44OdN9E!Y3U+C2qX zvb=A+pFOrcm8i_B>alIxwr$(CZQHhO+qTWKXaB#yn{#&}&U5prcXdQxtypia?uaLg zI>nsgP6?-^Q_3mrlyS;B<(%?P1*f7@$*JsAajH7ioa#;ur>0ZOsqNHp>N@qD`c4C< zq0`7|>@;zjI?bHsP79}{)5>Y>v~k)x?VR>b2dAUc$?5ELak@I)obFB!r>E1)>FxA! 
z`a1ob{>}hrpfkuB>+I>Vgd&Io6uGs+q5jB&;~@0DXI?J5p&I)Ixv&vcRtZ~*l>zwt@24|zQ$=U2| zake_!obApIXQ#8v+3oCc_B#8V{mudBpmWGM>>P29I>(&j&I#wFbILjGoN>-N=bZD- z1?Qr3$+_%YajrVooa@dF=caSZx$WF>?mG9J`_2RBq4UUj>^yOvI?tTv&I{+I^U8Vc zym8(-@0|C}2j`>n$@%PjalSg=obS#L=cn__`R)91LWzGwXc0z)72!m95kW*0kwjz> zMMM?RM062D#1yeaY!OGq74bxTkw7FAi9}+NL^whSDU{H{6-HR$310*vL?ji-L~@Zr zq!g({YLP~y73oBJkwIh>nM7uhMPwD(M0Sxw=e7iZm~z~ z75l_~aX=gths0rVL>v{z#Bp&#oD`?TX>mrJ73aixaY0-Zm&9dpMO+ov#C35)+!VLO zZE;8375Bt_@jyHjkHll~L_8JG#B=dNycDm*Ywr>!^!Y6f{Z95$;dK_j4Gqa=rV?kDPzglGLDQZi~%qp|V z>@tVUDRar(GLOtF^U3_OfGj8r$-=UTEGmo1;avEcDQn5vvW~1P>&g1Efov!n$;PsYY$}_{=CXxsDO<_bvW;vj+sXE_gX}0f z$?*s-?y`sMDSOG@vXAU5`^o-tfE*|X$-#1n94d#&;c|o=DM!iCa*P}+$I0<> zf}AKP$;onxoGPcu>2ijgDQC&qa*muU=gIkUfm|pT$;EPsTq>8z<#L5wDObtWa*bRo z*U9yAgWM=L$<1<$+$y)p?Q)0QDR;@;a*y0A_sRY8fIKJ<$;0x9JSvaL+*)YDR0T!@{YVK@5%e}fqW<*$;a}Ed@7&G=kkSo zDPPIg@{N2e-^us#gZwBz$)e5yztx~Ji8nsrfQ|r|RwNY(So7EPzRc%w-)ef~&?NYnd z9<^8PQ~T8cbx<8rht&~vR2@^t)d_V{ol>XO8Ff~jQ|HwMbx~bXm(>+@Rb5lp)eUu1 z-BP#J9d%dTQ}@*a^-w)hkJS_PR6SGA)eH4fy;85$8}(MbQ}5LW^-+CNpVb%jRee+6 z)erSk{ZhZx9~DafqeJU3I;;+-!|Mn-qK>2^>nJ*^j;5pQ7&@korDN+jIm=IILQAc*)~+_%YESz*&>=dhPNtLV6gs6&rBmxPI;~Ep)9Va6qt2u=>nu8} z&Ze{L96G1YrE}{%Ims_SE~bm?61t==rAzBFx~wjz%j*idqOPPX z>nggcuBNN&8oH*grEBXtx~{IL>+1%(p>Cub>n6IXZl;^-7P_TwrCaMZx~*=f+v^Ux zqwb_T>n^&h?xwrz9=fOQrF-i>y07l1`|AOEpdO?L>mhom9;S!u5qhK^rAO;AdaNF& z$Lk4tqMoED>nVDwo~Ebk8G5FkrDy9odajm_=rUZ$7p6?&y!rB~}U zdaYik*Xs>>qu!)9>n(b#-ln(f9eStUrFZK+davH6_v-`tpgyDz>m&N8KBkZB6Z)h+ zrBCZK`m8>u&+7~NqQ0ar>nr-IzNWA18~UccrElvy`mVmG@9PKpp?;(v>nHlDex{%6 z7y6}srC;ke`mKJa-|G+hqyD5n>o5AN{-(d{ANr^MrGM)`I+Xj58`=%yhIPZa;oS&s zL^qNf*^S~xb)&h_-5736H$$!g zxFK#*H<_EyUALZF-)-PFbQ`&i-6n2Rx0&1AZQ-_bTe+>>Hf~$Do!j2+;C6I7xt-lEZdbRP z+uiNq_H=u>z1==;U$>vz-yPr%bO*VE-68H!cbGfe9pR32N4cZjG45D*oIBo~;7)WW zxs%-~?o@Z0JKde(&U9zFv)wuFTz8&3-(BD?bQigc-6if)cbU7~UE!{DSGlX*HSSt> zox9%M;BIs`xtrZB?pAl3yWQR4?sRv#yWKtRUU#3n-#y?SbPu_Q-6QT%_n3R!J>i~o zPr0YvGwxaUoO|BA;9hhuxtHB5?p61id)>X^-gIxdx7|DLUH6`Q-+kadbRW5o-6!r- z_nG_Lec`@zU%9W{H||^ao%`PX;C^&Jxu4xH?pODl``!KF{&au2zuiA>DD#gAZNiwa zCY%XxBAAFKl8J1hn5ZV2iEd(;m?oBqZQ_`?CZ36J5}1T0kx6Wl7{>@BjWXJ}#u#fn zC(oFYiZR(i1 zrk<&98kmNrk!ft2n5L$gX>MAWmZp_yZQ7W&rk!bTI+%{8lj&@_n69Rq>27+Mo~D=S zZTgtLrl09=2AF|nkQr=-n4xBv8E!_Hk!F+`ZN`|fW}F#sCYXt4l9_C#n5kx(nQms7 znP!%mZRVJ{W}caE7MO)*ky&h(n5AZ!S#DOCm1dP$ZPu8zW}R7YHkgfOli6&xn5|}; z*=}~2oo1KWZT6VGW}n$_4w!@HkU4CQn4{*HIc`pvljf8;ZO)jp=A1ciE|`nvlDTZI zn5*WRxo&Qlo933eZSI)6=AOB49+-#bk$G&Mn5X8Md2U{qm*$muZQhu-=AC(OKA4Z@ zllg4En6KuW`EGugpXQhOZT^^0_8%MChOuF7I2+zZun}z}8`(y&QEfCE-NvvnZ7dtx z#<6j2JR9F8unBDHyV+_tbSZ7bW_ zwy|w(JKNrNupMnD+u3%pU2Qkp-S)6OZ7Wp# z?I=6ijuoLYhJK0XLQ|&Z6-OjKx?JPUn&ardtJUibmunX-XyVx$VOYJhd z+^(=I?JB$4uCZ(FI=kL(up8|ryV-8BTkSTx-R`hE?Jm39?y-CAKD*x@um|lSd)OYa zN9{3t+@7!}?J0ZOp0Q``IeXq-uovwmd)Z#GSM4=>-QKV_?JaxT-m!P>J$v6iun+Ac z``A9QPwg}N+`h0c?JN7*zOirZJNw>#upjLw``Lc6U+p*h-TtsY?JxV={;{FFf4tCM z7%!|B&I|8F@FIGVyvSY@FRB;Ki|)nnVtTQ>*j^kjt{2aX?7Pub@}RE9@2Vih9Mo;$8`_q*ux-?UnJ$dgZ+GUInkBSIMjFRq?8N z)x7Fn4X>tG%d73x@#=c@y!u`Puc6n-YwR`gntIK==3Wc0rPs=9?X~gRdhNXSUI(wE z*U9Vbb@94--MsEz53i@!%j@m+@%noGy#C$*Z=g5G8|)47hI+%i;ob;uq&LbN?Tzuq zdgHwD-UM%=H_4mqP4T9B)4b{43~#14%bV@Z@#cE-y!qY&Z=tuyTkI|ImU_#)<=zT! 
zrMJpk?XB_Fdh5LP-Ue@@x5?Y=ZSl5x+q~`G4sWNo%iHbk@%DQAy#3w*@1S?cJM10t zj(W$u0^^^I@{S%lYN~3Vubu zl3&@c;#c*n`PKazeoeoYU)!(a*Y)f9_5B8ZL%)&V*l*%D^_%(4{T6;pzm?zGZ{xT1 z+xhMN4t__!li%6z;&=7C`Q7~;!pLb`P2Ow{!D+CKii+<&-Lf|^Zf<>LVuCJ*k9r= z^_Tg}{T2R7f0e)5U*oU!*ZJ%H4gN-dlfT*D;&1i0`P=;+{!V|FzuVvA@Adcj`~3s{ zLI03{*gxVQ^^f_-{S*F4|CE2)KjWYE&-v&53;sp_l7HF1;$QWz`Pcm${!Rauf7`#~ z-}UeL_x%U{L;sQg*ni?b^`H6A{TKdA|CRsRf8)RP-}&$T5B^90lmFTO;(ztO`QQB? z{!jmx|J(oLhYJ1)LI+`jutB&Wd=Mds7(@yp2T_8kL9`%x5F>~g#0p{uae}x(ydZv% zAV?S_3K9oN0w)lG3{;>4H!y(>yuc5FAS6f{Bny%UDT0(isvvccCP*8k3(^M}f{a0? zAajr<$Qon|vIjYWoI$Q2caSH@8{`Y}2L*zHL7|{NtArUx^EnZc}Jb}%QH8_Wyl2MdCQ z!J=Ssuq0R-EDM$gD}t55s$g}nCRiJ+3)Tl4f{nqZU~{k~*cxmLwg)?cox!ePcd#ee z8|(}A2M2QCO8|M3(f}@f{VeW;Bs&!xEfpwt_L@Q zo58K%c5o-S8{7--2M>aW!K2`D@FaK|JPV!&FM^lBtKfC;CU_gX3*HAGf{($c;B)XL z_!@i*z6U>opTV!-ckm|&74lC==#VfWVMD@&gb#@j5-}uFNaT=wkl+(!afWqF4J^|33Y{s`c8({`(rWbb46D zzFGgB$^V{c+5fkK?*H=t^-hAn-Ijk7<^TAv#`y1JH0J0S17qU9jm-b$-^(2SOB{@g z@i0Cnz=W6x6Jru|P@qJG8eKGK(L)~t48f$B43lFDOo^#5HKxJ;)u*OoPLCNdBWA+P zm<9h=XPb>VJLbTgm;O(V-fscHFh!P;#dMpVk!KuT~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5 zuElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJPvA*Bg{Sch zp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{&U*Jo8g|G1q zzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elrO*!`Qo-MWACw^sLW{`Tqq&EFc`zxmsu z`!|0pbpPgWiSFP0?a%$2zwNkx^S2)NZ~hkK{>|TN+`swTi2FBx8*u;T@7d*V{vKHV z=I?RkZ~h)p{w5B_#dsJW6JSD2go!Z;Iw(-0LX9pOwCJIa0ft~wOoquZ1*XJQm>Sby zT1ZzFARfZQcm$8)F+7eZ z@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO z@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2mf~o;Xk4OTf3n`V;KDZ`})4< z{N5NC6Jud)jDvA89>&K6m=F_TVoZV#3Y4f&ql*SDdgx<-A(#}CVRB4?DKQnM#x$4~ z(_wndfEh6pX2vX-6|-S>%z-&E7v{!1m>2V5ek_0mu@Dxmq=6{}%&tbsML7S_f(SQqPIeQbaYu@N@LCfF34VRLMOEwL50#x~d% z+hKd`fE}?DcE&E)6}w?~?14S87xu^NPR1!X6{q2JoPjfO7S6^wI2Y&Pd|ZGFaS<-YCAbuq;c{GoD{&RB#x=MW z*Wr5HfE#fWZpJOR6}RDb+<`lB7w*PAxEJ@~emsB&@em%yBX|^#;c+~HC-D@X#xr;p z&*6EzfEV!+UdAhU6|doSyn#3I7T(4?co*;CeSClq@ew}8C-@Yf;d6X}FYy(=#y9vD z-{E`wfFJP_e#S5O6~Ezk{DD957yiaS_`g5&{}YD&k74k?`^^6RQ8?!C7y%<!;vgK1LvSb#!{Imr zN8%_Pjbm^uj>GXd0Vm=loQzX&Do(@cI0I+mES!yVa4ycn`M3ZV;v!s(OK>SJ!{xXF zSK=yMjcaf%uEX`X0XO0%+>BdrD{jN>xC3|MF5HcKa4+t|{dfQm;vqbYNAM^f!{c}Y zPvR*&jc4#Ip2PEa0Wabuyo^`yDqh3ucmr?ZExe6)@GjoN`}hDK;v;;FPw*)|!{_({ zU*ao#jc@QRzQgzU0YBm={ET1lD}KZ8_yd39FZ_*v@V`qw|NZ>@CoKCP!(dnphv6{- zM#M-M8KYoSjE2!M2FAo#7#rhYT#SeDF##sTM3@+ppo0P>D%9wrL5m*x7+?q{#blTq zQ(#I=g{d(Orp0ua9y4G@%!HXS3ueV^m>qLqPRxb5F%Ra&e3%~#U_mT|g|P@0#bQ_- zOJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7dRQMDU_)$#jj;(f#b($X zTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cmee%K!e;6NONgK-EB#bG!c zN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4i*X4q#bvl0 zSKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfuhw%s=#bbCJ zPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?bkMRjU#b@{& zU*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elrc&+LEs2SZ~R42$6~JVwBX z7zra|6pV_|FgnJ-m>3IVV;qc&@i0Cnz=W6x6Jru|P@qJG8eKGK(L)~t48f$B43lFD zOo^#5HKxI|m=4op2F!?=Ff(Sste6e6V-C!TxiB~8!MvCc^J4)lh=s5)7Qv!e42xq4 zEQzJCG?u}#SPsi$1+0jburgM`|8BAV_Z?h~xjNRsnpg{KV;!uE^{_rRz=qfe8)Fk} zip{V&w!oIy3R`0vY>Vx%J$As3*acz=gO77vmCK zipy|0uE3SJ3RmMAT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q591L$ zipTIcp1_lM3Qyx1Jd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!wv>U|0-? 
[... base85-encoded GIT binary patch data omitted ...]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle b/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_3.6.1.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..6bb02672a4151c8d6536127fc94e68634e56c86d
GIT binary patch
literal 125349

[... base85-encoded GIT binary patch data omitted ...]
literal 0
HcmV?d00001

diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index fad6237d851fb..f46f62e781006 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -207,7 +207,8 @@ def test_pickles(current_pickle_data, version):
         if data is None:
             continue
         n += 1
-    assert n > 0, 'Pickle files are not tested'
+    assert n > 0, ('Pickle files are not '
+                   'tested: {version}'.format(version=version))
 
 
 def test_round_trip_current(current_pickle_data):

From 1b53d8864af0ed936f84d0935e2cc360dc9f8de7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 31 Mar 2017 08:40:58 +0200
Subject: [PATCH 315/353] Only call validation functions when args/kwargs are passed (#15850)

---
 pandas/compat/numpy/function.py | 35 +++++++++++++++++----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py
index 4053994efa005..f448a9aad04c6 100644
--- a/pandas/compat/numpy/function.py
+++ b/pandas/compat/numpy/function.py
@@ -37,23 +37,24 @@ def __init__(self, defaults, fname=None, method=None,
 
     def __call__(self, args, kwargs, fname=None,
                  max_fname_arg_count=None, method=None):
-        fname = self.fname if fname is None else fname
-        max_fname_arg_count = (self.max_fname_arg_count if
-                               max_fname_arg_count is None
-                               else max_fname_arg_count)
-        method = self.method if method is None else method
-
-        if method == 'args':
-            validate_args(fname, args, max_fname_arg_count, self.defaults)
-        elif method == 'kwargs':
-            validate_kwargs(fname, kwargs, self.defaults)
-        elif method == 'both':
-            validate_args_and_kwargs(fname, args, kwargs,
-                                     max_fname_arg_count,
-                                     self.defaults)
-        else:
-            raise ValueError("invalid validation method "
-                             "'{method}'".format(method=method))
+        if args or kwargs:
+            fname = self.fname if fname is None else fname
+            max_fname_arg_count = (self.max_fname_arg_count if +
max_fname_arg_count is None + else max_fname_arg_count) + method = self.method if method is None else method + + if method == 'args': + validate_args(fname, args, max_fname_arg_count, self.defaults) + elif method == 'kwargs': + validate_kwargs(fname, kwargs, self.defaults) + elif method == 'both': + validate_args_and_kwargs(fname, args, kwargs, + max_fname_arg_count, + self.defaults) + else: + raise ValueError("invalid validation method " + "'{method}'".format(method=method)) ARGMINMAX_DEFAULTS = dict(out=None) From e7201ca1a9f5b3359a0e179ab1faf6a39cc9e2c7 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 1 Apr 2017 11:37:16 -0400 Subject: [PATCH 316/353] BLD: bug in building json compiled code on windows (#15857) --- pandas/_libs/src/ujson/python/objToJSON.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 26a68b8a9ae3a..f2c0b18d35131 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -401,7 +401,8 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - PyObject *obj = (PyObject *)_obj; + PyObject *obj, *newObj; + obj = (PyObject *)_obj; #if (PY_VERSION_HEX >= 0x03030000) if (PyUnicode_IS_COMPACT_ASCII(obj)) { @@ -412,8 +413,8 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, } #endif - PyObject *newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj), - PyUnicode_GET_SIZE(obj), NULL); + newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj), + PyUnicode_GET_SIZE(obj), NULL); GET_TC(tc)->newObj = newObj; From 57c7c87f695f6b133742978e4c7d04f4892eb991 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 1 Apr 2017 12:04:56 -0400 Subject: [PATCH 317/353] CI: use pytest-xdist on windows --- appveyor.yml | 2 +- test.bat | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index db729b3005be6..684b859c206b2 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -72,7 +72,7 @@ install: - cmd: conda info -a # create our env - - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest + - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest pytest-xdist - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" diff --git a/test.bat b/test.bat index 080a1cc163a05..6c69f83866ffd 100644 --- a/test.bat +++ b/test.bat @@ -1,3 +1,3 @@ :: test on windows -pytest --skip-slow --skip-network pandas %* +pytest --skip-slow --skip-network pandas -n 2 %* From a57e681aef4d5de5da1201d18009b5dbb4382a6d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 1 Apr 2017 17:48:58 -0400 Subject: [PATCH 318/353] PERF: improve iloc list indexing Author: Joris Van den Bossche Closes #15504 from jorisvandenbossche/perf-iloc-list and squashes the following commits: bf54a0b [Joris Van den Bossche] TST: edit test_take to preserve original dtype 74d45ae [Joris Van den Bossche] add whatsnew 3e537b6 [Joris Van den Bossche] small clean-up 6d2705c [Joris Van den Bossche] take method: only validate kwargs if there are kwargs aacbaa8 [Joris Van den Bossche] PERF: improve iloc list indexing --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/indexing.py | 24 ++++++++++++++---------- pandas/core/series.py | 7 ++++--- pandas/indexes/base.py | 3 ++- pandas/tests/test_generic.py 
| 2 +- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 399f91fc60810..a34b9feb2b2fa 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -905,7 +905,7 @@ Performance Improvements - Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`) - Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied function used the ``.name`` attribute of the group DataFrame (:issue:`15062`). - +- Improved performance of ``iloc`` indexing with a list or array (:issue:`15504`). .. _whatsnew_0200.bug_fixes: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c80e8c34aa88f..61a847ccf1523 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1697,26 +1697,24 @@ def _get_slice_axis(self, slice_obj, axis=0): else: return self.obj.take(slice_obj, axis=axis, convert=False) - def _get_list_axis(self, key_list, axis=0): + def _get_list_axis(self, key, axis=0): """ Return Series values by list or array of integers Parameters ---------- - key_list : list-like positional indexer + key : list-like positional indexer axis : int (can only be zero) Returns ------- Series object """ - - # validate list bounds - self._is_valid_list_like(key_list, axis) - - # force an actual list - key_list = list(key_list) - return self.obj.take(key_list, axis=axis, convert=False) + try: + return self.obj.take(key, axis=axis, convert=False) + except IndexError: + # re-raise with different error message + raise IndexError("positional indexers are out-of-bounds") def _getitem_axis(self, key, axis=0): @@ -1724,7 +1722,13 @@ def _getitem_axis(self, key, axis=0): self._has_valid_type(key, axis) return self._get_slice_axis(key, axis=axis) - elif is_bool_indexer(key): + if isinstance(key, list): + try: + key = np.asarray(key) + except TypeError: # pragma: no cover + pass + + if is_bool_indexer(key): self._has_valid_type(key, axis) return self._getbool_axis(key, axis=axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index bcc1ed272b081..bcd58ea791083 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2378,7 +2378,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): -------- numpy.ndarray.take """ - nv.validate_take(tuple(), kwargs) + if kwargs: + nv.validate_take(tuple(), kwargs) # check/convert indicies here if convert: @@ -2387,8 +2388,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): indices = _ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self._values.take(indices) - return self._constructor(new_values, - index=new_index).__finalize__(self) + return (self._constructor(new_values, index=new_index, fastpath=True) + .__finalize__(self)) def isin(self, values): """ diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 7f0de963e5c56..91e2422873dd4 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1668,7 +1668,8 @@ def _append_same_dtype(self, to_concat, name): @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): - nv.validate_take(tuple(), kwargs) + if kwargs: + nv.validate_take(tuple(), kwargs) indices = _ensure_platform_int(indices) if self._can_hold_na: taken = self._assert_take_fillable(self.values, indices, diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index a2329e2d1768e..0e8e8dc43ff03 100644 --- 
a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1870,7 +1870,7 @@ def test_take(self): tm.makeObjectSeries()]: out = s.take(indices) expected = Series(data=s.values.take(indices), - index=s.index.take(indices)) + index=s.index.take(indices), dtype=s.dtype) tm.assert_series_equal(out, expected) for df in [tm.makeTimeDataFrame()]: out = df.take(indices) From d1e1ba08ef259724ba71e0953c52e8e4ad81bd17 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 1 Apr 2017 20:09:03 -0400 Subject: [PATCH 319/353] CI: add jdcal to 3.6 build as openpyxl >= 2.4.5 is broken --- ci/requirements-3.6.run | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 41c9680ce1b7e..8f81c4620558e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -2,7 +2,10 @@ python-dateutil pytz numpy scipy +# openpyxl >= 2.4.5 should be dependent on jdcal +# but is not for some reason openpyxl +jdcal xlsxwriter xlrd xlwt From 74f527ff0cbc8045b9f350382a4ad37694e8c5e6 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 2 Apr 2017 10:19:43 -0400 Subject: [PATCH 320/353] BUG: Check integrity of sparse int indices The check_integrity method of IntIndex in pandas.sparse was un- implemented despite having documentation. This PR implements the method and calls it when initializing `IntIndex`. xref #15844 (comment) Author: gfyoung Closes #15863 from gfyoung/sparse-pyx-refactor and squashes the following commits: f435d28 [gfyoung] BUG: Check integrity of sparse int indices --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/sparse/sparse.pyx | 48 +++++++++++++++++++++++---- pandas/tests/sparse/test_libsparse.py | 38 +++++++++++++++++++++ 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a34b9feb2b2fa..230f39db67197 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1020,6 +1020,7 @@ Sparse - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) - Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) - Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +- Bug in sparse array indexing in which indices were not being validated (:issue:`15863`) Reshaping ^^^^^^^^^ diff --git a/pandas/sparse/sparse.pyx b/pandas/sparse/sparse.pyx index 00d317c42b18d..0c2e056ead7fa 100644 --- a/pandas/sparse/sparse.pyx +++ b/pandas/sparse/sparse.pyx @@ -34,8 +34,9 @@ cdef inline int int_min(int a, int b): return a if a <= b else b cdef class SparseIndex: """ - Abstract superclass for sparse index types + Abstract superclass for sparse index types. """ + def __init__(self): raise NotImplementedError @@ -48,8 +49,9 @@ cdef class IntIndex(SparseIndex): ---------- length : integer indices : array-like - Contains integers corresponding to + Contains integers corresponding to the indices. 
""" + cdef readonly: Py_ssize_t length, npoints ndarray indices @@ -59,9 +61,11 @@ cdef class IntIndex(SparseIndex): self.indices = np.ascontiguousarray(indices, dtype=np.int32) self.npoints = len(self.indices) + self.check_integrity() + def __reduce__(self): args = (self.length, self.indices) - return (IntIndex, args) + return IntIndex, args def __repr__(self): output = 'IntIndex\n' @@ -70,10 +74,40 @@ cdef class IntIndex(SparseIndex): def check_integrity(self): """ - Only need be strictly ascending and nothing less than 0 or greater than - total length + Checks the following: + + - Indices are strictly ascending + - Number of indices is at most self.length + - Indices are at least 0 and at most the total length less one + + A ValueError is raised if any of these conditions is violated. """ - pass + + cdef: + int32_t index, prev = -1 + + if self.npoints > self.length: + msg = ("Too many indices. Expected " + "{exp} but found {act}").format( + exp=self.length, act=self.npoints) + raise ValueError(msg) + + # Indices are vacuously ordered and non-negative + # if the sequence of indices is empty. + if self.npoints == 0: + return + + if min(self.indices) < 0: + raise ValueError("No index can be less than zero") + + if max(self.indices) >= self.length: + raise ValueError("All indices must be less than the length") + + for index in self.indices: + if prev != -1 and index <= prev: + raise ValueError("Indices must be strictly increasing") + + prev = index def equals(self, other): if not isinstance(other, IntIndex): @@ -320,7 +354,7 @@ cdef class BlockIndex(SparseIndex): def __reduce__(self): args = (self.length, self.blocs, self.blengths) - return (BlockIndex, args) + return BlockIndex, args def __repr__(self): output = 'BlockIndex\n' diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py index b6ab99dc66cda..696d2cf47f4c0 100644 --- a/pandas/tests/sparse/test_libsparse.py +++ b/pandas/tests/sparse/test_libsparse.py @@ -474,6 +474,44 @@ def test_to_block_index(self): class TestIntIndex(tm.TestCase): + def test_check_integrity(self): + + # Too many indices than specified in self.length + msg = "Too many indices" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=1, indices=[1, 2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # All indices must be less than the length. + msg = "All indices must be less than the length" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 5]) + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 6]) + + # Indices must be strictly ascending. 
+ msg = "Indices must be strictly increasing" + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, 3, 2]) + + with tm.assertRaisesRegexp(ValueError, msg): + IntIndex(length=5, indices=[1, 3, 3]) + def test_int_internal(self): idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') self.assertIsInstance(idx, IntIndex) From a293d22ed25294c1753524903455ce5122319632 Mon Sep 17 00:00:00 2001 From: atbd Date: Sun, 2 Apr 2017 23:57:01 +0200 Subject: [PATCH 321/353] COMPAT: NaT support tz_localize / tz_convert (#15830) (#15868) --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/_libs/tslib.pyx | 3 ++- pandas/tests/scalar/test_nat.py | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 230f39db67197..781a912555e14 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -837,6 +837,8 @@ Other API Changes ignored (no longer needed to specify the new behaviour) and is deprecated. - ``NaT`` will now correctly report ``False`` for datetimelike boolean operations such as ``is_month_start`` (:issue:`15781`) - ``NaT`` will now correctly return ``np.nan`` for ``Timedelta`` and ``Period`` accessors such as ``days`` and ``quarter`` (:issue:`15782`) +- ``NaT`` will now returns ``NaT`` for ``tz_localize`` and ``tz_convert`` + methods (:issue:`15830`) .. _whatsnew_0200.deprecations: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d441f1ec4759b..5aa8e15d0d087 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3835,7 +3835,8 @@ for field in fields: # to the NaTType class; these can return NaT, np.nan # or raise respectively _nat_methods = ['date', 'now', 'replace', 'to_pydatetime', - 'today', 'round', 'floor', 'ceil'] + 'today', 'round', 'floor', 'ceil', 'tz_convert', + 'tz_localize'] _nan_methods = ['weekday', 'isoweekday', 'total_seconds'] _implemented_methods = ['to_datetime', 'to_datetime64', 'isoformat'] _implemented_methods.extend(_nat_methods) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index ce2ed237f5559..0695fe2243947 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -129,7 +129,8 @@ def test_NaT_methods(): 'timetuple', 'timetz', 'toordinal', 'tzname', 'utcfromtimestamp', 'utcnow', 'utcoffset', 'utctimetuple'] - nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today'] + nat_methods = ['date', 'now', 'replace', 'to_datetime', 'today', + 'tz_convert', 'tz_localize'] nan_methods = ['weekday', 'isoweekday'] for method in raise_methods: From 67cc0213def8b9f56c4d1f71bb95ebef22790b24 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 2 Apr 2017 17:59:15 -0400 Subject: [PATCH 322/353] CLN: Remove "flake8: noqa" from more files Another round of house-cleaning that builds off #15842. xref #12066 (comment) : the issue remains unresolved, but it does not seem entirely necessary to disable style-checking on the entire file for that IMO. 
Author: gfyoung Closes #15867 from gfyoung/flake8-noqa-clean and squashes the following commits: 0c84926 [gfyoung] CLN: Make tseries/common.py flake8-able 7a799ff [gfyoung] CLN: Make _version.py flake8-able 7087b64 [gfyoung] CLN: Make test_categorical.py flake8-able 5d5abf8 [gfyoung] CLN: Make test_categorical.py flake8-able 6ace90b [gfyoung] CLN: Make test_eval.py flake8-able --- pandas/_version.py | 3 - pandas/tests/computation/test_eval.py | 95 ++++++++++---------- pandas/tests/indexes/test_category.py | 57 ++++++------ pandas/tests/test_categorical.py | 120 ++++++++++++-------------- pandas/tseries/common.py | 8 +- 5 files changed, 134 insertions(+), 149 deletions(-) diff --git a/pandas/_version.py b/pandas/_version.py index d764923fd7247..4695b512feff5 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -8,8 +7,6 @@ # This file is released into the public domain. Generated by # versioneer-0.15 (https://github.com/warner/python-versioneer) -# flake8: noqa - import errno import os import re diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index ed6006440441e..81e9b7c77a81b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1,10 +1,6 @@ - -# flake8: noqa - import warnings import operator from itertools import product -from distutils.version import LooseVersion import pytest @@ -28,12 +24,11 @@ import pandas.computation.expr as expr import pandas.util.testing as tm -import pandas._libs.lib as lib from pandas.util.testing import (assert_frame_equal, randbool, assertRaisesRegexp, assert_numpy_array_equal, assert_produces_warning, assert_series_equal, slow) -from pandas.compat import PY3, u, reduce +from pandas.compat import PY3, reduce _series_frame_incompatible = _bool_ops_syms _scalar_skip = 'in', 'not in' @@ -43,9 +38,9 @@ pytest.mark.skipif(engine == 'numexpr' and not _USE_NUMEXPR, reason='numexpr enabled->{enabled}, ' 'installed->{installed}'.format( - enabled=_USE_NUMEXPR, - installed=_NUMEXPR_INSTALLED))(engine) - for engine in _engines + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED))(engine) + for engine in _engines # noqa )) def engine(request): return request.param @@ -66,7 +61,8 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): try: return c(lhs, rhs) except ValueError as e: - if str(e).startswith('negative number cannot be raised to a fractional power'): + if str(e).startswith('negative number cannot be ' + 'raised to a fractional power'): return np.nan raise return c(lhs, rhs) @@ -74,14 +70,14 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): def _series_and_2d_ndarray(lhs, rhs): return ((isinstance(lhs, Series) and - isinstance(rhs, np.ndarray) and rhs.ndim > 1) - or (isinstance(rhs, Series) and - isinstance(lhs, np.ndarray) and lhs.ndim > 1)) + isinstance(rhs, np.ndarray) and rhs.ndim > 1) or + (isinstance(rhs, Series) and + isinstance(lhs, np.ndarray) and lhs.ndim > 1)) def _series_and_frame(lhs, rhs): - return ((isinstance(lhs, Series) and isinstance(rhs, DataFrame)) - or (isinstance(rhs, Series) and isinstance(lhs, DataFrame))) + return ((isinstance(lhs, Series) and isinstance(rhs, DataFrame)) or + (isinstance(rhs, Series) and isinstance(lhs, DataFrame))) def _bool_and_frame(lhs, rhs): @@ -228,19 +224,22 @@ def 
check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): else: lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine) rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine) - if (isinstance(lhs_new, Series) and isinstance(rhs_new, DataFrame) - and binop in _series_frame_incompatible): + if (isinstance(lhs_new, Series) and + isinstance(rhs_new, DataFrame) and + binop in _series_frame_incompatible): pass # TODO: the code below should be added back when left and right # hand side bool ops are fixed. - + # # try: - # self.assertRaises(Exception, pd.eval, ex, - #local_dict={'lhs': lhs, 'rhs': rhs}, - # engine=self.engine, parser=self.parser) + # self.assertRaises(Exception, pd.eval, ex, + # local_dict={'lhs': lhs, 'rhs': rhs}, + # engine=self.engine, parser=self.parser) # except AssertionError: - #import ipdb; ipdb.set_trace() - # raise + # import ipdb + # + # ipdb.set_trace() + # raise else: expected = _eval_single_bin( lhs_new, binop, rhs_new, self.engine) @@ -248,7 +247,6 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): self.check_equal(result, expected) def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): - skip_these = _scalar_skip def check_operands(left, right, cmp_op): return _eval_single_bin(left, cmp_op, right, self.engine) @@ -334,7 +332,8 @@ def get_expected_pow_result(self, lhs, rhs): try: expected = _eval_single_bin(lhs, '**', rhs, self.engine) except ValueError as e: - if str(e).startswith('negative number cannot be raised to a fractional power'): + if str(e).startswith('negative number cannot be ' + 'raised to a fractional power'): if self.engine == 'python': pytest.skip(str(e)) else: @@ -650,7 +649,7 @@ def test_disallow_scalar_bool_ops(self): exprs += '2 * x > 2 or 1 and 2', exprs += '2 * df > 3 and 1 or a', - x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) + x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2)) # noqa for ex in exprs: with tm.assertRaises(NotImplementedError): pd.eval(ex, engine=self.engine, parser=self.parser) @@ -682,7 +681,7 @@ def test_identical(self): tm.assert_numpy_array_equal(result, np.array([1.5])) self.assertEqual(result.shape, (1, )) - x = np.array([False]) + x = np.array([False]) # noqa result = pd.eval('x', engine=self.engine, parser=self.parser) tm.assert_numpy_array_equal(result, np.array([False])) self.assertEqual(result.shape, (1, )) @@ -792,9 +791,8 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs): f = lambda *args, **kwargs: np.random.randn() -#------------------------------------- -# typecasting rules consistency with python -# issue #12388 +# ------------------------------------- +# gh-12388: Typecasting rules consistency with python class TestTypeCasting(object): @@ -817,8 +815,8 @@ def test_binop_typecasting(self, engine, parser, op, dt): assert_frame_equal(res, eval(s)) -#------------------------------------- -# basic and complex alignment +# ------------------------------------- +# Basic and complex alignment def _is_datetime(x): return issubclass(x.dtype.type, np.datetime64) @@ -1064,8 +1062,8 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): tm.assert_equal(msg, expected) -#------------------------------------ -# slightly more complex ops +# ------------------------------------ +# Slightly more complex ops class TestOperationsNumExprPandas(tm.TestCase): @@ -1156,7 +1154,7 @@ def test_single_variable(self): def test_truediv(self): s = np.array([1]) ex = 's / 1' - d = {'s': s} + d = {'s': s} # noqa if PY3: res = self.eval(ex, truediv=False) @@ -1204,7 
+1202,7 @@ def test_truediv(self): self.assertEqual(res, expec) def test_failing_subscript_with_name_error(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa with tm.assertRaises(NameError): self.eval('df[x > 2] > 2') @@ -1501,7 +1499,7 @@ def setUpClass(cls): cls.arith_ops) def test_check_many_exprs(self): - a = 1 + a = 1 # noqa expr = ' * '.join('a' * 33) expected = 1 res = pd.eval(expr, engine=self.engine, parser=self.parser) @@ -1526,13 +1524,13 @@ def test_fails_not(self): engine=self.engine) def test_fails_ampersand(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 & (df > 0)' with tm.assertRaises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) def test_fails_pipe(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) # noqa ex = '(df + 2)[df > 1] > 0 | (df > 0)' with tm.assertRaises(NotImplementedError): pd.eval(ex, parser=self.parser, engine=self.engine) @@ -1728,7 +1726,7 @@ def test_global_scope(self, engine, parser): parser=parser)) def test_no_new_locals(self, engine, parser): - x = 1 + x = 1 # noqa lcls = locals().copy() pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser) lcls2 = locals().copy() @@ -1736,7 +1734,7 @@ def test_no_new_locals(self, engine, parser): tm.assert_equal(lcls, lcls2) def test_no_new_globals(self, engine, parser): - x = 1 + x = 1 # noqa gbls = globals().copy() pd.eval('x + 1', engine=engine, parser=parser) gbls2 = globals().copy() @@ -1787,15 +1785,16 @@ def test_name_error_exprs(engine, parser): def test_invalid_local_variable_reference(engine, parser): - a, b = 1, 2 + a, b = 1, 2 # noqa exprs = 'a + @b', '@a + b', '@a + @b' - for expr in exprs: + + for _expr in exprs: if parser != 'pandas': with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is only"): - pd.eval(exprs, engine=engine, parser=parser) + pd.eval(_expr, engine=engine, parser=parser) else: with tm.assertRaisesRegexp(SyntaxError, "The '@' prefix is not"): - pd.eval(exprs, engine=engine, parser=parser) + pd.eval(_expr, engine=engine, parser=parser) def test_numexpr_builtin_raises(engine, parser): @@ -1834,9 +1833,9 @@ def test_more_than_one_expression_raises(engine, parser): def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): gen = {int: lambda: np.random.randint(10), float: np.random.randn} - mid = gen[lhs]() - lhs = gen[lhs]() - rhs = gen[rhs]() + mid = gen[lhs]() # noqa + lhs = gen[lhs]() # noqa + rhs = gen[rhs]() # noqa ex1 = 'lhs {0} mid {1} rhs'.format(cmp, cmp) ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp, cmp) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index ef1be7e60e0e8..0d75ba5f2bd46 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -# TODO(wesm): fix long line flake8 issues -# flake8: noqa - import pandas.util.testing as tm from pandas.indexes.api import Index, CategoricalIndex from .common import Base @@ -215,7 +212,8 @@ def test_map(self): # GH 12766: Return an index not an array tm.assert_index_equal(ci.map(lambda x: 1), - Index(np.array([1] * 5, dtype=np.int64), name='XXX')) + Index(np.array([1] * 5, dtype=np.int64), + name='XXX')) # change categories dtype ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'), @@ -225,7 +223,8 @@ def f(x): return {'A': 10, 'B': 20, 'C': 30}.get(x) result = ci.map(f) - exp = pd.CategoricalIndex([10, 20, 
10, 20, 30], categories=[20, 10, 30], + exp = pd.CategoricalIndex([10, 20, 10, 20, 30], + categories=[20, 10, 30], ordered=False) tm.assert_index_equal(result, exp) @@ -589,10 +588,10 @@ def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) if PY3: - expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: - expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # multiple lines @@ -601,7 +600,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: @@ -609,7 +608,7 @@ def test_string_categorical_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -619,7 +618,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', ... 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(repr(idx), expected) else: @@ -628,7 +627,7 @@ def test_string_categorical_index_repr(self): ... 
u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" + categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(unicode(idx), expected) @@ -637,23 +636,23 @@ def test_string_categorical_index_repr(self): if PY3: expected = u"""CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'm', 'o'], - categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" + categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j', u'k', u'l', u'm', u'm', u'o'], - categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" + categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # short idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # multiple lines @@ -662,7 +661,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: @@ -670,7 +669,7 @@ def test_string_categorical_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -680,7 +679,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', ... 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(repr(idx), expected) else: @@ -689,7 +688,7 @@ def test_string_categorical_index_repr(self): ... 
u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(unicode(idx), expected) @@ -698,13 +697,13 @@ def test_string_categorical_index_repr(self): if PY3: expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -714,10 +713,10 @@ def test_string_categorical_index_repr(self): # short idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) # multiple lines @@ -727,7 +726,7 @@ def test_string_categorical_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: @@ -736,7 +735,7 @@ def test_string_categorical_index_repr(self): u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) @@ -748,7 +747,7 @@ def test_string_categorical_index_repr(self): ... 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(repr(idx), expected) else: @@ -757,7 +756,7 @@ def test_string_categorical_index_repr(self): ... 
u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" # noqa self.assertEqual(unicode(idx), expected) @@ -766,13 +765,13 @@ def test_string_categorical_index_repr(self): if PY3: expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" # noqa self.assertEqual(unicode(idx), expected) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index ea2697ec19df3..63c1ae70e35a6 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -10,7 +10,6 @@ from pandas.types.dtypes import CategoricalDtype from pandas.types.common import (is_categorical_dtype, - is_object_dtype, is_float_dtype, is_integer_dtype) @@ -25,9 +24,6 @@ from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context -# GH 12066 -# flake8: noqa - class TestCategorical(tm.TestCase): @@ -291,7 +287,6 @@ def test_constructor_with_null(self): pd.Categorical(DatetimeIndex(['nat', '20160101']), categories=[NaT, Timestamp('20160101')]) - def test_constructor_with_index(self): ci = CategoricalIndex(list('aabbca'), categories=list('cab')) tm.assert_categorical_equal(ci.values, Categorical(ci)) @@ -710,8 +705,7 @@ def test_unicode_print(self): self.assertEqual(_rep(c), expected) - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] - * 20) + c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""\ [ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 @@ -723,8 +717,7 @@ def test_unicode_print(self): # the repr width with option_context('display.unicode.east_asian_width', True): - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] - * 20) + c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa @@ -1279,7 +1272,8 @@ def test_mode(self): s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True) + exp = Categorical([5, 4, 3, 2, 1], + categories=[5, 4, 3, 2, 1], ordered=True) tm.assert_categorical_equal(res, exp) # NaN should not become the mode! 
s = Categorical([np.nan, np.nan, np.nan, 4, 5], @@ -2233,7 +2227,7 @@ def test_categorical_repr_datetime_ordered(self): exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" + 2011-01-01 13:00:00-05:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2242,14 +2236,14 @@ def test_categorical_repr_period(self): c = pd.Categorical(idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2262,7 +2256,7 @@ def test_categorical_repr_period(self): c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa self.assertEqual(repr(c), exp) @@ -2271,14 +2265,14 @@ def test_categorical_repr_period_ordered(self): c = pd.Categorical(idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2291,7 +2285,7 @@ def test_categorical_repr_period_ordered(self): c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa self.assertEqual(repr(c), exp) @@ -2305,7 +2299,7 @@ def test_categorical_repr_timedelta(self): c = pd.Categorical(idx.append(idx), categories=idx) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa self.assertEqual(repr(c), exp) @@ -2315,7 
+2309,7 @@ def test_categorical_repr_timedelta(self): Length: 20 Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 days 01:00:00, 19 days 01:00:00]""" + 18 days 01:00:00, 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2324,7 +2318,7 @@ def test_categorical_repr_timedelta(self): Length: 40 Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, - 18 days 01:00:00, 19 days 01:00:00]""" + 18 days 01:00:00, 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2332,13 +2326,13 @@ def test_categorical_repr_timedelta_ordered(self): idx = pd.timedelta_range('1 days', periods=5) c = pd.Categorical(idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa self.assertEqual(repr(c), exp) @@ -2348,7 +2342,7 @@ def test_categorical_repr_timedelta_ordered(self): Length: 20 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2357,7 +2351,7 @@ def test_categorical_repr_timedelta_ordered(self): Length: 40 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < - 18 days 01:00:00 < 19 days 01:00:00]""" + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa self.assertEqual(repr(c), exp) @@ -2423,7 +2417,7 @@ def test_categorical_series_repr_datetime(self): 4 2011-01-01 13:00:00 dtype: category Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, - 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2438,7 +2432,7 @@ def test_categorical_series_repr_datetime(self): dtype: category Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, - 2011-01-01 13:00:00-05:00]""" + 2011-01-01 13:00:00-05:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2452,7 +2446,7 @@ def test_categorical_series_repr_datetime_ordered(self): 4 2011-01-01 13:00:00 dtype: category Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2467,7 +2461,7 @@ def test_categorical_series_repr_datetime_ordered(self): dtype: category Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" + 2011-01-01 13:00:00-05:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2481,7 +2475,7 @@ def test_categorical_series_repr_period(self): 4 2011-01-01 13:00 dtype: category Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2507,7 +2501,7 @@ def test_categorical_series_repr_period_ordered(self): 4 2011-01-01 13:00 dtype: category Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" + 2011-01-01 13:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2551,7 +2545,7 @@ def test_categorical_series_repr_timedelta(self): dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, - 8 days 01:00:00, 9 days 01:00:00]""" + 8 days 01:00:00, 9 days 01:00:00]""" # noqa self.assertEqual(repr(s), exp) @@ -2564,7 +2558,7 @@ def test_categorical_series_repr_timedelta_ordered(self): 3 4 days 4 5 days dtype: category -Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa self.assertEqual(repr(s), exp) @@ -2583,26 +2577,26 @@ def test_categorical_series_repr_timedelta_ordered(self): dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 
6 days 01:00:00 < 7 days 01:00:00 < - 8 days 01:00:00 < 9 days 01:00:00]""" + 8 days 01:00:00 < 9 days 01:00:00]""" # noqa self.assertEqual(repr(s), exp) def test_categorical_index_repr(self): idx = pd.CategoricalIndex(pd.Categorical([1, 2, 3])) - exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(idx), exp) i = pd.CategoricalIndex(pd.Categorical(np.arange(10))) - exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_ordered(self): i = pd.CategoricalIndex(pd.Categorical([1, 2, 3], ordered=True)) - exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) i = pd.CategoricalIndex(pd.Categorical(np.arange(10), ordered=True)) - exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_datetime(self): @@ -2611,7 +2605,7 @@ def test_categorical_index_repr_datetime(self): exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], - categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2621,7 +2615,7 @@ def test_categorical_index_repr_datetime(self): exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2631,7 +2625,7 @@ def test_categorical_index_repr_datetime_ordered(self): exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], - categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2641,7 +2635,7 @@ def test_categorical_index_repr_datetime_ordered(self): exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 
10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2651,7 +2645,7 @@ def test_categorical_index_repr_datetime_ordered(self): '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], - categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2659,24 +2653,24 @@ def test_categorical_index_repr_period(self): # test all length idx = pd.period_range('2011-01-01 09:00', freq='H', periods=1) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01-01 09:00', freq='H', periods=2) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01-01 09:00', freq='H', periods=3) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2685,13 +2679,13 @@ def test_categorical_index_repr_period(self): '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + 
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01', freq='M', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_period_ordered(self): @@ -2699,19 +2693,19 @@ def test_categorical_index_repr_period_ordered(self): i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], - categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01', freq='M', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_timedelta(self): idx = pd.timedelta_range('1 days', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx)) - exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.timedelta_range('1 hours', periods=10) @@ -2720,14 +2714,14 @@ def test_categorical_index_repr_timedelta(self): '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', '9 days 01:00:00'], - categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" # noqa self.assertEqual(repr(i), exp) def test_categorical_index_repr_timedelta_ordered(self): idx = pd.timedelta_range('1 days', periods=5) i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) - exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], 
categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) idx = pd.timedelta_range('1 hours', periods=10) @@ -2736,7 +2730,7 @@ def test_categorical_index_repr_timedelta_ordered(self): '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', '9 days 01:00:00'], - categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa self.assertEqual(repr(i), exp) @@ -2833,7 +2827,8 @@ def test_mode(self): s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], ordered=True)) res = s.mode() - exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], ordered=True)) + exp = Series(Categorical([5, 4, 3, 2, 1], categories=[5, 4, 3, 2, 1], + ordered=True)) tm.assert_series_equal(res, exp) def test_value_counts(self): @@ -4275,10 +4270,10 @@ def test_str_accessor_api_for_categorical(self): # * `translate` has different interfaces for py2 vs. py3 _ignore_names = ["get", "join", "translate"] - str_func_names = [f - for f in dir(s.str) - if not (f.startswith("_") or f in _special_func_names - or f in _ignore_names)] + str_func_names = [f for f in dir(s.str) if not ( + f.startswith("_") or + f in _special_func_names or + f in _ignore_names)] func_defs = [(f, (), {}) for f in str_func_names] func_defs.extend(special_func_defs) @@ -4418,10 +4413,3 @@ def test_map(self): self.assertIsInstance(res, tm.SubclassedCategorical) exp = Categorical(['A', 'B', 'C']) tm.assert_categorical_equal(res, exp) - - def test_map(self): - sc = tm.SubclassedCategorical(['a', 'b', 'c']) - res = sc.map(lambda x: x.upper()) - self.assertIsInstance(res, tm.SubclassedCategorical) - exp = Categorical(['A', 'B', 'C']) - tm.assert_categorical_equal(res, exp) diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 7940efc7e1b59..955edce2591e6 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -4,8 +4,7 @@ import numpy as np -from pandas.types.common import (_NS_DTYPE, _TD_DTYPE, - is_period_arraylike, +from pandas.types.common import (is_period_arraylike, is_datetime_arraylike, is_integer_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_categorical_dtype, @@ -13,7 +12,7 @@ from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.tseries.index import DatetimeIndex -from pandas._libs.period import IncompatibleFrequency # flake8: noqa +from pandas._libs.period import IncompatibleFrequency # noqa from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex from pandas.core.algorithms import take_1d @@ -162,6 +161,7 @@ class DatetimeProperties(Properties): def to_pydatetime(self): return self.values.to_pydatetime() + DatetimeProperties._add_delegate_accessors( delegate=DatetimeIndex, accessors=DatetimeIndex._datetimelike_ops, @@ -201,6 +201,7 @@ def components(self): """ return self.values.components.set_index(self.index) + TimedeltaProperties._add_delegate_accessors( delegate=TimedeltaIndex, accessors=TimedeltaIndex._datetimelike_ops, @@ -225,6 +226,7 @@ class PeriodProperties(Properties): Raises TypeError if the Series 
does not contain datetimelike values. """ + PeriodProperties._add_delegate_accessors( delegate=PeriodIndex, accessors=PeriodIndex._datetimelike_ops, From cd24fa95f1781b14d35eac4953bab02691fd9d04 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 2 Apr 2017 18:47:11 -0400 Subject: [PATCH 323/353] ENH: add origin to to_datetime closes #11276 closes #11745 superseded #11470 Author: Jeff Reback Author: Sumit Binnani Closes #15828 from jreback/datetime-unit and squashes the following commits: ebb4acd [Jeff Reback] doc fixes & cleanup 209591a [Jeff Reback] bug fix 56663a5 [Jeff Reback] add Timedelta floordiv ops a24e88c [Jeff Reback] rename epoch -> unix 6a8a779 [Jeff Reback] update docs / tests ad7356e [Sumit Binnani] BUG: Series creation with datetime64 with non-ns unit as object dtype --- doc/source/timeseries.rst | 26 ++++- doc/source/whatsnew/v0.20.0.txt | 24 +++- pandas/_libs/tslib.pyx | 39 ++++++- pandas/tests/indexes/datetimes/test_tools.py | 117 +++++++++++++++++++ pandas/tests/indexes/timedeltas/test_ops.py | 13 ++- pandas/tests/scalar/test_timedelta.py | 10 ++ pandas/tseries/tdi.py | 7 +- pandas/tseries/tools.py | 107 ++++++++++++++--- 8 files changed, 317 insertions(+), 26 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 7136b15a7633a..44c200e13b877 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -252,7 +252,8 @@ Epoch Timestamps It's also possible to convert integer or float epoch times. The default unit for these is nanoseconds (since these are how ``Timestamp`` s are stored). However, -often epochs are stored in another ``unit`` which can be specified: +often epochs are stored in another ``unit`` which can be specified. These are computed +from the starting point specified by the :ref:`Origin Parameter `. Typical epoch stored units @@ -276,6 +277,29 @@ These *work*, but the results may be unexpected. Epoch times will be rounded to the nearest nanosecond. +.. _timeseries.origin: + +Using the Origin Parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +Using the ``origin`` parameter, one can specify an alternative starting point for creation +of a ``DatetimeIndex``. + +Start with 1960-01-01 as the starting date + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. +Commonly called 'unix epoch' or POSIX time. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') + .. _timeseries.daterange: Generating Ranges of Timestamps diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 781a912555e14..ceb8f0f5fabe4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -27,7 +27,6 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ - .. _whatsnew_0200.enhancements.dataio_dtype: ``dtype`` keyword for data IO @@ -55,6 +54,27 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files. pd.read_fwf(StringIO(data)).dtypes pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes +.. _whatsnew_0120.enhancements.datetime_origin: + +``.to_datetime()`` has gained an ``origin`` parameter +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`to_datetime` has gained a new parameter, ``origin``, to define a reference date +from where to compute the resulting ``DatetimeIndex``. (:issue:`11276`, :issue:`11745`) + +Start with 1960-01-01 as the starting date + +.. 
ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. +Commonly called 'unix epoch' or POSIX time. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') + .. _whatsnew_0200.enhancements.groupby_access: Groupby Enhancements @@ -317,7 +337,7 @@ Other Enhancements - ``pd.DataFrame.to_latex`` and ``pd.DataFrame.to_string`` now allow optional header aliases. (:issue:`15536`) - Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) - Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) - +- Enabled floor division for ``Timedelta`` and ``TimedeltaIndex`` (:issue:`15828`) - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5aa8e15d0d087..cc1439711c1d4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3073,6 +3073,7 @@ class Timedelta(_Timedelta): return np.timedelta64(self.value, 'ns') def _validate_ops_compat(self, other): + # return True if we are compat with operating if _checknull_with_nat(other): return True @@ -3179,11 +3180,41 @@ class Timedelta(_Timedelta): __div__ = __truediv__ __rdiv__ = __rtruediv__ - def _not_implemented(self, *args, **kwargs): - return NotImplemented + def __floordiv__(self, other): + + if hasattr(other, 'dtype'): + + # work with i8 + other = other.astype('m8[ns]').astype('i8') + + return self.value // other - __floordiv__ = _not_implemented - __rfloordiv__ = _not_implemented + # integers only + if is_integer_object(other): + return Timedelta(self.value // other, unit='ns') + + if not self._validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value // other.value + + def __rfloordiv__(self, other): + if hasattr(other, 'dtype'): + + # work with i8 + other = other.astype('m8[ns]').astype('i8') + return other // self.value + + if not self._validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return other.value // self.value def _op_unary_method(func, name): diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 512a3e1c38629..02630c76abb93 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1515,3 +1515,120 @@ def test_normalize_date(): result = normalize_date(value) assert (result == datetime(2012, 9, 7)) + + +@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns']) +def units(request): + return request.param + + +@pytest.fixture +def epoch_1960(): + # for origin as 1960-01-01 + return Timestamp('1960-01-01') + + +@pytest.fixture +def units_from_epochs(): + return list(range(5)) + + +@pytest.fixture(params=[epoch_1960(), epoch_1960().to_datetime(), + epoch_1960().to_datetime64(), + str(epoch_1960())]) +def epochs(request): + return request.param + + +@pytest.fixture +def julian_dates(): + return pd.date_range('2014-1-1', 
periods=10).to_julian_date().values + + +class TestOrigin(object): + + def test_to_basic(self, julian_dates): + # gh-11276, gh-11745 + # for origin as julian + + result = Series(pd.to_datetime( + julian_dates, unit='D', origin='julian')) + expected = Series(pd.to_datetime( + julian_dates - pd.Timestamp(0).to_julian_date(), unit='D')) + assert_series_equal(result, expected) + + result = Series(pd.to_datetime( + [0, 1, 2], unit='D', origin='unix')) + expected = Series([Timestamp('1970-01-01'), + Timestamp('1970-01-02'), + Timestamp('1970-01-03')]) + assert_series_equal(result, expected) + + # default + result = Series(pd.to_datetime( + [0, 1, 2], unit='D')) + expected = Series([Timestamp('1970-01-01'), + Timestamp('1970-01-02'), + Timestamp('1970-01-03')]) + assert_series_equal(result, expected) + + def test_julian_round_trip(self): + result = pd.to_datetime(2456658, origin='julian', unit='D') + assert result.to_julian_date() == 2456658 + + # out-of-bounds + with pytest.raises(ValueError): + pd.to_datetime(1, origin="julian", unit='D') + + def test_invalid_unit(self, units, julian_dates): + + # checking for invalid combination of origin='julian' and unit != D + if units != 'D': + with pytest.raises(ValueError): + pd.to_datetime(julian_dates, unit=units, origin='julian') + + def test_invalid_origin(self): + + # need to have a numeric specified + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", origin="1960-01-01") + + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", origin="1960-01-01", unit='D') + + def test_epoch(self, units, epochs, epoch_1960, units_from_epochs): + + expected = Series( + [pd.Timedelta(x, unit=units) + + epoch_1960 for x in units_from_epochs]) + + result = Series(pd.to_datetime( + units_from_epochs, unit=units, origin=epochs)) + assert_series_equal(result, expected) + + @pytest.mark.parametrize("origin, exc", + [('random_string', ValueError), + ('epoch', ValueError), + ('13-24-1990', ValueError), + (datetime(1, 1, 1), tslib.OutOfBoundsDatetime)]) + def test_invalid_origins(self, origin, exc, units, units_from_epochs): + + with pytest.raises(exc): + pd.to_datetime(units_from_epochs, unit=units, + origin=origin) + + def test_processing_order(self): + # make sure we handle out-of-bounds *before* + # constructing the dates + + result = pd.to_datetime(200 * 365, unit='D') + expected = Timestamp('2169-11-13 00:00:00') + assert result == expected + + result = pd.to_datetime(200 * 365, unit='D', origin='1870-01-01') + expected = Timestamp('2069-11-13 00:00:00') + assert result == expected + + result = pd.to_datetime(300 * 365, unit='D', origin='1870-01-01') + expected = Timestamp('2169-10-20 00:00:00') + assert result == expected diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 2e9f11297dc83..36aac8cafecc1 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -284,6 +284,12 @@ def test_ops_compat(self): result = rng / offset tm.assert_index_equal(result, expected, exact=False) + # floor divide + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + for offset in offsets: + result = rng // offset + tm.assert_index_equal(result, expected, exact=False) + # divide with nats rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = Float64Index([12, np.nan, 24], name='foo') @@ -867,10 +873,12 @@ def test_ops(self): self.assertEqual(td * 2, Timedelta(20, unit='d')) self.assertTrue((td * pd.NaT) is pd.NaT) 
self.assertEqual(td / 2, Timedelta(5, unit='d')) + self.assertEqual(td // 2, Timedelta(5, unit='d')) self.assertEqual(abs(td), td) self.assertEqual(abs(-td), td) self.assertEqual(td / td, 1) self.assertTrue((td / pd.NaT) is np.nan) + self.assertTrue((td // pd.NaT) is np.nan) # invert self.assertEqual(-td, Timedelta('-10d')) @@ -878,9 +886,6 @@ def test_ops(self): self.assertEqual(-1 * td, Timedelta('-10d')) self.assertEqual(abs(-td), Timedelta('10d')) - # invalid - self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) - # invalid multiply with another timedelta self.assertRaises(TypeError, lambda: td * td) @@ -991,7 +996,7 @@ class Other: self.assertTrue(td.__sub__(other) is NotImplemented) self.assertTrue(td.__truediv__(other) is NotImplemented) self.assertTrue(td.__mul__(other) is NotImplemented) - self.assertTrue(td.__floordiv__(td) is NotImplemented) + self.assertTrue(td.__floordiv__(other) is NotImplemented) def test_ops_error_str(self): # GH 13624 diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index c2b895925b685..c22d1d2329fba 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -216,6 +216,7 @@ def test_conversion(self): def test_freq_conversion(self): + # truediv td = Timedelta('1 days 2 hours 3 ns') result = td / np.timedelta64(1, 'D') self.assertEqual(result, td.value / float(86400 * 1e9)) @@ -224,6 +225,15 @@ def test_freq_conversion(self): result = td / np.timedelta64(1, 'ns') self.assertEqual(result, td.value) + # floordiv + td = Timedelta('1 days 2 hours 3 ns') + result = td // np.timedelta64(1, 'D') + self.assertEqual(result, 1) + result = td // np.timedelta64(1, 's') + self.assertEqual(result, 93600) + result = td // np.timedelta64(1, 'ns') + self.assertEqual(result, td.value) + def test_fields(self): def check(value): # that we are int/long like diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 5d062dd38f9fc..d0f373fcc5a45 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -326,7 +326,7 @@ def _add_delta(self, delta): def _evaluate_with_timedelta_like(self, other, op, opstr): # allow division by a timedelta - if opstr in ['__div__', '__truediv__']: + if opstr in ['__div__', '__truediv__', '__floordiv__']: if _is_convertible_to_td(other): other = Timedelta(other) if isnull(other): @@ -334,7 +334,10 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): "division by pd.NaT not implemented") i8 = self.asi8 - result = i8 / float(other.value) + if opstr in ['__floordiv__']: + result = i8 // other.value + else: + result = op(i8, float(other.value)) result = self._maybe_mask_results(result, convert='float64') return Index(result, name=self.name, copy=False) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 5dc9746c6d6f9..d0f1671f9e309 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -9,7 +9,11 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, - is_list_like) + is_integer, + is_float, + is_list_like, + is_scalar, + is_numeric_dtype) from pandas.types.generic import (ABCIndexClass, ABCSeries, ABCDataFrame) from pandas.types.missing import notnull @@ -177,7 +181,7 @@ def _guess_datetime_format_for_array(arr, **kwargs): def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, - unit=None, infer_datetime_format=False): + unit=None, infer_datetime_format=False, origin='unix'): """ Convert argument to datetime. 
@@ -229,13 +233,27 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, - If False, allow the format to match anywhere in the target string. unit : string, default 'ns' - unit of the arg (D,s,ms,us,ns) denote the unit in epoch - (e.g. a unix timestamp), which is an integer/float number. + unit of the arg (D,s,ms,us,ns) denote the unit, which is an + integer or float number. This will be based off the origin. + Example, with unit='ms' and origin='unix' (the default), this + would calculate the number of milliseconds to the unix epoch start. infer_datetime_format : boolean, default False If True and no `format` is given, attempt to infer the format of the datetime strings, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the parsing speed by ~5-10x. + origin : scalar, default is 'unix' + Define the reference date. The numeric values would be parsed as number + of units (defined by `unit`) since this reference date. + + - If 'unix' (or POSIX) time; origin is set to 1970-01-01. + - If 'julian', unit must be 'D', and origin is set to beginning of + Julian Calendar. Julian day number 0 is assigned to the day starting + at noon on January 1, 4713 BC. + - If Timestamp convertible, origin is set to Timestamp identified by + origin. + + .. versionadded: 0.20.0 Returns ------- @@ -297,8 +315,15 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop - """ + Using a non-unix epoch origin + + >>> pd.to_datetime([1, 2, 3], unit='D', + origin=pd.Timestamp('1960-01-01')) + 0 1960-01-02 + 1 1960-01-03 + 2 1960-01-04 + """ from pandas.tseries.index import DatetimeIndex tz = 'utc' if utc else None @@ -410,21 +435,77 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): raise e if arg is None: - return arg - elif isinstance(arg, tslib.Timestamp): - return arg + return None + + # handle origin + if origin == 'julian': + + original = arg + j0 = tslib.Timestamp(0).to_julian_date() + if unit != 'D': + raise ValueError("unit must be 'D' for origin='julian'") + try: + arg = arg - j0 + except: + raise ValueError("incompatible 'arg' type for given " + "'origin'='julian'") + + # premptively check this for a nice range + j_max = tslib.Timestamp.max.to_julian_date() - j0 + j_min = tslib.Timestamp.min.to_julian_date() - j0 + if np.any(arg > j_max) or np.any(arg < j_min): + raise tslib.OutOfBoundsDatetime( + "{original} is Out of Bounds for " + "origin='julian'".format(original=original)) + + elif origin not in ['unix', 'julian']: + + # arg must be a numeric + original = arg + if not ((is_scalar(arg) and (is_integer(arg) or is_float(arg))) or + is_numeric_dtype(np.asarray(arg))): + raise ValueError( + "'{arg}' is not compatible with origin='{origin}'; " + "it must be numeric with a unit specified ".format( + arg=arg, + origin=origin)) + + # we are going to offset back to unix / epoch time + try: + offset = tslib.Timestamp(origin) - tslib.Timestamp(0) + except tslib.OutOfBoundsDatetime: + raise tslib.OutOfBoundsDatetime( + "origin {} is Out of Bounds".format(origin)) + except ValueError: + raise ValueError("origin {} cannot be converted " + "to a Timestamp".format(origin)) + + # convert the offset to the unit of the arg + # this should be lossless in terms of precision + offset = offset // tslib.Timedelta(1, unit=unit) + + # scalars & ndarray-like can handle the addition + if is_list_like(arg) and not isinstance( + arg, (ABCSeries, 
ABCIndexClass, np.ndarray)): + arg = np.asarray(arg) + arg = arg + offset + + if isinstance(arg, tslib.Timestamp): + result = arg elif isinstance(arg, ABCSeries): from pandas import Series values = _convert_listlike(arg._values, False, format) - return Series(values, index=arg.index, name=arg.name) + result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): - return _assemble_from_unit_mappings(arg, errors=errors) + result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - return _convert_listlike(arg, box, format, name=arg.name) + result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): - return _convert_listlike(arg, box, format) + result = _convert_listlike(arg, box, format) + else: + result = _convert_listlike(np.array([arg]), box, format)[0] - return _convert_listlike(np.array([arg]), box, format)[0] + return result # mappings for assembling units From f49f9058d152efc9a309e01541762407e16dc953 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 08:24:23 -0400 Subject: [PATCH 324/353] DOC: doc fix for feather_format error message text --- pandas/io/feather_format.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index ac74ac4823613..de6d04c105376 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -56,15 +56,16 @@ def to_feather(df, path): # raise on anything else as we don't serialize the index if not isinstance(df.index, Int64Index): - raise ValueError("feather does not serializing {} " + raise ValueError("feather does not support serializing {} " "for the index; you can .reset_index()" "to make the index into column(s)".format( type(df.index))) if not df.index.equals(RangeIndex.from_range(range(len(df)))): - raise ValueError("feather does not serializing a non-default index " - "for the index; you can .reset_index()" - "to make the index into column(s)") + raise ValueError("feather does not support serializing a " + "non-default index for the index; you " + "can .reset_index() to make the index " + "into column(s)") if df.index.name is not None: raise ValueError("feather does not serialize index meta-data on a " From 7059d898511a62710d6bd6487c8b40d7f535c1a1 Mon Sep 17 00:00:00 2001 From: funnycrab Date: Mon, 3 Apr 2017 08:41:29 -0400 Subject: [PATCH 325/353] BUG: Fix rollover handling in json encoding closes #15716 closes #15864 whenever the frac is incremented, there is a chance that its value may hit the value of pow10. 
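To make the rollover case concrete, here is a minimal Python sketch of the whole/frac rounding scheme the encoder uses. This is only an illustration of the carry described above, not the actual ujson C implementation; the helper name ``format_double`` is invented for this example, and the sketch assumes a non-negative value and a precision of at least 1.

    def format_double(value, precision):
        # Split into integer part and scaled fractional part, as the encoder does.
        pow10 = 10 ** precision
        whole = int(value)
        tmp = (value - whole) * pow10
        frac = int(tmp)
        diff = tmp - frac

        if diff > 0.5:
            frac += 1
        elif diff == 0.5 and (frac == 0 or frac & 1):
            # Halfway case: round up when the last digit is odd (or zero).
            frac += 1

        # The fix: perform the carry after *both* branches above, so that an
        # odd halfway value such as 0.95 with precision 1 also rolls over.
        if frac >= pow10:
            frac = 0
            whole += 1

        return '%d.%0*d' % (whole, precision, frac)

    format_double(0.99, 1)  # '1.0' (diff > 0.5 path)
    format_double(0.95, 1)  # '1.0' (diff == 0.5 path that previously skipped the carry)
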
Author: funnycrab Author: Funnycrab Closes #15865 from funnycrab/fix_rollover_handling_in_json_enc and squashes the following commits: c9710ee [funnycrab] add more tests for examples listed in issue #15716 and #15864 3cee6b3 [funnycrab] add whatsnew entry 9b0dff0 [funnycrab] remove additional blank line 75effb4 [funnycrab] add tests 6acb969 [funnycrab] fix for cpplint aec58e6 [Funnycrab] BUG: Fix rollover handling in json encoding --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/src/ujson/lib/ultrajsonenc.c | 12 ++++--- pandas/tests/io/json/test_pandas.py | 25 ++++++++++++++ pandas/tests/io/json/test_ujson.py | 42 +++++++++++++++++++++++ 4 files changed, 75 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ceb8f0f5fabe4..63aea96ef3369 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1010,6 +1010,7 @@ I/O - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +- Bug in ``pd.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index 5a15071938c1a..6bf2297749006 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -823,17 +823,19 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, if (diff > 0.5) { ++frac; - /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ - if (frac >= pow10) { - frac = 0; - ++whole; - } } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { /* if halfway, round up if odd, OR if last digit is 0. That last part is strange */ ++frac; } + // handle rollover, e.g. 
+ // case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well + if (frac >= pow10) { + frac = 0; + ++whole; + } + if (enc->doublePrecision == 0) { diff = value - whole; diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7dbcf25c60b45..8fc8ecbdf8abc 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -380,6 +380,31 @@ def test_frame_from_json_nones(self): unser = read_json(df.to_json(), dtype=False) self.assertTrue(np.isnan(unser[2][0])) + def test_frame_to_json_float_precision(self): + df = pd.DataFrame([dict(a_float=0.95)]) + encoded = df.to_json(double_precision=1) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + + df = pd.DataFrame([dict(a_float=1.95)]) + encoded = df.to_json(double_precision=1) + self.assertEqual(encoded, '{"a_float":{"0":2.0}}') + + df = pd.DataFrame([dict(a_float=-1.95)]) + encoded = df.to_json(double_precision=1) + self.assertEqual(encoded, '{"a_float":{"0":-2.0}}') + + df = pd.DataFrame([dict(a_float=0.995)]) + encoded = df.to_json(double_precision=2) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + + df = pd.DataFrame([dict(a_float=0.9995)]) + encoded = df.to_json(double_precision=3) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + + df = pd.DataFrame([dict(a_float=0.99999999999999944)]) + encoded = df.to_json(double_precision=15) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + def test_frame_to_json_except(self): df = DataFrame([1, 2, 3]) self.assertRaises(ValueError, df.to_json, orient="garbage") diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index e66721beed288..c2cbbe1ca65ab 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -43,6 +43,48 @@ def test_encodeDecimal(self): decoded = ujson.decode(encoded) self.assertEqual(decoded, 1337.1337) + sut = decimal.Decimal("0.95") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "1.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + + sut = decimal.Decimal("0.94") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "0.9") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 0.9) + + sut = decimal.Decimal("1.95") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "2.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 2.0) + + sut = decimal.Decimal("-1.95") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "-2.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, -2.0) + + sut = decimal.Decimal("0.995") + encoded = ujson.encode(sut, double_precision=2) + self.assertEqual(encoded, "1.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + + sut = decimal.Decimal("0.9995") + encoded = ujson.encode(sut, double_precision=3) + self.assertEqual(encoded, "1.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + + sut = decimal.Decimal("0.99999999999999944") + encoded = ujson.encode(sut, double_precision=15) + self.assertEqual(encoded, "1.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + def test_encodeStringConversion(self): input = "A string \\ / \b \f \n \r \t &" not_html_encoded = ('"A string \\\\ \\/ \\b \\f \\n ' From 4cb730e95353f414bb47f571a69781746bd3e84b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 09:01:59 -0400 Subject: [PATCH 326/353] Revert "CI: add jdcal to 3.6 build as openpyxl >= 
2.4.5 is broken" (#15875) This reverts commit d1e1ba08ef259724ba71e0953c52e8e4ad81bd17. closes #15861 --- ci/requirements-3.6.run | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 8f81c4620558e..41c9680ce1b7e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -2,10 +2,7 @@ python-dateutil pytz numpy scipy -# openpyxl >= 2.4.5 should be dependent on jdcal -# but is not for some reason openpyxl -jdcal xlsxwriter xlrd xlwt From b199fbffe2590a148eec8ebc38751c43d5c7c361 Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Tue, 4 Apr 2017 00:06:46 +0800 Subject: [PATCH 327/353] DOC: Fix a typo in dsintro.rst (#15877) --- doc/source/dsintro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index cc69367017aed..4fcb63c18757a 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -153,7 +153,7 @@ Vectorized operations and label alignment with Series ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When doing data analysis, as with raw NumPy arrays looping through Series -value-by-value is usually not necessary. Series can be also be passed into most +value-by-value is usually not necessary. Series can also be passed into most NumPy methods expecting an ndarray. From ed07df196e08a183c162c0e91a12f4f203d41041 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 3 Apr 2017 12:31:55 -0400 Subject: [PATCH 328/353] CLN: Remove "flake8: noqa" from even more files Another round of house-cleaning that builds off #15867. Likely to be the last one for now. Author: gfyoung Closes #15872 from gfyoung/flake8-noqa-clean and squashes the following commits: 3e610f5 [gfyoung] CLN: Make pickle_compat.py flake8-able 05e067a [gfyoung] CLN: Make windows.py flake8-able dc22c0a [gfyoung] CLN: Make clipboards.py flake8-able 90b00f0 [gfyoung] CLN: Make clipboard/__init__.py flake8-able ccb44cc [gfyoung] CLN: Make engines.py flake8-able --- pandas/compat/pickle_compat.py | 54 ++++++++++++++++++----------- pandas/computation/engines.py | 8 ++--- pandas/util/clipboard/__init__.py | 8 ++--- pandas/util/clipboard/clipboards.py | 4 +-- pandas/util/clipboard/windows.py | 1 - 5 files changed, 41 insertions(+), 34 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 279a82fea1cc2..5b4fcad252192 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -1,13 +1,13 @@ -""" support pre 0.12 series pickle compatibility """ - -# flake8: noqa +""" +Support pre-0.12 series pickle compatibility. +""" import sys -import pandas +import pandas # noqa import copy import pickle as pkl from pandas import compat, Index -from pandas.compat import u, string_types +from pandas.compat import u, string_types # noqa def load_reduce(self): @@ -16,17 +16,19 @@ def load_reduce(self): func = stack[-1] if type(args[0]) is type: - n = args[0].__name__ + n = args[0].__name__ # noqa try: stack[-1] = func(*args) return except Exception as e: - # if we have a deprecated function - # try to replace and try again + # If we have a deprecated function, + # try to replace and try again. 
+ + msg = '_reconstruct: First argument must be a sub-type of ndarray' - if '_reconstruct: First argument must be a sub-type of ndarray' in str(e): + if msg in str(e): try: cls = args[0] stack[-1] = object.__new__(cls) @@ -34,7 +36,7 @@ def load_reduce(self): except: pass - # try to reencode the arguments + # try to re-encode the arguments if getattr(self, 'encoding', None) is not None: args = tuple([arg.encode(self.encoding) if isinstance(arg, string_types) @@ -45,31 +47,37 @@ def load_reduce(self): except: pass + # unknown exception, re-raise if getattr(self, 'is_verbose', None): print(sys.exc_info()) print(func, args) raise - stack[-1] = value - -# if classes are moved, provide compat here +# If classes are moved, provide compat here. _class_locations_map = { # 15477 - ('pandas.core.base', 'FrozenNDArray'): ('pandas.indexes.frozen', 'FrozenNDArray'), - ('pandas.core.base', 'FrozenList'): ('pandas.indexes.frozen', 'FrozenList'), + ('pandas.core.base', 'FrozenNDArray'): + ('pandas.indexes.frozen', 'FrozenNDArray'), + ('pandas.core.base', 'FrozenList'): + ('pandas.indexes.frozen', 'FrozenList'), # 10890 - ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), - ('pandas.sparse.series', 'SparseTimeSeries'): ('pandas.sparse.series', 'SparseSeries'), + ('pandas.core.series', 'TimeSeries'): + ('pandas.core.series', 'Series'), + ('pandas.sparse.series', 'SparseTimeSeries'): + ('pandas.sparse.series', 'SparseSeries'), # 12588, extensions moving - ('pandas._sparse', 'BlockIndex'): ('pandas.sparse.libsparse', 'BlockIndex'), - ('pandas.tslib', 'Timestamp'): ('pandas._libs.tslib', 'Timestamp'), - ('pandas.tslib', '__nat_unpickle'): ('pandas._libs.tslib', '__nat_unpickle'), + ('pandas._sparse', 'BlockIndex'): + ('pandas.sparse.libsparse', 'BlockIndex'), + ('pandas.tslib', 'Timestamp'): + ('pandas._libs.tslib', 'Timestamp'), + ('pandas.tslib', '__nat_unpickle'): + ('pandas._libs.tslib', '__nat_unpickle'), ('pandas._period', 'Period'): ('pandas._libs.period', 'Period') - } +} # our Unpickler sub-class to override methods and some dispatcher @@ -112,6 +120,8 @@ def load_newobj(self): obj = cls.__new__(cls, *args) self.stack[-1] = obj + + Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj @@ -126,6 +136,8 @@ def load_newobj_ex(self): else: obj = cls.__new__(cls, *args, **kwargs) self.append(obj) + + try: Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex except: diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index a3de78c2f2089..aebc5bb02d59d 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -1,13 +1,11 @@ -"""Engine classes for :func:`~pandas.eval` """ - -# flake8: noqa +Engine classes for :func:`~pandas.eval` +""" import abc from pandas import compat -from pandas.compat import DeepChainMap, map -import pandas.core.common as com +from pandas.compat import map import pandas.formats.printing as printing from pandas.computation.align import _align, _reconstruct_object from pandas.computation.ops import (UndefinedVariableError, diff --git a/pandas/util/clipboard/__init__.py b/pandas/util/clipboard/__init__.py index 9e2b2faf858db..4066a3be5e850 100644 --- a/pandas/util/clipboard/__init__.py +++ b/pandas/util/clipboard/__init__.py @@ -25,8 +25,6 @@ """ __version__ = '1.5.27' -# flake8: noqa - import platform import os import subprocess @@ -62,14 +60,16 @@ def determine_clipboard(): if HAS_DISPLAY: # Determine which command/module is installed, if any. 
try: - import gtk # check if gtk is installed + # Check if gtk is installed + import gtk # noqa except ImportError: pass else: return init_gtk_clipboard() try: - import PyQt4 # check if PyQt4 is installed + # Check if PyQt4 is installed + import PyQt4 # noqa except ImportError: pass else: diff --git a/pandas/util/clipboard/clipboards.py b/pandas/util/clipboard/clipboards.py index bd5528334168f..e32380a383374 100644 --- a/pandas/util/clipboard/clipboards.py +++ b/pandas/util/clipboard/clipboards.py @@ -1,5 +1,3 @@ -# flake8: noqa - import sys import subprocess from .exceptions import PyperclipException @@ -8,7 +6,7 @@ Pyperclip could not find a copy/paste mechanism for your system. For more information, please visit https://pyperclip.readthedocs.org """ PY2 = sys.version_info[0] == 2 -text_type = unicode if PY2 else str +text_type = unicode if PY2 else str # noqa def init_osx_clipboard(): diff --git a/pandas/util/clipboard/windows.py b/pandas/util/clipboard/windows.py index 5c9be9ddaf508..5fc23f7102f41 100644 --- a/pandas/util/clipboard/windows.py +++ b/pandas/util/clipboard/windows.py @@ -1,4 +1,3 @@ -# flake8: noqa """ This module implements clipboard handling on Windows using ctypes. """ From 456e729384c315c291ef92bb150bcc4f79a22bdf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 13:25:55 -0400 Subject: [PATCH 329/353] DOC: remove gbq_integration instructions from contributing.rst (#15879) DOC: remove vbench instructions from contributing.rst --- doc/source/contributing.rst | 83 +++---------------------------------- 1 file changed, 5 insertions(+), 78 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 5e551a7fd5349..83cc1777b35f6 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -616,23 +616,23 @@ Or with one of the following constructs:: pytest pandas/tests/[test-module].py::[TestClass] pytest pandas/tests/[test-module].py::[TestClass]::[test_method] -Using `pytest-xdist `_, one can +Using `pytest-xdist `_, one can speed up local testing on multicore machines. To use this feature, you will need to install `pytest-xdist` via:: pip install pytest-xdist - -Two scripts are provided to assist with this. These scripts distribute + +Two scripts are provided to assist with this. These scripts distribute testing across 4 threads. On Unix variants, one can type:: test_fast.sh - + On Windows, one can type:: test_fast.bat - + This can significantly reduce the time it takes to locally run tests before submitting a pull request. @@ -657,12 +657,6 @@ to enable easy monitoring of the performance of critical *pandas* operations. These benchmarks are all found in the ``pandas/asv_bench`` directory. asv supports both python2 and python3. -.. note:: - - The asv benchmark suite was translated from the previous framework, vbench, - so many stylistic issues are likely a result of automated transformation of the - code. - To use all features of asv, you will need either ``conda`` or ``virtualenv``. For more details please check the `asv installation webpage `_. @@ -722,73 +716,6 @@ This will display stderr from the benchmarks, and use your local Information on how to write a benchmark and how to use asv can be found in the `asv documentation `_. -.. 
_contributing.gbq_integration_tests: - -Running Google BigQuery Integration Tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You will need to create a Google BigQuery private key in JSON format in -order to run Google BigQuery integration tests on your local machine and -on Travis-CI. The first step is to create a `service account -`__. - -Integration tests for ``pandas.io.gbq`` are skipped in pull requests because -the credentials that are required for running Google BigQuery integration -tests are `encrypted `__ -on Travis-CI and are only accessible from the pandas-dev/pandas repository. The -credentials won't be available on forks of pandas. Here are the steps to run -gbq integration tests on a forked repository: - -#. Go to `Travis CI `__ and sign in with your GitHub - account. -#. Click on the ``+`` icon next to the ``My Repositories`` list and enable - Travis builds for your fork. -#. Click on the gear icon to edit your travis build, and add two environment - variables: - - - ``GBQ_PROJECT_ID`` with the value being the ID of your BigQuery project. - - - ``SERVICE_ACCOUNT_KEY`` with the value being the contents of the JSON key - that you downloaded for your service account. Use single quotes around - your JSON key to ensure that it is treated as a string. - - For both environment variables, keep the "Display value in build log" option - DISABLED. These variables contain sensitive data and you do not want their - contents being exposed in build logs. -#. Your branch should be tested automatically once it is pushed. You can check - the status by visiting your Travis branches page which exists at the - following location: https://travis-ci.org/your-user-name/pandas/branches . - Click on a build job for your branch. Expand the following line in the - build log: ``ci/print_skipped.py /tmp/pytest.xml`` . Search for the - term ``test_gbq`` and confirm that gbq integration tests are not skipped. - -Running the vbench performance test suite (phasing out) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Historically, *pandas* used `vbench library `_ -to enable easy monitoring of the performance of critical *pandas* operations. -These benchmarks are all found in the ``pandas/vb_suite`` directory. vbench -currently only works on python2. - -To install vbench:: - - pip install git+https://github.com/pydata/vbench - -Vbench also requires ``sqlalchemy``, ``gitpython``, and ``psutil``, which can all be installed -using pip. If you need to run a benchmark, change your directory to the *pandas* root and run:: - - ./test_perf.sh -b master -t HEAD - -This will check out the master revision and run the suite on both master and -your commit. Running the full test suite can take up to one hour and use up -to 3GB of RAM. Usually it is sufficient to paste a subset of the results into the Pull Request to show that the committed changes do not cause unexpected -performance regressions. - -You can run specific benchmarks using the ``-r`` flag, which takes a regular expression. - -See the `performance testing wiki `_ for information -on how to write a benchmark. 
- Documenting your code --------------------- From ca7207f6472ec50424054e899fe51d012d950c1e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 14:26:36 -0400 Subject: [PATCH 330/353] DOC: update contributing.rst for ci (#15880) * DOC: update contributing.rst for ci * typos & auto-cancel links * make it a note * add back accid deleted section --- doc/source/_static/ci.png | Bin 0 -> 224055 bytes doc/source/contributing.rst | 56 +++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 17 deletions(-) create mode 100644 doc/source/_static/ci.png diff --git a/doc/source/_static/ci.png b/doc/source/_static/ci.png new file mode 100644 index 0000000000000000000000000000000000000000..82985ff8c204abcae6eead66d676cb880f92d3af GIT binary patch literal 224055 zcmeFYby!@@vM-DT2th*1> z=iBE!_q*rp=l*}^dDg66y{f8L{kpn(bycq)DoWB=uZUkEARu7L%1EjqAfUD(ARsrs zM18JliROKSfPhJ8BO#$8D|L)Xr8Lo4l?zHd0Ylo#_q4N_31vXgSgz z^@OHg`!&06kNVSc9uFf>Iu7Bzc0b_`d-mWev zrhXW@x-U_XL_dV{eTER5D#g_nexUvMo1_*8!Te)5jRK;mQbu-aztUIW#)}x{TT*h0 zGo=BC0}XsM7n}{*)KuM=&61eN`1Yarv!)E$bw1tl1m~ov&!*-ShEbG3+{b@1qW;*Y z66MthyP4z`y+VH9V-|9*m6-MJgXK>$n=QKF5HYPlpV83xP{W7s9J7$dK--OaTP-c<0*PN^t$&_=lQ^gh_RJ%K}NOauV!tT z>FXqX`0>N%7~e3piM_c*xIb!@z7b094@VDXLTjd`6XLM0KMZ=%I3BAZ%%vAE*QGdH zFwZsm#)YarGE785VA*9Ud#4tB5Svc?j?zB7*V%O~u%FB6mvFlp7Gda6H2HK$*YxH) z_W4GcnAgI@(e~SxH_|fOL;?x%1bj1%j07xfyMo3zlRE0%g?BZcfr}^fB|M~PU7aL1 z2(-9eaW85`iB}qMxG+17CWu8kk;G$=es%gxan8KgQtJ+d%EaJ2ed|HtTR&|3(C{S& z?a`yVaLdAXjPP@)O ztKyEzB;I+?+fI42^?UqB!%2VN2{XkidT#tlj8o8<#4jwTNUJ!IRLU0@ zopyft`SO8$_x+|$5sy0QN#!TV7WBAy7rR%7LaFC9*~zEvGD%Q z%8^VoV5P5Ms;95Vhd}w%E-ymiNEmm_^ZVDsNU|?6Lb2(N!tIvXE~3)|I>LqLvX6Jw z>XjZw=MU^gnH0Lq)ipFkl>os-B3n_69YnKFv;bqAF9B#LDC%N_-;qv@@iQ(pV$e0A zh!~w}DlgmqVO{DMffg8B;sCPm?eh}tE<7D4$EJ(tQ& z5^G!v6PL4x*jaKB*XRS`57D()$qy*2)bGs^YcUzbCf{m|aR;j&&G##| z2||1?VG6co6rU+vhcGFC#!TGqwN^x;Numz(%!@z!0395j0NG+*%T8R{URCu&S79fHKDdA6a$a-2w0vPDPDvFNBc38b|32aM6<*Zq(Qfdhw3qY; zJR(WzJmkgn(;luhpS6!`Jf>u)Ts84G-#SL< z0gkzDEzexqfwU6k`5?*$$C=-mtP8;{15igwtWUI0msTN!np4>_a+o}oHnohym!tdb z=djnkiFlu7^O*|mil)>?)sIx+0}#j_gnv*eesho|c8LijbCqXTbs$}eH=VGjm?`#7 zRa^a9RaxCpeR<59OCk-8`D>EAzG7KXW}#4yZ%qD>?f?qJc>{k)b6|SNeGr_G9LpPX zosdnF&1yvJFZopZ5D*vbxHBySQc9{k%m*ddteb4y;LX^Lsl+Q;6JwCkW5dZ;h;^fp{iD+`9n3e zOfA<%yfE}DJ0{WB2+Tg>K9fGhzGC#WH-dCdbmbW;0=aVwj#0jUzi40QIOVi)yjWQm zNP@asr+YI5D+C&$5s4Ie7HJ!CL^K((6j>L^7+IB$t4OLiq1c$}mXcTGQB+d&u1I@C zg%iY?HOEsoD*H`VDgGw@Bb|G?`iRiT`7mG@#7v0ag^z-dh0jX2udI~P=HRlwQI+J8 zM3e+#I@V**Rc!cJZCmSYZ*M!&SXtffAW?T(r)xK9eGZu!P6D(pT_uq%vGh1J8-yz7@V5LG2QG-CCgz(iZ(r{hCn(nJ<+LmBKUAGKKjU1h)ApJf4Pn7ipF(I)>^P zq||?@Unu}LO7|fyOPllSeQxq@3%fJxmFuG}Rc`(}x!YZ*v-_u0lRcax=Kg2~Fp&!*@I=JJ+ve6l2%_3(83EER>KU>H9#`AWP2yat}*r(Z#&3sMe%|L6k z{mc=qcva-ka7P77nw}2_iiBxL0>0}P?L;1&VU~*-He9Yp!}4`gRu;qj?D~E@fU8(Z zaZ|gsrygiIn-&d#o`KB9nR?cV&OGq^M)wg5ASlTc~f)4H{}iI_Q03k=7(ncbA|1SjBBoC)<4O& zH$GPnNhtgnNh~`CJIzk8_vWe-c}czA;TQzh%%?_b4GFbezPraLNhpOVj>)|&fR9s5 zU({}P=>BBehHZp>`@A${kaVo`wQ8lp+Q_EPWe8H$o3S=TlSE@dldWB&ZSXX=w>=^f zU9M!f)ZXVea@Ek&@X){uYpyE$sL3?K>RT^t?)!+k6|#oq$U6%6Dj9CH>!*d#ju&N( zbfuFt?KtYTZn^?(ZzEq-g|$==>uWpsc0|@y09yO6=E>3oMW=Rnp)YJF}pD9y_9)uyXy*>3d!ob=^I^F zk8a{><|4ZvS!?ng;U9TNr=e8NC(Ebn<+7piRFXRxIEiefHY#YFy5w?gw>Y=IU6ctB zp>TUzL+QY<(ihTKHgE&a`V8JqoDM}LlT^+cl{nS`1g=__j{D~i7xt_8zt|r*?OE?- zOaN<#!2848*#Lq{G{X)T>Iw#kECm4}tZoG6A4x$T3T^^e=56qgB5- zchGMAsHk%Z-@9I6_{@w+=VHr)Uj0 zniOOrjecQ|7j9moRukJJMhfj*zF2qOjvrjjB|GtIKS(2D;&bs41;FkRa(cV#klXrI zHwvyt7n%a>Ik&+wn5o2^DXZ>qpw&O zmF>Q*^`h=PFBQ<8WOQ8-5b$aKIuK>mKAa#RAZFQU=(y=9D)5^*+OwFLJDOUsc-cEW 
zQzIY#yg8#YvS2gQ<%72i!0fgS`D5_9OIJ#I+a4sIr1%nq*9{}l57$&s{hHFL3XaL>4-#?$z!pr7A zTXJyy54D~ZWc}+7D?1At>%U|_vkLxI%dcYNWnrfyX=88U;QHK#Fgq^?m*77b{(rar zv&p})>i#DyA2-|IIsbO&KR5+h|I*-Z8vPTlf7CwXCHzW|^3{2>q-0YMZ&R#HsE3-MqH%jfMhnD-=G{3WGiOh64H zOCY2SPqI2vN1;&g<-2SZ#1aPku!6I^fplC+#Lt0ew8Yg|?-TpV0urUc^aM5Ejob2G z(pRa)AT?9I2&8(EbaM7!t#SW58hYGuBuuZ*ZkIJD|D6*5QVrAVl=#0<^j|b@zgr`sw!Lq;)A9PB zfcXbKYMU3<{{Y@UWlGi~ARE3m@MRMGcVIu$wW0o-4naV9uNQz=^}SLUKkMHi9e~Jc zi2QFsdClAzpe?CQfua2GI=C}{pz7bzh<^Q!Z`{e4R~8*y{GXWqpFub742S(YnjBHd zXR~@YKHdLY{l7QUe@b~aKfu4EQTc!7_upE?|E>9rB7Fu*tLk7*wX$Hl=VwlSs$%Xd zp{8>pSGD$%puQa)@Henx9r>-f`5*5EgTCyxgtD$AIsATVF_Q`4$!*bGl%w z@h9h#icut@mt_XHU?D}G92jMFj}*RY3z3n8uqzz}e;KT-zZU;4hbuOh(RNXrL8VapD*j?nd9*8_(Z}J%=VF>QZelJ?dc2ma_DF0T^P9m^>Lh>g+swe$ z6K3FB-*o2XOuoAoVQMRgXR)K`NUcP}eU2cS+~vb}-0yA5PUo!sDZz)?KfsemJR+ga zHrxBir=15&N7Ox$y<7g+QkvivakZK7;?v$1tTAyPmMQ7$g39j<>bU$7iFkqK5~lA0 zUDz=G?h`enpFL%%S;bk#dnM!JW!kmWOOzuQ-re9P!yWxA*BI0(H=7L+O1>WXPJl-V zlA*SDzQNpFbMbo>dNEl4qL_9LHf;w7+TlL1k%0Vn(WQWs)Xee(SYR#k5^_YP&(sgb zt>utZkg}?A8P;KfCL{4G^sF^ASiJJOOz3->`ZzyZbt!p&)|!tu7T%pX@umXPIg(>) zOZPyhiM&57fVPSMA>}K~nn+=&Jsfba=0?cYuZ&7|n)RY>K5_N8ac5aM6h}l;wO6b4 zsxm=zSEEkDexl}aDRdEBIox2PJAb_;bK&h5Byha2`D^Z->tbz&f(*nKou$uoH#vKS zK(k3*UpM8#SKL%{E8kW6zaZoI7c!oYX9w@aSLkaevU>T60>>W1y1q4Y#;VM$xlmNt8{1gSceOL(sfjB*G_EKDM=gsNy=Te zdt!A9oI{ZWv;K7Hd(HysO*$@sTG<1SG8O|6sW}q4FufQE3%@YkpXVo_l@@FObuwI7 zE&bMoA9i5EJtsXkuNcB<*wns$ZLH1sLJ#rwF+jzW9;nWrFN?ge1~u(nS-TR{+WAYE zXf{&2vi=to{QpA1pX?m7?3d_i(-4~%+*_Vu;hHRTaFCgKKk!K<3RMHOEl|tyH^-~D zLt{okLK6No(MhYv4jAhvo}0f2AYR_D+w|QXt5L6>4sCX2LE?KqU(*h;r5`eyrYOQZ zcAZGl!^l|K~&H>9^ zUHrc69|>hsSG7rn1d9aFshrn-g2{Mh=!&ocCeZT|uMxnAP5Ox5kXPBu9chZY+){`+ z*+8})!Zfwbd?rw^T8KnCTT9meDy&euuff{MfK;zOEQmPbr8haAHeysJlX%5^S|@IrpY zDwQi+-r#_6t~b87ot7eHI@cm8KjdzLPAgMw^794BS+r??$JKfShIb~f!!r4)%Tj68 z`&0V2I0L@ZA7*a7PqMNLY*y$ZX`pTS^izV4)&Z$+$&c)Y!vNZr z_?BcQ0?4ELd|QW|{#W_6yLI&DkbQo_7InboZP3&<5uaa$H@me@{8Z{;1|lHnQf+tj zou(HO$n#LFeR#ic2^5*m595xwAGI_g1XeLNC;QLRZN`{RhgC9eXRP>)LS(lfoGjY@ zeS_R%KT`$Q({hidT_U$+$p$%#;Fm5VknI3ghUJnxQ#)NQWZl+9==>DjIb;5+cSxdj<$;fgd%~5>+SLmfnsfQ0C+w_8 zHcLKjlH0wYUGKh`Ct*EgO;@Y>P+}P5Nn%PoBmEWAevb1H$GY~?J|Z>#MH`NY)@K0_&PF)P!M!*26X+1trv-@Ic;LaS1Gwjk0t6LhzBw77K+U&815^=an_8s^-q6h5|jg|m9*LVbON{k^ueuR^U2;P{L3VkkkLVLA#6pd`O0=ES?;lMXR1~h+j03D zYvW<)kUKfd<0$f8pigh~=mxHM#qlx0L2vmfpH+a#iZt6YtCvjMDEa;&Pr&;32?S$|wv^?FOKN*vFg7(V^N z+UGs^&2AE1LJJn!4Q%*$a`pa$ejpaGdLBjfO1HJ*ph=bh^CAFlY z%SVby-^ar~=f@A5cVW|&Rh2Hjx>XX6RLy^1iG8)DefEajWhz-p-_Yqho*XI9?G@V@ z(AJY2V9M{^=fyAVj<$Wy8cpH#Om=Z0*862dQqf91X~3as37$_VZA!kEEE5Eky$t(N z99;_UX+;QwcQKa0BY{}YrjHIt38){nif!p_v3tD#^^-gj-_b+F zTdo@37cEz2w-G2hKovkvBREgb^kPGGXMU0R=SfnbAz+tJBsQuk|Tl|ei-WeBgv~}_YvE_COg@STm2e(8t;M`54@}gu>dbPe{Bw-cu`63 z+nD>(2z`2G9 z)9P8tc}e`P*kLW{G#&-EO*Oz}ekQ&GFG*T4ohaA97t&N!bIFXU_yQX^o&} zYpc`MEOnPZQvchg|K6U#>kIaHj%r^qM?m*OuV8t~C){`~hOaD+qkZCz41kcr@;>}; z3@eF(pHObxQhhw^aAf-f9A+rhtC#hQc{-vAo?3{}4(1@}p9h7~PwhUgCGt3( z&*28ucNx^!?&>XDmHI@R3#NzQU$%XO2T%AGCqCY;3gvVJBfz`{m*WlC?vCnGuG_oo zm~vUO$fg+wnjgOCJd{)}b16*S)yxbR%xxUz5ECbsK2Xt2BZA?V|gBsR68H|Nx*`o7A>NA zpuR05gmHNJTTg3Vf$8eGfPn8TzNp;0b;RPiwiH+DQqt<Y%(hY~9y~-cI2C`-3dkebr zRX0tjlpJY5HVe9-RLp}r6~!BKDqBHmjNo98!LdwMz0e~r94d)n@Oz12*?hqGmR;&P zWB=k?O*E`L))kFhU~i9nfNdkMs0Hn%u38|v@PZv9v72mHS@#lb1+fI`rtHg)-RkAU zjj9MVf>QmgyMt3Ueguhw&#b#5NG}>>9|Q ze3Co*V+y`6D|Cty^98(G=}cFq_N{OXP*pA_PB_h~r%?V={*cmT(|X-<=4pQ;cwBE> zV*hdsS7eAOa#|@^3pPh=Z2(%93Pt0Xdh3@=;M0aBw7!%s?>gLE#=H8oB_-J(jML}Y zo1*E5lW*tsa^IsUzob^m2vfat$dB-TJ4+%@hpqrZYWds>?~NgV>EYdc 
zd7oufaT>vvjg_=iKRP{Ar4~xQS@=ik+`l9F;-NZe^r|G{xHA2{vxd0CQ;c%L_btcF!CLBi^&8(O=Na2(VqtNNBtHYYlgD8?bQY% z8Se*sPBe6n&4I-)6N58n9|r~{x25r$VZD((&x>Stft$s$q$MBW`uH3e=vh6(ewt-T zY9q74?pD33c0}h;vII?6k)-W-Jm;si!morGo4B#+SEnrVS;bvP0?xNmn7b?JpYRX+ zQv~8pzw)Y!U?_c+5|M3A`}>HA{rEh$7R-Fwfrv&(He*It z*?lQtb%1|Im#$;f&qM8_LsX(=#_vZRK7eo%W5>rP)V8bj;Hf#b-v-PD#eLtXgg&I? zV3{NbX&32()DxIjG|?ES`}38ph{)OfacZ;Eqq#Y_l1{^vcY^6C4#R0(hRr4>?ZTRh zb*TjzUH29bYzl6D?&!^r&bev;9bYK21M%niTB1ysj10w-#*yEDM69wb1cG3*B+V`G zCG^W4?!9?JNmgo#Erv5NF`S70Y4Mq z&hd0u()T2JPPQ;xh2+&)7oklUJnpWDiknYt^q9$m$tYl#;?UDk6B(^>^%Q^T#7m0$ zprk6vcvjp~x$4k=HiC%_$aXdjLDhHTWaPx7~mR{gF{q26570-x4}cUr@F7pk%j+zpm3Xq6)28U^6GLcP2mA z#pR5rG1YKnk!$&%x2q2s53$O5XDbnYt(>pU<0#lG>cwD#?tGmqKuc;LgH8ezd{O0~ ziCaIsuF#wRaN`^jT~8n}EnA@;mP0>*fBOA7(BXPL>B{wnJ&UFfHkjvvm3WzT0TQ*B zXnznjiMU651qTd*z(XGQ4OG|5-%d+ptdBhI9$4#QZWHNrU~L3RO8fiHuO91$Cv@yH z5-x~!?_3-YKxUXly54HNrd?LqImN}v$>ZH+2HClUr^YyLEi~lp?f@GzWz&)0ZV|xA z1HPGE26TME0}-8Pd;N6hK~PjwWEQT94LtsH7Me0nZD_PU|A&hIf!1K|BkZjHGa)>m z`Bl47_{CI--}U9u#^iVMG`_Zrhr-aKZ|F8MW0jsTbf5MM8gp-uJ5RSA(61L%DRlni zoIHFDl0W*ve&&b*^|XPCb+cDXqxpJC-e1oI&}~yUS3Iv6e0YAH+p~hd^kW_!k>k0+ z^oD&;4UTa{rXE#_j=2^dS*~@ep*W7dpO_LZv+C0zuKp+VALtsUjx&+>lU7m-jezZS z>4P0Y6W~8rPV|9Rnl{LW_AzHV6kNj16yvfI>>t?4uiz1fcg57}IG)s7rYu?tBw@i> z#%Ug+uJj@cF(Fcjj{YxQ(PTxp6E5)LRz;>^LX%pis{Yrm3lsMWc$XBs1QOl@;N%XH zxw<||eZC^%UmtH=Uh@~1f^$vTowZPpno({{fwV;o*PM6e#>eGw@7GLFTxHz;%T2XG zK$NZ|u&DfA2WgCZShFvVc*iE0R;-LpP>eAfG!5>5jJC-6b9h;Eo~bab;XRF5qjPsl zcN6?1v?uP-QJJz8Db&!_liUw!?#VY^Mm3KZP|11+rZG-g#U|t@ugm{55K}qcoO1oE zg<@>CB=PKvYb&)QGM+!*gn&=J*1X4K|5|M8y0gQvo`UzL#CpQgv2CY()O`SEL(8Ph zI4zMY9i|lBt!G~c4^y{FLwjOY)fN9_b|{$t{hc`Z%tBVzUNm+@_etj8{pPgnb7o{u zBV90g052wF%wz)BTTZUx2{Jpe4=8xZ>!_Ox8l1qT>B_prJFqe31K7BwvEdI1Mb6op zov`mJ@<1lj%f)_+w(*y?S}4!@$r>aDBr7Xp(KsP2R6!ep(y_+mB%0@2CH#O-rNb68 zoqB3MPis2n-XTlsAfca;Uu1)_#PpOst9x2o^~i0^jhG@2W)K32-yTqk4Xk2!&jDR0 z2^t}!cC0(D)v{~@yD9hZ-kjYdqb=T;EcU9W8svKKiz{P>e!h?%eJrIx$C44M-No$^ z!)WFLE6tEXD^0{&iuCWA!ErNqF{;EO%=?9p_xihC+|Z?k{a?`6r4(TsX zm6KpV+#xL=P*O(sX>C*;-JulreF8KJWRWN#X_l&O07x3{agKuf{uph$cuyp+XI=P9 z&gD&k&2$qF^(bLYr(n^*&NoY9#?uU#P!4&s20t~=b#N8uxRSrjmWfFRh*{$Ml87tK z*3~7~31sn8f4FUK4Nq0<@U~_Qt1HG}j_LS#LxI^4#|LZFbl`J8gP7!hHt`P^4tsX|(j5;n*B1WG*^M$JKE z2Ai#>_1pJ@Z7h#Ip#$yB=`hp&@%NI`_K^hzGq#PQSr0p=ZTHdyd)JM(b}{~DpM18< zz&6v5kc-SB;j#j^LZF|Hved0c6llWKcGve{)W3f@tMB4ck5bpylA-@f7gKm;`WIBA zJc~i7ekx$}^RBOJ+1!0m{Dr7q<%Q@(NVWcKRXWh0;Vb67@7&F5v(EJt_)aMAojS%7 zt?bG5FICH3qC{$;{Cc>0zlCA@J3gQb1!QUV!LZ=sWU)$besbb|U#adoc}`!(TRAEQ z9uJ2b*jMkYTm0%L1I$F5V!_=LD96==WXu`LGP+x8Tom<@Ch=76^pNCc{JnO8d#D|F zLHS3~-9`)F$A4TdQTQBb8n5Y3w3g&swHC~u-N|6Gg}h9FCUq{CHl6N9Z@)ft$<@V4 zl^?rtPb{cI4dXYxg~^Z0GG;^BK||+k-RFV{ho2YNE~2__H1Hd=*PmQtIS&ne=bjE) ztmZk;#an_N^a9uMR1`Fj!}iW)Lu?;cB5|A*CTz~@9t>CNf6X6cu zUO)J}Qkrw@xibytsYO^$P$b z^->RU(cdReQ?^G?KW}19$C76*gpGUxRFM?2!-O%@lA`(hzrVJw$yhhGd8<&1Qc+J9 zU7lp9dA#2-rWF)ysar0jQg2Y7oYv%6-J98Gl-~|2d#swUW(8kt$+HRp5GFF)cLr2< z*mL*SRJ8S6rd`~a&-p_77VvheFDb|6s(8})Q6kz5ZOz)|Z)|iON1)fl>OmjoeGDFB z?nFJgY^<@`W&vHtR-1Q0W&!F8=l@JnRD@@7)F%_~)MYf`#mIWGk-fR&ItO_jqZX20 zf#%bub_-=|)h+5h4`MnQkoAlH)}deJ+=E93a4qbe?1lc@{R zm1SILYf3K9{Opekud{U#>WVXnDA}OW#Bzmb#%VLiV^8zf?Jr`r_W{#Z8A7R#b*_9% zB@kET`DnA-^1sHr{5(r)>ZVpkLavC!In()}tm&=^)#j%*+mCU5RTo)V7B{Tuj)z0H zxEPjJYe7q+=WmDvc>F%S;Zt6!@UQ2J86xIZzGwSkiF{?VTQjb>TSFxPS(M^DD#3vP zuT}G6LxE6Mjv$1lVl&W{1f23~7Nl5AfD5q-Unqon-Q?CJ=#fSY(lz0#0h&tK0u5zF zLai)r)Gkv?;qrTrdrL;QO(A?-fVuWbre$isEX}7Z3i#P9{<8*pM?||qaKrGNkF4^# z2O>;r6RF`{sBH zuA_IBCwwJd$T)>bqEx8?tHf45AGJ-8Ms@j%nxTyDCaXsqJ5rF*+!nn2zzS<|aEk}b 
zZsx_|;JARVxV}fNht*k{>YDmrCU2GJ$=g<^M6gf>SH=5>3{J#gUEh8;ku9@HgDt`8 z5;~DiVMCbBf}ksjO?Fs2E2u(4Zg@$V@4a>z+0vL-4GJ$M<#6v@5o3NTG zE;zL*DSKGaJUU+{U#G^ilDx=+%bdJcdm1>9pmFuNj+9EcpTnyR zD=|t;Rz8|36(CEzJCzJuoERv1A;7dR|H@0Yb>-u3>P3`QisZVeD!YvJGU5_mYmWN% z%w5cvzM_ER(olOigS&`hG-^Xrrb}drI{?)tuvu78S``xe0dbV8n6lC=W~?J$)vp*N zv+SV~D6YOJMnzrWG16o`O}x}Xn8i3O+g!nS`&FP+EV4(Tg8FbR-BXSK3G69e6+1|^{W z+N62|4)>2ux`u~8bG?#oQQL+Q;_rteOfZ(Rr&bbPAQwo>u$@2d+|w%dUZD&|VQu6W z!up()tFSdpr+vT|s)jDo#2_%$b*<)eL!_R_98;P!OzC2b z^0Q3D)mw}9CY!9)_tOX+HQ853ZB?iY<%satv%h%-JpS^N1^AkWB0{#0`_5JzF zg0}bVuYgFe8FDUE=t~}Km027eiJ5V?PRcBVSo@`zi16h@vh;Q786GYg$F7)tsbeMx z_ou2ibnw9mcNFJdmO)=sw&}qxNitO#+<0#_NMgZQp-L99)M>zUi<1rw>A*cS)w%6_3O8!sGOm72rC1Zq*Cq)pPHl?on>(j=+W5q=j3LN^}XAhMZU?4Z^@7np?^ ze1ddn(L@SZFDf%1uO2g{MYK1$Xl?MZt6RoLbZbKnw!OhoC1Ib=x2!Lp>S~oS+-%09 zlMgg%6_&eHcxP?p7s64%R~ z$+|4$OS;rMF+`?%>6HpZ`New%GaXCuymS#Ts1v5!(>2*T1Xu`@Ad$kLLc7Oacix3Z zWJ7ur9)mq4iInUGA zAVSt(JYl`>D@$F$j+y+J@+R5wcMSH?mJX=B@3a6&!B`*$j4KwKOLy!i8I`*K^7vp@X4qI zbNCgPEK^@uwX0gz3C?VX)+{|a7_Jv~(8+T!+v^^{GJwqUN9j38YFC`@cZ*WqHq4Wu zGj)s9Vw;Yr_OBYj%wh#ANcQl6Wacr>O|YVjY6ybc$BLR!H7&Ek>m{BKIi6sU$xvAk zd*`HU$!YgQ*+EIAWmpbmV*vp18npwehY0xi1*K*$(@c&E7v$dAad;&w_$R zSQkPTHJbi6hcEhA)K7S$7-D+G$I!4F6<HCne2}ql*~aFm3tt+eVsdiCq4xr5 zK;%J*zSS%()iVrWKG)SXJ(mqRoi7~{IAJwgPZq)4#R(C&j#Mo=SMZ-cTnX#Ul8ld6 zjX(V!Co3T#yVT&XwE7@`7xv?9&8$i!8BIj6__0#~HMZ%Pcx!ArxW_VRLkcz03V-^?w@-#MyH zmt)`$ExNlG!eD3YW=rS1a}dh|OGEiBA#*!Uc=&nrT4&VZ?Aal<9p2EF%`}RWg5l|D ztyakNIvwOx-DgYYT-y^4pnQSIdP!(G7r}YjRu*|C$Ev0OF*|y6(~XXkOx|Dlj#S1V zcM3fDsJTXJ6ql^tJaQJYr)vrLmAu|aLMP$%I7s??NZ34Se5d1lq|0js2*uUEYrgKA zh^AX5IL*3s$A2N{Qahy555_qqGa5(lCqmA` zqNJhZWcETN2j_7NkxTH*Abpjp1@jJx4YJ>DQl{Bmrje1K37K<^?$LIpKQWj0O&Ku~fHjN{NEBMwY<2<+ss^#;3->2|Xhe zKD-FOo|jS}@6gZNkg;!Iu*4f%&|-E{lp&dZ-jjQcwTr_W@4-v!wzlgmW?VZQAClcn zIJ1SjT+YQ&y@POMFNeaeg7O!^%?4#Q8r5AaOAN1)^V`&09gx$s-S$Z_xp*;&nX~=6PR0U1K~OPa7uzP0u|N?_dT`LC9p2J})ohy}F{iA*$RPF$ zErK$`(};-QqXLuc{uiHHIS9*Eia%|5qrMk*LEiY%Bzq0mU{-i*@vfyM`uNorsrF`4LMuXFz8LL1cD_R)u zpr&I=^ovuZsGM23x>=@KP#RF9ZhX)#hLfaugOuhI+Z@%WoKf`|ac?b)hnk=)n;(&1 zAZ@(1P`*0JLIX8ov~h(%wq0zol)6tR)8rYkNNuByK&>Bp*twiDcOMpy@ zyT&OZYd+@Pec>A&UQp8F8@(}nLN;&gI7i0Zer(v%#HHivq?~1LjaqKb8eyIo$q??J~mW$Vhiw5MHiJOUDO{V{S)u zp1HF|-^ErJsw|*?Yx_)wmwkeb4DRq@?Vz(B{>H0kv%Ca*-&ndGTCPw#Totd~s z`4FW|ts#;$t5nBMj7F1QVZ!CtW?HFM)-T$aBHL@{;Q6a3dgxL@PmRs_6Ec8hZF`O9 zA#*1hqC%Q~5s-3|bb-0Qb3|lGK2WFOMqJs#5gG|a1>j21g1aVIE#Q>O8KZQ-fOmVY zYGg>$wc#AYKi5eO{f?Y3LCfsMmBUy?evauD934i?o6c=lkgLohzvvr~R_dnn6H85k zqVP|cmrMs;Lz_HdDay`K9qsjkAD`wPy$0feCN8T;YCO#IHN*qk28a00Yn_;t(2Zr=5CX0!sU9ZONeex#kR?h`#M@)GUE z;J_T;H0iu*+>wugpE28lCpN1PBlSuZ%55a^7ZDkf;j21fIS;|A6E9=SR zXOh!^*55UK#t6FxgRQ4R>O_CS`*e!?`1?`0IeE`X6H)mrjz09e_Mq=xp4GR)!p#%; z?Ww%Iy<1S2!aZ~f1@*?>ON5jFoEvn>?RU?Em8n76uT`3mi@-5qv@;{Z+1N}_(K~59 zTuF?uf`JU1A4y!czWvvP!E-oGNiKHMclj93qRP7}(H26|n-5bhystKPDlQJt+oD~m z3+tNyto)ewT9Eb@FL2We)N=8Kg0}2^>ASDMQj`r=ocTL}}UD&!)>#JYsaIU>(^|=@*iy1sYdrI*Se!p}*3RS@E{XJOuv!+@hla1mIhRD_{GgN|;xLdCo3rgp2c#`o_ zn+#RY@$5_vk;s`@#;-x^B1e72$L*1_x)i^grGtc2DDP<^dqDBz;_A2KKjewS^7Hqp zi`u<&f6|$37+(VLtE_$Z@C)*}_ZAm2d)-rT1+(w6*2fMw4!i0T_$?%2m@lOVYjbp?uZ*#G;HVqGQE|5VplBN0jyPoMO+^#!GPnIDH_F0;zYVE0*3Mt)--9G zZGf*n6#Tl6{is-45u?&J;7oEcTw1*ap}o0;9)TBK*FTKx1(-njyC zhOkr(z;(n)|LzfQA>Se4BJTc5{lR_JmUQg20v6s5*^OQ^v2)0tB`6vEVf|gu&c_u! 
z$xdi>P6O@D)~J}c1a;xI0dtjAVh4F#qtHk+_U6t!H43udG_h*0^vUi7jZQOkXLC*g zsb&{0YIjsRM|eHz=V^F%$&c&V*oNQ#bjepsP@#3?IW2jbEVMv(C9xOeN%b_J_UWoy zTPLvr6A;x*@pj9%@?gDcbxE1J=9)NW-|LJKPiBH-c=#cslJW3c#x=5w_DHfTd|lY; zb|FU-$!}5^zu&{|cbe}JxnG}@KagZ$`q~VDZ#t#FaH)x<=iZ+p_T$%8%O@_Ioufm& z8IYP#$nhDKd7*`I=BNSMV34}AHY1Tzc?o86Dat}3pnJLb#=NRV%?mpZpSmyWxp!F!)QRm( zGw^`D2J2?W1i*ryc)OS$k?F~5thK)ZIoyBKUS3p&OlmByBhIFEK>%sluI`}N{>pHl z1>MDsBZzdq(L)w&VJ&#r@@MRipPsnBBZK|BHU71yQT7|-M7%HY(}(2xVb>wDx*HY@ zWu~id5aI%|i3e0c%-nvT)VGdX^r6uEeySA>Z-~?}q*eBRi;?7BWiImIZ zW0o*j)hICL%C%;A-Rk*WSaWc2`BofB!R$k)Ir_zrUuGOX_)-4a}_F|8@OwMIzi>bzmQT|7LpdF(b#g^DJ~Ao0K0d#k9rwq;#7A&>+Q z?h@SHo!}5$ChqPM+#$HTyF+kycX#&y!6!O#?#w!8-#@Fhz4m_p+ih(=@Gux-RQ0N_ zs=nTqHjML^KCtJ?{DasP+K9lMm{-S}SXmD2a)Y1?Ch zAWeyGHgPD+AJjR{{>#4%ZKAn-6Hqb-E)Be3N-M2Hh8+fT+L5zAs-oMd%m{tG)W%}4 zp`6q-YXV%}o}E?6U7xmMGTD^(qB|tC)78B(9Ga~Dy0QOBrNfA(S#N52ZEUvjVs^!U zkXA_yg4E$!U|9Q*5folDm|kL{W$wLci#1W}?^rNP-k&EJC(7YC_R;#DOa_|;!Hk!; z)2q=^WkN*5#^Of<$qlG`!RLfNrGd}&rvv7c+Tn8nf-knIm{gTSSz~Oa1k1T^|LlXS zn;X@^VbS(_n@)MR>^ZA0CAjvVbu7pTz4AJXTl>s=Ug++&u`Jta0iqkydQEu2y-8~H zBzUfcM0hVzHGdfzdp}cCcRy1HSU!`G@GyyAb~V@F!DPD?zfXjBvCW@z#4S=W~BgrdI&H(F=XOJg07#)TKS% z6XZL$hzqkOc7(hRZV)gQbtaf+A7L}l?7Tx#t^F7meaxL0hBkGs&(}co&1z;-^!&W!HdHc6W^f>Ipi!meW&odT zPIBYg2)RFhEE>su9qGHVp~}ZyO0Bdz+giYWDv(8(ma3^?olf?)pFG@0pK(DthF}|O z5`7eu<0*cA(tLZ-!>p?Z`^-Nhj3x~x$+ImUV3!0ghphOE>2+DZFxmJ5Yv5vt1p@5 z(#n$=)?x9i`kH4pbhbl^KA(35L%L#tFQwAQh%s(>M$PhJ6=F=|{E%~Z9i7y5GispR__&($P5$Ki%XHgqTNE+m5y1 zyr%KSuHI`!L|k@&#*2)^1)xdP0*6_ygs-$!xymcA7nyWY=VPH>ySQxfs_F&VoOhWF z=`^C=1?=;!C@A$Nieu%gNn%9mY9|>E@i%AIW1ZI3kZ0U|n87$ueb=EL6~fQ&n`s_B zoCvCBy(%z?ZXJ||ZAN4cf1FC3I2R+`V%5EN_*KQN;pb+INao33-nfcFJXN32%bV2s zVVN02KcNVaXqSGgm4Bep2$3&URll^2PqmWde?l|ATx+5P=e7a*q!5b|? z$pM;20SFCpubvcrSM7P5$jv7lcP4WHAmn3`kjR?O{HZ2k$DdTjCu4II#(gYm;0`zia3&O+McaiISD08blyr#Ye`Oja4rmN} zjaY>Fu+K7DWvo=;hJ{qp_DA=*x6z&&%}m3SRPh@UHDd8!N%A)h4~XbUxtbZaaD+G!bCC zsR52#b7lE$|(9KPuHhYXo#mefg|DSyLtq*EkBg z`qH^MS@0-B{dm=@LKPJC7j;|DH|lra7|FUdZ@tFU1}b86Q$wtbwQy-OX_{Q!a~nqR zXg|?tL^Ti{yMR}hKr9V07$ho?6&t;2un}pw{_-TppXh{WNVfOcvrEoH7n)&E|9Uix zp)KeO&#OCgY;Wgb6V*QazX>qfA|FKQhso8gw=&*(_bdHgbpKjg*$A}KSWZ?pCs+UBMsNPXFo`@c3AD;Ey9*YK=rn9@k zps%=nD;QG`nQo+dT+Wic#MO>TC_&scPwc6k6V2@39mw|1y}{}IVc!d+ap{#SP_c8dQ;@%Db{i>tn7a%% z(-FJSJ9mTR*aig*dbGS>`EUj*+%N$}VREv8gk=_14ixSeb#2~MUH1PHF~O*_Pl&Ud=}(hPfWVQ^rux+w) zo=zD9+iSZNpEga)Jub-$y=bu6;pCR0@z!KTA3Vty?cnddV$N5h*PN4nYV(!sQEoC~ z_`6%*QECCpMlUnaJ6Gs?CY{A=yx$YSl@>IUO zh+lt4r|xc_Dpa8ER0GvUCYfy+jP4QZdJFUO$xhInYuLk`bp71T?V^M5K(Uq&`vW6? 
zRspWj)7H!pS4(0%sEsE{Qcs6uFW!55l%HzvTK^rNJFUp`>g|159eMqe$@xygwh;3 zTSS}XsqTB3aj*_loaB7pHgY(&{^!I8TO(+OEk~ny_R0TImfxqPYEzoR1JmA(W*yp= zqR+cg&;x@r?cnC&r98EVH9cOj31KRCmxL{6h_XWduoF)=cKU-ld0NK%06WvoDOcvj zL~Y&8H2>-2d#epREa0Vr?Y1Nu5ZYxu0_xl-`(V5PGx1Ea9`>@W9{(Hnm63Q)m_S*p zJGgP3SqnlN@f-Fpc%Dcv3*ta$Lp(@{F~dJ8^`;%LB3pJ!)5*X7rAkt0+|@R8ZW&F1 z>5D5&>Z30_MI))b8YYI6V2 zIRBq<{yW$GU*GtD#tCPqP2Ay$kS609|Ya{9+2ESz;MxBx*GqA zC+Y#heW&%g6o(dAzqL>_UB6xad8FE78JF*sm<-Pat99axUPp8-?LS~@57ktIjMSLv z1xM;a3*V1Hz5Tu45cd@Lc`|3z|su)|IZ zh+JU^%*wcH(R8&nG4a)5Yss4lVxJ3S^td1L{sQaN?li0Pz9qdZ)Z(8pO{v8P{m$*D zj^a)B;x7ABWG{}2TLdH>_yNA2R36ms&H+(vRM(eGZqDN!cDOBr-w-y3nN0ozlv6Zz zJ@}1df_g1)FR2<0@0h{i>}jBYtEc=JpF-JimwI_Oi8%oF&iN{7c=92-yn z_SP?!qbL82(%;nuj`!wITloo(N>hiLPtS8EGVcR^AEf-GA4o(E2Kk&{{81waE{@A= z;`D0(yv<7;aqFDF0r%hT?n_17<+M(vi1rq?#|I#g8M-;-18^C~0>V_%JN)xJC%JWS zb`%y0BFYoR0+#CE^no8j{rznv@PWzspS3vu zdrNnySRTyfs`4^!p>4qEby2r-`~psQ{(m3} z8XC|X6V2bke4)Q!G|Bcwfl&qxW2KKwfNJNRh2#ZOM``Bg6X%SLsJ1Kei^*M`oifE$BhJpylJ6B9U?on(a{jZE zHX;9I3hKtcO+m2pW(YD!u{E8%P%M zNB_4(`8Szb_RjB|&gH5GH|A|+O>ux-mMvskMOX_u)~V?30X&NBjWQrX_2C>AQFrbZ z4wuipuCFd85o{~sWsHeQ|Ez^QaAu|4{L+n|@wQU8fQmw^_?SM`%;O)Yfr%h9n8U;Udm|KE@% z$M`!l3wLmh0y~u}lu(;D_(`Y7o8Mngc%GI3+vcr z@iU^cH%kLVfm*06*9OU#E~_E3i09j@4EFK^!fxIFglq*)U);k^734L;?0? zb%2PGod@ID zh~*xSOi{IXf*w0@0}U$T-hx8%Bf#P)SXKY{58~*6Z$47#uPX@_hd~l8nG7rrHekN% zl6!wA(T(<+Ir*k?vHE(-{q3ajw=(`M+409iv&%5pDVy2dA?_>!+br=FFeH=AqmN4d zW163R`P*ThxIPv_!nr`@Dfyax!2-5Oj__XW=lr(pUnaKM6c|DcGPkP+Wg_b?G&0GN zWYdd>hx`v0%wLz|LTWLz&?r7_2q_zPQ6K-}R=wWkXJ9#N6mNa$24+(Hhd?0U8~uB; z0C}Lw1Z?U{AyUpha_`&fh%MS%Uuv&tgmtu}=!af^+VtjUT-mNISd^({aC~h#MEVwW znp@QBa-TbPe*@RSSFiP|sr90y(iPTSabyp5!SlBu^@rTP)c+F`?EZ=g0bEx+VT&}_&n~AqYi-GN9GT<2~UfMD-BKjXZU@K9on1P9`_L1k05SUd)tER zGcG{?_Ypw)$VV5(CFKsT-HrdSii*EiaSrii39Mcl<?E`cWnL!2o$<#x?Q73 z^2+>-;5(X=X@c*_vYTsgv5}EdtdS?vRjcj2L!Ez~9yW^+3Q-u=7Xc0~`c>4(l?aJ{ zYtZN$=p$bIEz15FzUsX>cM-SkUe>IITNIE4pC{4=D{ItFI86} zXC0re@S^raXntU=r1ain`tZ-Gf!-%@K!YpR8;X5f$&j5$K3e(-I5X0DQ?!oRxGVDX zi%ULpw#5E1kbBFK6spyJ60bMaAWP^Nsp)<$(th6Y;Qi@zyFA9Vr9H-u1v@1`d+nbk z|6j_)`0i4_i`t{XXr#FmvaPPCGXvb){M-GCEGDvrkKhh&S z#T{dNO$2aQEz~4-ianj|awYe48>kb+05JY%vLoB@w_?~zVJN{62UDLNpF;%(m>o|Y zmp3TR6a5>^uPTx_u32Ney-W&pQmEfaHgAUD+?`l+TQC2%zr5cD~NN+@u3GRe3cQFle|$$ zwLN}cAY{?8=j3)*zH^_oU^&9<-v^r!B3lX37s^DZwqL16%W02t+3LU71Zy6Q>%Y~8 z3!Evg%U%>4WZODjL%i9Wxm?F~fQ{8}%>@Tlu*V`)#KKMJwD_zCJQ_t7!{1ft+L+bS zD7(FHgK*f(m>e|NeWE>m6E?)K)A6Ppl;+1SW<_Vk{Lt&n-l+E>oY=G^8N_w zlnahBJpVbts2YQ=)8avw(WgP5(TCU%&V1G*7%8d+4{s}n=8|N#%K=wFf2om`W~BFy zofGcYiku%9oO_`NOWz0b+Y#?n+*-p-e(|aKe&F+Y=*E<8zrzmwfTT?4xdO{;gN6$- zMJ0m$3Ut5th}y)M1G)mC>2Xw=qD^cZ@wo!L@FTNs z7hrM?cBh-HhaVZCpopZp0(vi>E;+eb zN=e#O=^*40jSl#tMrBv4zK|uQrZ`@ciU8O?=Cq2IqaWLYlS^bEXdYY6TCE|tdD*cJ z8*;0|dYPkK6E~tWPjNErD*I}Q)&Z`)&}8uMd}5G+OcO+)wWzzv-ZEe_oHwYm<^;94 z6i`b?w!wqyxf`1=mb6er4U~e-jaa|P{aNsgPgqy^J66Xk7iBMIHAA4X>gIu12!9UqS%&MhU?SUYV7S2SF8qu7I1{y z8|GaHb_6zHu`Blbp{Y7~g_02fB`dmZLX)SgGBhkwq`WNedhd5)bbtN|W7Yc5!dA+s zeZ+v|%*#-Bl;tD{@Jh>GWYNZaqB$x9PZ-bwWvp3qhGG-O!;Qf7P zH~hvqiyRwmw_m&|lTyb(+!g@1DprRkX8e^v&3pNBUKqoHQOG&_1>oT8P)d8%@YGq6 z5}b70Fr$qih-9O-l5cR;iuie;wP(^#2>)bzZ?KdI@IjFtO)V2S3Ot%UMEI*5Ex6_` zMV}vv`=bW^7$o4+6>+r<0o6vgHJ1Zn`fh&S8q@#%P5LVIBpz>MxyA%h=N;8t$E|rE zYpH(&o*^V9rIcJRHF{>x!gv73tVCB?)tlB1+AiNVDFw;#60((sNI}gt)*rK+W(u%Y>g@Jy2$Bdc%|F$-@^qwp zG>ONtmnK#N@>XT_7js}a$a!HbMp-{C*6^(~&{T_zC(63;*&|c}n*zB2V8V-5ed4K9 zvP+5^-s-H}u3X`!xiOcAw{FD2jTzJt$d2g!i6IP${RJVa0kgPh+ReE@;?3EN(Ww+6 zOQkIs{g6e~r!1Z#)l@w#4NPsRIg`MdkDkioMS@D`E@z6khha?C?%-duY)pP+jSn2@ zLQr?-GhUYYfT(l3n?qBQG|S;!5-j^r=LNSyU0#%eLU%a-NH>()d=#rlbSWVq+hC%$ 
[GIT binary patch payload (base85-encoded) omitted: the encoded data in this part of the patch series was garbled during extraction and is not recoverable.]
zfMa&;{J|&T)P(I_2J^nMB|5m8m^~trg{TRcu36=3yIF6)QZhAvnj!Yct8~B*)*(~M zHnu<)ZZ#r%>4>!1RKFE9R^t1e0!a0n{GDA40iXFP99Uh4QN7eVI|3T-b$c0m!oL>9 z74+U;fXwWevEwc37M6r~?)A)Fr*8$0lL5ZQ`;M7HFZ=O7>p+!}oCHnXz|(wzW1!sT z$69yV-vCX{eA2C_;jXz}(i@-q(%LZyYg%Uwu**R4Kl?`HGOIY~g+J@Pk=5S8SF zqXNt@=0P-S7pHBgl$0%D+~POYwGS7Frglb&NX{-U28-+WhKuVlR3aq35<8>NU@4TI zVLfHOb7Y%nVwR!SSajFp&zD=F%lZkrv=&~Uzsci!e+5P)qE=)ys(5>t)BL_vfCL}< zrFaRAH9H!fV}vc+h&TsWc^;DyUNZjvxmk?8HEH&IgLz8kVA9x>gNA&&}F32nDejg|BnRWn!_icK0?zsXLKTHW=978W3S);0y~ z*qnVuD_@X8&xx{A*uy&(Wy)M%KTvBanQ@)c4vp<@m_@TN2_i@Nb|Fp50gKIba$(o7 z?ws*HO=xxADqi?#Et!~mzCIPQU z@?e=23*DcSBPuyqXEB5#E{+x}co24iMq=1!DFm)=lmboXdG$Nqt0Di;0UOjQKSa7u z9d}-ZkE+h~e4A`q&74v6;Mj4A0qF=ywyIUIbbm8_)q}W{eFFITWH;H z$6`7G)OBONoeJ~Zi-2|>#`izmeCFe&6Ycy~n!H|S{p=NAX zz@r1V=?y~erkj5t6o;UWlJlA8F(Co2yW_bVS+&9q?iE40ugO9TbkkJmv3x1tdjb@Y z()D%vwWNlWjeo12HYKvyRGXtl$iIj#wc-ID{dje7U=B)l_4dmIhrBU!9POq z;k+by^V}RV!cM%rx@xgpuXu%x0mO^mygvIA^=JvM<^^o{b=1*1OB&AOQml33X!aN8 zNWE>{Ss(cp+CI_{96Gk6THKy1oh54nWAB_+e(aAP*y()u!uLopA6VCy-QB*iBflT@ zqZ!Gz+!c$@yL=+gTshD79pkam-TLw(1^8s(cW)Lsj5)f{{!T%w-2|KaK7(q#45`G? zeSCSmczu95iH%9GvXN=dzhNt#sHgg2&j;`1#ftq7e<+~Ip8;B&59*k}yRG4_Gs^}) zeXiI24eks7diaQKgx1xkhfk!n9+a+k?eNhaC;P1X`2B5!u*NNX_{f}jurpjQ&SHG$ z%aFN!>)``7hO`+lI|PFOejjkFG>(th7NdRor+dXvtybWg4ua$K)h!`yX&-KIr^I3N z#dYbQ@Ad^8%kD)hvp*t#FGhOid&c8#&#zav`fN0D(ZXb*j)}`O8utV1vsZna=&3kt z<(dV>=y)8UKc$btZ|yrwLlr5yGHyUq;5})}faY@qZ*iuG+!rjJ6eLZS`UY#SB-x(L z(8}oI`o+)0>0OhRUJ##dtE^+KngH)#H`M3Y2NH!HNwZ+XOL1A8a#l!ZBEa+URRAtt z=xHmF<9u&vb5=3T{y%Yo+;YzZ=8+6^h-#qw-=vLpy9`A7sbddRDh+}qBF{hmz;=mS zMT)3EL+EPMIPrSpY#V0lRKq!E`Nn-0(NVN1j9j2Pnl$=7&CQwJ(eo01EUH1%?78B= z*#q+i{|k}<-OVZ7sV9xAqr%Xf9&9L<6oD#WOU{AgJKm#l>HcqVWL}XILh~{&-;qj$ z&4opwCVG|ej8-?p3&Kx1mY+RDW%&uk={LC^A`jK8yHhg0^R{rIcOUFFqqq*YQ{1-i z+LCuKoG)b_RkE8}bI)#GsLiZZ+MR?YtJeuhC{PNutoTWYm%-Iry6S1b*1KbM8vwkn z=yJGbcCm^0E%V&&Yj~f7E%_!$@M4YnNn~C~m2kA&+J*MAg$L>)1lO}74whRGc~Ni6 z^7iN2Nasv3X1U7l%b)BmtfQz*`FR<3>=BOqb$t(AgiW|S#Hbh#njBAPO5`zS`)Suv z+XPw8NJto+^$Gog#`vT%xZH8h5sS7X`pUTq>cD^AGtOE@`R;o<;sjSohG3N<%2V-? zq6J9t$3N5x^^T*lN1PSAo#7ccl^q>TD0)U1d)45&-4W!v&H8<1U-53l(&Sbyb@W|A z^^ahZr|<3YR5}n6PsQ&nErFnDe?>6$jBSh5%raTl|Jh6qo;?dj#T%OYIfA6J5pHd5 z?ZO;c;PF$I?}pBH5k^ItQwr1@mF720xSLk?y<9dSwyfs{JR?pOCl$O1*o`Urq6kvD1 zL3bH)-&U=WCZf$$wS0O1&Td@{5E%ExlIF|8uql1d;CbZ4LQLzJrqZ+P?YVU#WRM!) zWej8e8P?ZrmfrTAE%04yN!lgS=l ztm!}hb57tt!qP>F5D=Zi>zJa$+}TaJu6ui%n@H5-!?=lc%zet`1N5O{an7s7mXWO>D)oX&pmRftC(oB~@j!e@Vejs-_xjMw)WfH_Omn=P$z(dm+oZxX4mKr;n zo=Gr(WQ)KF!yI3%*Py~JqKA)9Z$|Rcs1hDFtYQCz>%`DYM2F)o8)6}^fK_wQ89U1d zo173bvcNZI6~F}wY19ZuZui~>o~DFrBuDI*Z%&21nlkvd(MlNdI2zTogUd#;?;r9Ca+Oh+ADZL|avxQA{Fz1Z{6wa`MMm`H2Vlq zn7h(^ZNj<0v2PD-6i1bDm_H=?zC@kZgMjWw+P#39(f$Pd6bWRi^_>5HY}~r}cEEi~ z-jg(MbmzLuEo3XX!=T4B115~jdxAC9>*zOq--Yq$CTLr3fY(F|r;$PmrE5*fJbz=P| z^$!5vFCSXo>QY4@t*7T0a)0;-{xlJ3<#NLosWuQQ}<9dn{$T za$V-3v|xIPBH`YRD0ab+pSa zOmcaVD9>cB7;qbAX{mau$%3Ei8C?_R@+H|%iT|-9!;U;**Gz9<-n>Ue27WQpMGIk3 z68~4Q{8&$t=hJ>nL}K$^8edB{ix45!ymYDDZ|Y~_kD*F@v*FnJGtrSFJ%q6J`jdrC zKZjT=zOQ|^0u|bFoM40n@*>c@#f2u%@LypmmPu!>VF{ba;1K{aRPN<%^de2L15Z{m zl~8cbYxeYw8_PS=-)~!-Ia#M|QaA7eu*XMxejUT-md+Vf(~tY2oVc2Bvf)?%AbjA! 
zte^TD7l1g?;iLCLT;tkSxO3gSzcRPC(_bMXHGctk2!@jJ-ZEtxLDFSXhc-LMLUG; zws@Y-6RYx8`->)eC^g-JDu6v2eHB+{DaWyCPn&}x`r}XXn4tS!X7)zj+T02_Oa#!e z2bdDCL>DMzdjL_V}QKaV&R;?ZT4Ugd@!T9YA*oKOFFzoK9Y> zVJs2JLCuyGZku1Jy(cc%cVm~v&Cb34__H?QPUw5XJz7EEN=U)ax9B9VtQw7wa5$B< z6^vobl}PEG&D-(OXUTmFpZDi>{_J8W=Hc6P?)Luh)k@*{F_w=~TK0pYFW2R^7lB|- zMmrJ|wilX8^LK=(Q-z4#K;DX-%wOXO01_FU_Y!b!%r)PKpZ%!0DYMxU&#vfCFxQ7m zBK0iTB+F5nXJ$(57LS1v1k=&q5^s)K%}t=(x_Sz}M*M6=oVqW_DRfxCZWX7+Sek5) z06g^5=0h(rB^Xcfk6FBxJ+~9%>X5Oh=gW=h%j-it0fT ztEcmlj!-mYuYU{`kw&CL(TG-s?hXMMR1sC z$`_6QJXCMwpShVm>8V%_ejpw)Zf5+77tZ?mnCGjqOl#9g2_u=BIucOZtg}ahh7q8h zhQ-idVe+GjYPn56F9qlcF&C5aHBs+-MZL64VFJTRk*+OS2w60vgG%E!TeX{rq4g%5 zn)-n*-9lu0b)KpQSqvQ<6m|^xnCYNfrDOd;4~$gQ1Viy+{Se^D0KP1TppaybullP> z-dS4i$0}MZVKlBG6-^(*8kGWea9_wt5u^$WvM*4Nc<59a?F^1!&?Q42Qv92KQDLsS zn)HF@e%0X)jsRzLLg$au^Y{}mcFk(coyWe5WC%%16=b%;6!o9OQiHLbj=SkH zSAZWv9f#Y!@l9|0;(BQqWhU}N+Y8w8kCu(OfiH}tRM37@UryB+?XJVYYk1!ZJv^WR%Ink*uL`moo zp*oKbJ0Q?iy8VQHBOTI|k#k*8=(d(!uKW=EaKEAYu#0@^Nk4>WPv(MpjLZM7q>(}^ zBr8)k@TXo`ZN?%=2pKZ%cqY}Z&Zs;FumW(pln6SdIbF1u11K1Q&!=pdfN1R@WKmiO zj+qFiUHPTf6t;Gx!lQNT-~h4EUV70%oY1)rCKX!mkcNeDu`9E7hu_hw?@YD_oE9H0 zXWki&JK%SURf9u9q#NBIL*gK0Q(Zg9$8pIELK*ddekd?H9H)cPP&TA3K3=!ygvc`> zCRw+`0!b#@MLt@+dKI<$76cg$%K>IV{-)(0BP>1PtC!%1{Ay}~|N!@(W%^=ydUyRcRsnyv*>WQ$?Z(BGi z9Bia?)`2mTSJ>#x{%ihpc@wHeNb=dGd9r#Dus)c(!6_+00}+5cI&GtiJDC2y%4x;{ z(5}D6nhKb5b=;?rv=I-JJGkPjRe}ucVF?&C{qpN!7rN0@Uv0ykZ3?sBa8)oLUEt)2 zH>h0|Riu`N{sbFPqiWC}GdqFMtNnn*^M0U^)aLP->d@7;GGlsC<+ z`4b*wnz*7=ts9`?2hiOYIqGo!rXe1ioNx$Cq(-CKdB+uCAy8Cx17=$0rKR`rF}f%* zCop6VuAhqL%m)z3J7H>zzC$Ukr^)-+5G#p=cX0_`oZ-g)YD7l9An#=Mc+)n9nkRCm zJeAJjlb(K(*&&WMbDs;bjOO%=P$4pU(jQsNo#nJxQtDZCqXDOn*FUfK-vP@He~v0X zj^Y&dJTvFiUT2QrrYcm?MWe&AI|-F#yU)hcRgyAk4D8(x`?=VR-OG(nDt<5ClI`sH z$-{QX-Jkpz{+>|8U{c|8zQWT9rxl!y2!bb!3z|8D8OWSsK2;qnK>XHn)9Ngp{zb{d zWZ*}hrl;YEq3cE^n<~N>M3K{IbSan22zf8A6vPEH>Btq!WANr6kiI~8F`zwywYFwM zr_Fh2(bDs?dR1R0?S<$K^xl=5G_^MAI=i84pRrEHXP4zfQYO-QrM8>#U&u^ouZAC1 zMFrmOv40Yt%(?VnKdX)oaQ}(ld5Z}OAf=VSnwwdlU)BxH-2o(yk`8a=?3k(RuF&%QoI+f+R+z&yzxV3Kk=MB4{NGyy zj%|`R<@q(iKfY`nPlwHYih8>zVW4MVA(yWxDWPeUd6&*iq}LG>-6fApRNLYyr%}|< zFd0Lat64JSyyBEYUULgm6HUGGHy#0i0A3 z%kEpt)7A0N1Y@VAQ_P|yPOh31i@$-JU=nTNu{42R(8`AtG-|4B#%~OzUC~o)iTv=f zi(JG592#->!3~Pt9&AF+ea5w^F#oSXIje-nAC+o=y=62d*EGaqVqMfE+ zL*smc2Wqo2!BTRrJLNc~NB=AwDXd zKrkjUN_ZyY!QKMD-31f$)&Sn>56S_<<67cCk&g(|P$szk$l>9-zp390=?dPDgRFF6 zG8?{Aa|QWNz7ao-c|877CCe`~-x|ta(liaIeG-24*jsgAnd8GWLH<$ub9bT2jE(J` zBy-XQY3jUew9=}s%yy~z*-QqT!41B|rBrTXcVYI*7_e$;dB|7etQ-^65F)~}*k1a1 zZmKyw;aWtPXr!@hMNEjEw$VvGwr*0p-LseX*{&@?Glx+V6S3z-LJ@3(o0LX+7GQ{p{7o&OE_AD zmZiESRe@@%$O-b}{otXHwrJ7)NjBEZ%^A%(P=yNqS-2mO`Nt-{)`4!g{gt?PM(GVo zuv9R{mFW-#4;Q>>2ES6tc~7$rrwYrS8e1QG4Ty_#N#ftPiJOv|DnBSsd`FMZmQ{d7 zsVq@YvFdxg2JB$QMgLL_$+_O8q$?MA?1gFe9K34Bba7&}TtaJWYpZokWY=uC6SdT| z=1#JzHQ$!c(4pH{pGxhi9A->q(BSDD8cm#>s>IIlw87MMbEs3ozxSAeFoN?>ewM3R zu)!&JV--!;R*@r=mKPer1VGd>g5gu>SCNS<_@SlxjC4;Ey}u}ZpITxuXGTiTb7|`- zTMOO~9E>FZEm=1n@9#+pQdP~WQx+Pi19)`eBpq7S$z|^fc-hoZSN#S}%Rh|)VoNjg z5>ArZh~?{xEmdN37_)4Bt31DB1eza>fE=Zka_6u*pNkS1gKS4p5Iu|B?B3EkQ+j8c z?ezx_Cv$Vj!ovkGq8V0;zg%U|6I1$=B09bKX?oZz*~Gz-z}5*RiYfXzj?sfy;oYtU zb~mBew7RwxeyC`eN+Fb=A2x zE7-4tm~ZZT$CGI;Sst{Pm{P;k1ok} z>a;NTDRit8ljIt76MtOStJSRc=9wfXT;~XoyBZ&gKub}%Ho_$d5Lo9fl{)si2H#yt znkAc{Yl54P{w9}?_*8zWTPTa_K-OtVRoPdvgo*n~@lG|n1LjD_v#vIsi4Cu|Nbm7@cJxzk2={z6F@C#ItWk&v?X zAQ4W}?Ns^E?;Q!H!te$>K5C}1bj`HcJts=U(!F=QXCQTVeXj3i%f9@twN~xk3YzBh zNRHFt5yKp)2t6pbl^2`O5L_nf61k{pgWOfqyi18mDT6OiQ;4G_&QuG(<-vXPvN-& zlmSDf@h30XDeh0YC}lM~V1vXvfZnc`OE`0Ucd;(sE94Zv81Gg 
z@%Eh{pI?EnOHLE$(XFNNu>LMJ5z~dWW>(?D51jBZ#7JOi^2w~w?n*9^#=zH)OC%IZ zW$86|Qa*40**3I}NTVdP}(`MkK%>Gz5yPgbh^m-C()5ns-xG_?%^zkpVjlbiTHbRPygPAs!XwMFMn}&iT z!1C5zZO)3%)(hK;j#6C>_X_1Yj;99z!Va{W^t5bgd9xoF(Xn8nJGy5dT z?eVPmAp2CyBJb)kdJfmdP3iJUoxRj-Q+O@pbwEpT5ii^c?WCZtAwzM#bt?Zyc0`EB z?sQzDuWx_Ec44dwCDo9)ATso^>G;M$XSW4*pTl7u5TQTKFo`cc{P=uMwqPPMWWz*{ zu?8Kh_NUk~Yis3>#aGfObre-*`m$38|3KE_+g$J_ipcGSDl6mtbU{x=;!6gBGmWSB z4g2s`13XW0A%ie#Gz)_AJ_XOdi>$GuLd;ix&H7{C1Gn>zEbos@4|^3*<|arT1G!AD z7sl!AaZ$_FxL@edldEFXjpcu>NLw~zTWKmSJ@@;2GCIduSdMF)VxJ+hS`}Au_!fTC z)8wZgt@ccVbzodVi5%ULyo{TT72-Mfe$~w_B(-;s#L;pDEj;STpmgN`DXildvSdj9 zgjuivDSLi8<~bojw8)c#!j^JkWGdS)E9?ixmZa2E7Y~;z(QHim_ z<0C?#8mz(^_MBshiNyT(PzWz1ycgj)iu7M!*S{}XitX0{0+`_d=b4aP=Y$@u5XjVS zG|-kU{uVB|YR(KIH;3UtTCTamZroj=QA=wTlyEbj5gBY@{F^qLChtn5^u7o+@^x|# z5;Js$bhG#{?t+R93e2Dn#W?ly&7DD}8naPow~XgGSg=#jMhPQ*ir$E~qH&H-AMGq& zsiCB&?t~0^Q}`;Dxl7LRbHnfCHLoR-rYcm(%0@|;e9GNa0V`m2bGK7y1&dlhoY#eT_i5Pzrk>5_^Ne_Ha#iu{;%5>@Ua4P5S7A(NH<)qvQnu>o1$x4 zh_Ux`jx>JrV)eg9#~Gi>zutIYXZ#k6_ppY6)_t`kbym&dD1;~+`1TUcL1$({@Xoo8E-;Bwe)V1`?NF9fV>C*T%+IThaI92g(|@Y-OBTX+q+!ZEHrcS7rbe4;>R{bhnP>W^>82=5MZ^9uS%fhs6D{2<_SN>!l z_3YyUrKTBb;B0FBAa^VPpCWMi8;dh?F!-+;N!{M4YnBRGJFXgov?R=4+I)nuDUO=* z9jCa0GdLad_G=aZ%J;KL|8y0L^7k;kWVsAHO=mVzR1b?siT)|ZWV(otxSKshBm;A4 z4c!O$ISmY2jIMNxr8T3Rq8D6}YYd2HYYcZMT2~3D;yWAasz{UhKZZIKsvqH|_T>9e zSgo=*o6jaFA?8&tauib0j(!GjsY_;yutt*VbbdmVqf57UKj`zB6{$0a(^X^x5AKy? z-R{y%$aPa#$9Wt89erIBgw9sZnlNPl9OL-vwdSp>n8Jut^CWy_b_(^nP;;yOU9jtKUTxOFWfg6ilzF7t9Z3x#~mbq$))`nDe(M zb*;7OuOl5PWok;8UWAmski(XKop5!{f5KxE=jsftNFeA8aIviF7i>eAzwdj1X2L?M z!2D{-1?X-2T^#F+I&>{Ca{LpR5~;tYY-yiz0=n<=fjaXW=!Z5M3G_po?_vmcgroZn zMvX%mTI~P_XM0I4Wff36=IZ44yu8Gj=zwEyn|v93O1_%h46AEg!9qwC8X9%5;dc7Q zXiFy2(3?e&Vpdq%qspb_?08~mP zJmo`RewGnbzS51U@5Abwm}1<_92u#+*h{svKM9;p5>ZCQpFraVj&O+jB=GwKtb~q# zqARNl+g`m;=Q0L-gVDmmk=hikh{DBmF_)Z-50y2+a^L(CpabH?C5!QpZ6hc5imLs!0#@L(g#wFtqB>ISi- zxIo}t&fxWB(riLSx*WDTvpOG~2S7I`xH@Aqo5N9CHTR$dz(OUdgPqvNH}e!alqp9H zxTl;uanqAhV~#b`DPid)9jgjV{H9f9&K(eDK0-!ti`qkq>)m5G)GIwhxq zzOnZ;BKpidePpp{DSqM$6WnUIR>0#szmt?`_w!Lehhr=%Np(O5j~dHfFbina)fU;H zznyAJdfnKiWh%rMq$_%SpqsdQ zOwqFZO1RJydVYS^O%7KLG)AO<1hRt!Ps)R7(BmoYKiGNcJQpzTl#dyH;)jYhjPwfquGl`^W= zF3s2*`Wds}OwA~9-p6wOhOy?il|>mC!l#Nc@2S+YCWUtlvr|F=FHDuK4*TQMM{wa$ zPjWfJ2bA}<8p8L2pLY z3|wk2ce;$cu=N@9ylMARhM2Tnf^53NF|bn;Dwgi z%74E<z@pGA1q&gV!55du!!=jXy9K9-KO|4*+RN|I7QHK(@5|}_oATrRnGF4 zjsRb<2_#v2z4vAbelVI9W%3|6-myK8|&gK3w+Tl#EHW3X5gMui!bi zDl1v7?D)J^>?@KKfTP|-`z>ugd&M+q#qRbHS?x7T0JPk#te3RxtXJBi9nWFqw+3XV zo?w?&>*p?kJs}jxsdz~X!L%Y~VWa2$?G0QbRzyT;RI7P(u0|6d;_!*NYttwXpRNP^ zmGC;t`r-^Jr4Xc}ZJF%IQ5O0%>$bM8M%G$Y3Wz2aMQgKCMKe1Ky^8#*%2WTv9}5J+ zSgfO^|A)P|3XW?@wm@xx1r{?iOBORTGh>UHnI(&vnOU-!C5xGv$zo=(7+;?=aA)Se zIX~~~McfE|w089F-d$O_vMMWcE#VyEhunHbchGv99;%e<-8J&slz`R`O5%otoO`T#ibACy>te4m_ zS){O3*76ys#siyjy=isO#P`)=A?h2R8qC0TrQT0|$^(gq0vNQ8C;!&i@#Z*e=c|Xo zGo^~1V>clAi-Zxw)h!6nN58PknP=5|;0 z8%@h#{TOMHzLgNA68@K_bb}s!r^MXqi1o7 zHHye(?6F)Gn9YLs*KQ)aXMI%%Mxurm-aKtJA$({Jnzml=16;p_YFTIcZHmA#6m>Hq z>?ahLsQ#UwwMPhr9~mR>Sb#x~;UQpi5&PYnk5`Q`TQrq#JV(pNx0W)@gw~os7B9R| zkV>X0RUOPPgmXSpPc(nD%mJA|e&lUA?=imuRD32px3|J=AX7RY24aINGYY(pVOhTH zXkX=fOcLfzJo`^)Zdb;-Z91c<%F_&HS-OR}B)v)fR5$A_EkKV=2Lz+sc~1OMCfD;$ zL@f!J@~-=%NA{v&{Etq{x^HCiuztZ*ZW0smf>@m{XSx(!P6|t-AvTfzU9M8??mOOD z%cP{?r+1LW!F=-+{Z+tTzj3!$s0McDFNdok`6BXP!KP_8_g>!3wy)>ty0-gE>wnTR z$8K!#NSx}x2v{lxO`@Uf7SMHx0eajem%#M%Dz6WJ^jz62qO7^#_g^c4r=qZY!WCy$ zVl|(rLf?GOT!d}&-)@HQL$;6UlR}tEbh3G3Zl{#HZmD5lf(_7&Sfr+CU(x&4uF(c| zw)RA!*^Jr)MtLl%8r!E6cSMtQGR1S{j923|xNe+lG8Wx% 
zA}8NBdlmz&W3YjxJMs;vq*vqf1hdZfQ1%e>xD%jB+4n}6)r?vM+lyH=!<|rG8lWlP zw~iPDk_D;PEYU06o&aUrMkJtsRy-w}&>`|_j^tYuAmQc6FO0M`Xp zu~m>yB%*R8q=uom~%)j`XKCTmaFaHU?7r@`X9c9Np4{H#u+L$ec!yhoDn%YD(P| zKQ@Hsi|$$mP*vOlgBYjSWd3`hVu#u|QZ5+9yAP`&$m<46I*tn!gyF z8s3Hk88hjhXX?i3Ar_`@MTo333{Se&-m)zW%t|E|V5YxB_{xFgZ`}d=3KMnJiKS5H zUvVL&$$f%#!@2p6(BSFzjWw@}G%PL6Sr}=2Iq|>J=%V^xl>oXtxPU7Mo<(QMF!1Wu z>D_ke++Z-QI^fFTZUVpAU|<{er`N!2JB61!#wp^o9Ws2xzD#QQA^g+hNXn=z*JYd4-^@%wQICvh2?mPca(p7exP6u!2T8QnIKHoyw07v%C{n{C8~LiFExf znWX};k8KPdk3^64b*`7ifh{`A=&vS2db{Lwda)=c6IOx;2PFNK@_?}tBY*NG*F?6# zR5oJ70GnJ)4M##oRoS64=9wlytUdQgxyq0`7SgV`ycQ9(#VN1wcxB!m2mf7jINPMv zytBc^&kNX9#4gzW@9p-|ruW4AY3fG^q5kMB^)@iD1|%zAF%v@P4~O!p*F9Q3o-nSJ z&`YyP(aL(kyaiN|hzy&5)OgBjg)}Vtl`RE59j=Fld5DhLPEiX@=uAZGGim|X7eZHz z6{uoLM#~gY11`*-h3{sp9wak%K7^=KVh^=VEg&mgsst9%gTfipA^NyvEgf-l$wcpy zc8Ny5L3@EoSk%;TI$T+d(~p=#Mt_{V-M(UIL>5lFq@5lT*-WTue(6h%r!^{(5mlbm zXOY<(cXqQk0OL{fs=(uLir~DyVhlD(gb^XF>T3`AqIsfVX9QtDKDp5N_d2;5#bTwk zMNJNw-Ee+`TBQfm?~RvI{%tKkMIeGvrJOtkw-(=TkKx;opF0SzUvai&`;OSU06EUWTJBUB*)x|K!{A z__F(SR~j0fYp-Jeg-eY)V&&MvkWzyFkDUE50;MyHb1W?DkuNSZwTl+1bpMj_RlakK z%Xn7QMAt~LXH;squ!qkP>&=ElKFbsh@-<`ug0ws@;3tHF1^B(){S;%0CS4P#zCmD-#D zR{aeaiJ5<8@MZwG^DMJ$VtRHYT!gF3>X&P|F*E9o2|tU%lK{^U^TglsVy7H>0OF$1 z3n}7x&o>nuB63y0PFQJ;`%c=6B^yXCmmg2 z>EMb~ZGpJC$vHTk+e5d(^ZA+tO4Sj-J?$b%NcDFgURnfYUlFvj!Bic`vfeSit+a;? zhbWanA*nTK9Uv`YDe)?^*P8~Iw#r;g$}o?f$Jst)G$9?_L1Oj2Lw~c?#G8 z-Cdzvjv;Ou`Bo-1{%5bN-xoQ4gmxSc6P?~tx+#KfYy0yGvSq1LD-p<3t1OQP5)U5% zTSjp$bw?B)*MOeWV~`*r?t);UU(eE}zer!-d9&|{pS{0@kZ5y|!q~qZLv+=t#C0=g zfsu{bGvR2hqh<|kx;ydgw|F^sRJK9`&_@!!4WEpl&Gg}W?s8GE)cQdCqj!iqom}eV ziu(o!;_(^vjO~W5D*~A;Yk+VYP0NHYy@@066MK2Ir&*rOX?&XkK**|P?(#Rl;A>=GsN~j2+4Y(&89i8}j{p=E&1}jm@n0FtG=(B9HxHbQruC(HOP>RL_uB7fxe> zz2d8|nE>+Bev+rM+tuuI9&=k*I&XIV9@d7MjLE^q#6DFR^mB}dyKJe2m1GAlnS^H0 zj*IkiGy^q)TJ6dgCjM{2WynDXf3yy`Np84aaW~Mp#VuPwT9;xt&BF~%{Sc%%26m-} ze;zZX&(FWiHDBW{EW*C<9!u9B43pC_DS;@qUDTUa-hC4g05LM`aXb4+CGzJpR>4#= zG_uhs2_0mp@7<>mTVYUf$tg}Zom0J@m=MYW`BljCRZU|>PJ4(_+2Bt#dI<1 zfqVm_ZqWE{IT*atVQz(cwJ(%=O4Xhr`a8s~ggamO_RiO$mUvM|n=pL^-S|en`=U!@ z@?ou47v$jSo)`1Ri|`LL{fQmBlQZhcoos)c7xXD>MtT@7AJt-T9C>Iz9&=e0U-_0) zrsry#;DXUb_-+wMH#JmTfk5Ip0$!NJu8o)}bV%yQ+~;;NcXiN|$4R3A$&vH=F#g<`U) zfI86Akv5sQm*MSNYp`q#{4hA5efEYBK~FsSRl4T6&}VP{c0+OqMs{-;^}A zwaZI4rFiQNx;%yNZvsJw{eyaNf~C~bWqq@N1ML^O_@*U*JwwNT-j&wOI*;voG^c*I z^4t7SLK~1kDZc8(s(x#qH`yu-N?9tG0TQ4{j2pz3SjAFIAFSx*WRh)cic6hCigmb5>*1J~r)-T5XEYAahT6 zNrC=R(3qge6%mUm&@AB?( zlB_uB?Bb&9`7$GZwD?2h6pS<1dc~3@52PgHk~Vs7Z21)-`-@-Ax}RX4P^^{nM$ME< zl!kQD+{{bnL>vVzG;>&(bH-nu=cmT;eZ42P5vF0hGhHY!O`#j;4oaO@z-}m=c~^7PyV9^dW{{?Le8Gzk;dc&EXxWy%lltrPxV) zPj_o@`;903keppJd+_@HOX3t`ka~BxXH}aO4{+X#=y36K_cGZ9IkuKZ!L0f>=kT4? 
z@<({WBqw6GO?{x9`HpZ4VWnZ*C-x3+mILFX0SF^L5e{N8tXcm`esI zucaY~v^u|d5J((7DDfQnBQ4drYx||!VnV+|GA7n^N^_xpsMzi_3_!dy98lFYrOcmjd7rGnmD> zWen6n>R5~9h#YF=bx<@lHN*_kNKHvChRz#gG$WV`G%RYwbw+KssvI+=5I%5}VvxSw zVa(|sldG(9iB!MAzSy0+oMA5+kvzn{)|Gfgjrqzh5^40x=sBtOx zv1UQ-QTw%5(jRPB^6*eV3E8g;1&6yEX-mZUxE(EqrfqIS=zU6~Wp8*Z*V?8q@b#Sy z70ZNi=Ja9GtJF|8K}kxS@y+OAmPTtaR&8}0$nC;Y%rgn6EJ)X6Ux_wKD_4$Mq@cTw ztP1Rc$INk=_B|&w6ybFYd&Y$H)~%Ak>DWMxfuwqveuG4A64uPYY*tqKFiAe#39eso zxtx^YuHT!^%>e09au0Z79PYks)veMSn!fx}e7a-lTBehz+aG9A=JFQ6aXe<)#X}O z3(X$``Fkjj&CKl+1&hN`T!%4+>vbaHD|rtmagvb7Y-qm^NcEK2j2-6Ws}kBF@dNgN zMO$Uf`b`1SM+0axTKx6Zas}=sW|YIfxAtr*WHe%!XNVs7a^2kGJ9HOjpg{N|mdQP3s*9Kd=)BCJbN8H$z_N)G5{b%L?g$v7(d+d9IdK zv{nMZIcMHc%(EEbLf)?VK)SWF*zXFOtzBA=0yBhs5lqz&14$z_7IQz8>9 zMJ-05J$Nnk zsTNLVS0fncNC1Ge`&F2%z1Dm4Me2+b>=)YZqM^<3v=i z+FTler422sv<&Ny8he!}v|V;a)q!)svPVlTW?;?mM9q)m)MY-1xk`O_T!)I$p0Qf$ ziUfX=?<8~2k^O(<=+O`@EJS{XW&UzpI;&OWJmx>#Hz z!pN!Fpb~t3fW9=P^YF{FPKD$|L#bzfECj=K%)a`)L$JvA*}FJ(QzU5guY%jh4`$X) za*iWj^hmlIdTn@U?lK+CJ1kyT5df0JXZ^u(_P~n*gaov_`S*_8d94phH^nAkZkMUe z;0*~{_eZ;etqhGj3(v#2vQs@|1-*>}dT`_%FQd?fG2m1{_ zn23i_U&6;7!V&Wfuia|lhcPr z&R;9Y+B>lUf*%RIgn5y=51=je*$u-zVWbO-E<5+s_NZEl^73*9snq^kETb4e&GN=t z!0j8EGK)iI@3;HsI*(F^ndC0B5fa}(5U^&;4)7G5^~_k)Vr_{olzkix@D-Kr z4cyKY)aVD{j0$TN<#UBI!i`hMv(} zGqrTIVg$2mzFGIXp3GxVTxu3Ir<&i4t1L%A+W{yMZ$XB)Q2S#tS{Z)q4csH6D^gv; z*}l7sQ){vs8w@Fn{e+wR*AonZ7Fhx&Z$DQ~D?vp7Xirx(@@Wm&+xt)U!C z&N(#LXt<=jhDtDWy4u4vT;V2P^Z*&*+GVGfsygJf?cCQQQnM9Sm|LHj%LbMFiW)F< zDr5cpB42vVX|1)36{@%#Z;+?1*#}>}Pu?2hIp;IAj_A{#YHHTh)Knx2F{?(!__6Qj zIy4#WWs})`;LHV?8)oaQ7#o1>N(WdTrOI4)N_G7MlFXK;~4JDmU_VtNsk`E<`*^6>ExHtrN!Yo?nLq_L0 z5Q@HYJjYV-*JY9oxtED2ry4R$kBom~ARGoe^2}ZKN_!WLUA(&y)T4Nd+B$Ki@;22e zyxcjMO{q!2wU2ycW%p*EN}GmGjDDiG(Rv+8hch_WwV6J!394JJYi~FC0#}cUWj6wJXr@*NJ9BhG zETu+oF9Ch|w0fn;V}_%|0NdiNNgZ&FtFQDPxs1#A9kzkf2fjK2!IrauzGLN5*k;} zxZ931OL%^R6mB}69Td$!%gUfRI7(`acP+G+npmK8eU3ItaXXwhe$}BZy zx2cCYCf$m@quvA*a0|$^PnU$jf77`L(rpbQVz>uLu?suylPaqMikpiww(oA~u3z(w z>T&`W1^ilL!&?U0Su#$9JWnif zBTro^2{BGBy;}g2P(N|9pj#gU%JgCK}>9>n4GaG0{NawXb4MN6K& zjVxL8kgZe@vnqEE^&8qsj0~;5lpb;$`?nBQ8mhsz%XV-& zI<>cHYzSQ&aO59dU{p0s$9VlE3hdby&AiJ$xljWH%VdV3V89$6toUEk=_rkeYE+8+ zDT5mjCZ*(iV7c7%_0NGR!@Xg|$-3Wva60=yfUIb&)wrBv0adxly@H7EgTP*$&-fc= zZ_hXLlC?>Y&Gf)%GirWkSq~?n<>lSA3Wh|ADCcZ_S%j>j1=?>3%dY_6c3@;j%FJE| zOcd_YzLF(p?H@eXTj@l98qekrW;{OoZ`e>kwks?NF-4{(o3-ea6w(5*DLa(uoh@rk z=7IgpO#}ghg-?$JN0R3qb&}#|t9FKvtet7$WHM#w6Ik#nHc-fB>R^9Fm&ugr)zi-u zw3GtBKi$k*t(WPu_*?$tTmOSTS6K=;lop5!5kM-?g$SE1No{H%^RIdAr937Jv5rKAJM{JXYHYg^Wq?=Q)8f<^?}y0`z}0@#x`9eKq18&naNfVzS5%CkX7ZEPQMBtX9eFamtfaw$f-J@V1+g%*!=GLP_rJM8-yk|g zM#on|%dMhSzaUCUkX244g){zQG(mVx_uTzBUyjH_&&cHnGvZLA-EOExU;Xt9`3>TB z=v*iyU+lp~QuS|rCI9ff`nXE<^AcW^Iyd594+LNsX-k0|;N z=L^Ri$NQF;yoIKfC?WF z?}i0&a%&No@nm#4-w1GrpX%Ymed2O3hOhP*v3i8h9AwDsB{%hgJ=9tLOx)!Q>*3vo z-1&UMeE;T(il_SqqS4GSDX^rtlkgr4sZu9 z=fTD$K)@L7d1+j~(tipS{ENy5fQUaNCB>sp1D&%ybL9#e>Zj~?IJ%`cMS}mXpYaKb zBWd$_E&egg#q6LIDs|QIl6K%>7ypu;`|J}Urf^P zFCLI2Rv9<|q^Q-Fy~kG$u%zH|{8qpV119}Qe|6UuDez71rEHnbd3|GPu+_2OPr-oS zic1Op7m78p{EAT1+eympRZt~;V=Rd%!XUN43#b{0QvX#<1Sm4{Jq2=ld%tMIZZQas z&Xl!X5Tpt?fySH*QN;hdQVl3f30-};y){KGQsC1NIQL}KXC6~=ly@Y z=9h$=uF60fkE#lUs}?0L{Dc&m^9g42>fI6?QdH(&zV*DmT4y6B&f{vQMWum2VX0tcV6=huW_r@~1k{=(ATUxAjs6Qt{3^nOm->%WwQ z{B1Up^lYt2ob6aqu^{3EK0r(FnhwPO>%0E*^%gkL(jIx}2B-gsf>nrHy+>G*&_sVR z_Fe{S~gIQjoqHmxs68M$sPFqI?EbET0ZZu=$bI4?JDQP0< zd~e8nKDxAODxdEPZ&d*I3y(8=u!DE!)zaZ!(4>Eb4#JSk;B7WU=M(dFz)h9Kl)_~s zN$kkig6$)Zo2yn)F~@;5d(rtNInGV#A+`2y_2~Z|!3Ze97+0z?Pzdl)B|}ogxPmQM zWI)n>(qol=_4T|mx8i-a(Bw204>`57P2oyCq5pQPLHX~K>R*F4KiR5thGF&0_5zM) 
zmbEOlpR^Xn$JNa5RL5Kx-OQV!s5FIZ4bz7HhANTA?I*Tk6?j$Ie|On`4Vu%hkVfe` z%};moxdT9eU&=OZn-e5RKpPC%@P)A~V=W0vP%0{L2-)(BxO8b9|NWoX`>&t1#px#v zw8F+C43c<=+81Xul?gA9v>N(FA7><``-y>j-?JN%E1%7y0C%3^{JXS6H$^@wk-vDh z21p}+on~`HTXjodq{Cp_5CMTD+PVv>eIDBdlF1S;YkB4hoJ%dp@MyUXAQx=SI z02URM9~`I_xUmTt5y_^)yyPBEMpI-JI?bR%(@5(thlYE(HV%o=DeyyZYQ*hdGtY*S z6Ed>MbY^GXl^4wSe|^bv&?0Yi*u1vDDTjTa(KIVigNi!_=a5e}c_m;jA`|kA8EFk1$cEoc%UKBiP6aAnuB$#1)kS}9cBH##ySumkG z8TH14SHKDQrz-IjQv^ zm|PJE2R$Y1#aP-!7Djg+5gE_F{NKWB!l-pc?5l%0XuKd)^A2T-!kP8DGi4~S<19H- z!r?nX_w&cRipp%%e;-)f<{1j>&p!6eJKpE*9CW7M*$6*mLhUd6xK!jD;G z`)&psRCAhNZ+oBF#%9*LfQ3F)#vOR1r;U`ywgAd|T1-hXAI0ajkVbHChPQ7jkMdUU zmXxi`J9)A5)oEq#_IPctYXp;;I#g&w+76|~CWK@KLxNxK5I9WbO%+uB4kF_4pXt_| z=Y;w*8fUlnCw%f$WfH%uq5osjo08lq0847S2V6_UU9+|;^;KMR>ab3IXTRCizxwHr ze3?f_RhV0B4`<-PKDb)_Fe0=`Zx)Lq1Li~YYzHbS*@joMY|HFdDv0=4GWkz9)(!>n zG3P7=4O_QmwzEjKc>Q*{?B*Ip(8Sb!U^&Z#^Hh0z?)&*G96&&i%htzgceV-U%Hl+- zY2ri@xecb{QXH*ZT_pp5A8hXSjcvdev*}PRgQ+4>NL!y~lUw!S+zX;Zm=l^Z$-n#3 zG2xad;1#Od<7KJ)lTnYNKE$7xGTCT^Lg6(;BLs_W~x8yGAui6<@_Fw z-4py7F}74Jn{$FhV`i=KvX8CqT#K^TFl2!N6+cN!^t@x!6!=`Ktb-?IHgGAu)t6lA z&0te7hifr!jN+*q_UTwfYMro$;mYh?yKpdIEDAKRk`wL`76W(;!zz1V^SyQZk?R%2`mV6 zu#iEsyN8pBIZR}$U^r-%o=IT3MZFqhnrzzme&5}y55l+!MOy!5&FE4^J~N3C1orQ!9J>wHZn89Cmnor?16N&`SEPqjgM78x z%kI3X4U95nIx{fbOoe^UscVK7@hlJ9Eto;6w~{OlRxB#J%R@~G!^b}u{C%{eF2~FI z{o~NVZNwMxmwot(Ux{CWicXVg3k+IUGpan4rZ{+q_*>m-07#I?(-qa=QcVV;*(TPa zAiJEmxuM*4aknYOuoENO`-jw?rq5looeuAkT#sjN_HG?W$I&+Nqpn%JNwz0T26@D9E@n1^1|_zBozRf{4>*Mx^I6*^}&v} zSdAW-kA}TE2s`f=>bZGn&)XEraO!kbnMe3DCrs3WZJY2V#VL-@L;Ol$ldQ&Yeo7(o zPUf9;V)=!AFA;@(mL-<{VndCUuyr8|_{Q+mVe-4wf?Hdf_HGmMIDnL?#187;JV~?k zh`7gB;kVZHd=|Z3$UFu-v}0be{*yd*Qi5o(^<$0{!~W0> z1?HrfWp7)VC0nZr`s14#tmBVM{O<2LsChB8xdK&7IIDh*)YP^5r(|bp`ST8~cS8?0 zIH5h8*nyU`bymqp0EwMN_gflDG*E9z=-qx`sLwk(YBJ4#gRVoV0Pmuv8Qg&YRMRp zWwHXnPa7TD-{l0pdMF37zqOtnUQ}sxsVw#FaIg)cSjyabNl^B{3yh-@dX{>uGp z*{Po6NqZQn95u9?7CfdTT+_Xi${g60OVMjD&UTNvYg2>NSPEeRxKXqNjBp=Q+O-(k zQ+{iganM+vF(s+LpB39_@(KZ5XFeP>48uexP9F?^&c7I+VP$f%f86mP3*YCPUo^>_4 zD3C@6jy+8uP7=Y?R95r0KN?}WEQE3!n!km>^N6nd_2_^~N1v|*hFgd1rQ0j4ddA(p zgaSU3M%A`y%wmkbo7h&;2n5xUVS4jePZ~MJ#_JAS^L&97dB`nbDzY>Ib)M&H$GTqY zo~;xMJau@gl&_VM01~=I-r4tCHyiBL09Ku1(HP+H3&u%zUlcn7a-ISrqmW}S^`APE zFIz5AfB7-o;LcDUxC(E%js?Ts@N{k`Z9PcyM`hY{&~JqcV0g{(!`QvzA}Xv*aKtNT zEBKIOb#1c_yAG^Ya=o5L_0vts_6bC-&A|!2Zl{el1SgbPvE8bU~Sf zedf6+B%Vt6$5P&i<}=-jGZUHtG)hSyHF&HBI8-2{o3XxC_>2zW;BmxFxWO+d%EQ~L zCs5D(XBqbDe1zRaD&GpN%Pud*CLWzoSyOddvwP39pEoKf4|N#I-YV+6r3j8qBBjI> ztuM>q>W-KE5x<8Os}Bm+f3Txu=;FtqU*d&xko6Tl9b6DgT}QH)ucpe4k%twwY1}J( zeYAO1E8$TH2DX0HDBB}F(TQ`GXT?uak@UHmQ4fCT#UmZ-BjfQhc&WA&X2M`8_n8*m zckCZ=SorKk3L{u$e}TbdVRDdlWWElQp|~h>XiMsb7{Q78C(o@A(F*B4+Mt%Bu(V5tJEa@lTUUA{yVH*)#w06JaqQzW|-E@ zagqe%R8ot+O(DCx&A{^==}}dWd6t^DPYVX$yIygH+TvzlN6o>6!!1-LqRp+}xvPb5 z#fo;IX)wwThb4C6)Szv$=^k6eKDqo);fW2I$KxMJXV5wJoUxlkQ)9M>r^|~DHvO0O zB`5CL|JX4T7(%%XRAtnDQYvJ}1yF)aScrEMY$XLE4JWv>7+Jd&-D>XB4^250_M!rob@>+zRBI;%9I>Hr5Uuxp{1{>aJ5L_3q6}xqsUUc4QJ!?4!v}xvV2wo@1@86t*|9 zORU*oO;_p+UT&ERI;-Y%(c21hansTz8-GRKzug?IBtt|^DBqhl4fbs2zN2mv8@Pq7 z@_{FF$Wc+uw=^MgF6BJI0jTQW@SXZ#V+0{*m#?VTxg&pZb#Y)|H`l`_77dh{9J(C5 zic&LA$`DSgsOnXAN(?!b-_)6SHrrW3`Fm>XlQ zwz?7|_t5WPZ*Rr!;`ns}D&r~GIN*U#xw%x&vbiJ%pb*)Z+I6#|&=z1ttN4(EQ7ah4 z%0tH))cnbb_s_&QGZYA*_LLPWRl*7kc*GniHAA9Dx)l^udlFHAaibZh(nxma{+q@4 zk9<=2ni1QDSyV1(<<280m)HeYahuxYx8-)wJ{7}6(d>~>vIm?Mis0c=G$E4KJiSre zpT$g9d4&yWzt5EWR7`O>%?d?CDS`&Eo5hinvb2E5m!NdssQSf_+;U# zb&;)#@p7{@R`XzqD-OxDIa1KF3uT{d8WmpEVL)+3r%>b&g@}Jr`8{k;a}X9Cl+I)P zbEjz_4mz=bw=a07J%CG^GP^g03r+teXQ)q|7|Feb<^DUxq~kVk``0U7Y*OzZqAfe* 
zO$G*hEU0=F_N2!QTv*3)EWJjNDIM*opszb;3hJ#~uZJW^PSwKxUy55gd|o*5a;#*vtwlqFfNacrl5qCI`^t0ky#0 z|C)!|tMCs%fzw%vHC>qv5@jpB`)h75gxxow5J*)V)ZaV1nC zC^PR+=Trv^lZy@^>}QoBHyi-e8>?Mr`3i>n3qb(%JrA|HJMjrvOg~|(E>3>a?-|+U zLy3mjd&~RFqJSwhgJI%(2$0sp{oNa1kbCPSl$!KL{iZWlHpTVJ$wrQyhI>-Kk^|SQ zER95_zCFM1EqM?iAp2IoN3ZW~khU9F@wfLi@%t@e3VSBvlv7$SVUIe0m+1<0oeo)! zxlWzNz~yuXpfB78Z#RuT9Qy3PFeL*VbPXJ7wGrX}d03VX`I&CnIRkREkTn3fdD|(n zL!w8%>h1lxQzPt%<272Lj{L%0I+lg8&cQcOS%>X<7OV)7g!9pT5d=;x_4dMwIk=oI zRIPUdPlB5(5?P5C#a-Ta+PJyl-)kH`)3xSirKMpco1{kiB@lb!3mD5pq6qA{n5mF1 zt4TFQ${P{LB#rJJ-Z-XKD6|M^7k`F+-z0~!{X-mSs{0UuQ=Ck`7t7F=Wm<&nilA%) zZTZXcKzx2}S3Ft?8r{1KPIz)!l)0*n?D;WM6Ux^?I1%z)D6^blk2)}shzhtqA_lBz zwaDNF^~ZI;Cgn(P67M}^>(ExBi9HTJ=ZKTkrgJ@Uu+| z7(~Z66?6%43mytDD=qlY2Z@IHwGR6Hu`IpL0&n*vnb9b=@BvV50BG?#Llk`l-^(a33-Xn5K>rf+K`f} zI2-m{#}2}Ylp22bSR3EqngU`f9G5FO;uW*gH=PNyiA|IA(+m0TK1^cY@N=}-M>-d= z;K5!+@F305TcvO&rlQdy^6y;3wJ(!nNSUCovm*K%cguTfTa%AwSJcPWPeF%jz1J_SsAF9IV!`E-JDW30=~OcOa8K;IG5)9@L?x{< zTLYE5OFIv*cnn&O;!-7!D4;pVUxu4eUWiY>^l-G`)#AZ&HvuKS$`{5*p`3w`^_<38 zsR#XdgYTolanqS@^IdjB;6y*yu~klFW{f8qK}kA5ClX!G7=W~_4Vwbc$(g#sYV4k6 zMrnyjKKHC7-v%IdSkzi(SRK%Qk=xz>-8kqPd@CY5orl71sl6Pa;0-NfzEbha>3pQc zyH?od4n6`37l`*Mq_e|T2(rRx%yNZO2(;h=w453UhFkZ}!L>`ToMh&{&lTBkm zLJty~@b{s(y8sEHh4uL0QG*&*81t&zxHuaLm?sM?4(H8maVf9&XI|JGvw*Q`c=qGo z5(Av0R5mvnoHCTd9%;v@_zfFd_70uBc)3BrDZ;V>fvSCp71HGS;y5iUv9KdN@3Z4U zzqwbmp~1+!RHK!YX&mpol;*vs%6q5WbdDMCQHzQX2{uR1##4Qg*&#*tCMIgQkQ7?z z-n~dAs>YJHQ@XgK2${Gd&fS`Xgw^ClYMkRTWvYBLvg0>kdU{mg@DeZpm(q2pre_X=o)6jQ@!OnLixjSN& z;K6zpg_n;r+p^yIfm_09u|K-cR5bZ&w?bSk)$e$o;x~0uU#QgOcT~l02U2%7leM@h zelZ@0@_W9*P}K8$X7)35Q%L$=Lv4uW%CPve!J1bb<3U5&XN{9Cn>#-rt5aeqXG|#R zE{(v6AH2<}K5dwWo*BZtYxzhlr^)8J0^bgUCpnUEV*Yv1o(}zIh=3k5Y66A`quC-W zq#6YAhhFuU1&LeF{Y@IpqRZW-YnI54E3{wsJu*I|+3$8d$=)?sCD*=DsurFfAN;3G zQ|1kskKq59+7tN8i;2#0p6BLOR(EE5X_24?p|eHa*c`^6!+!%AI$czilnd{LW1NDW z*8y~dEi9g+q?H*QAS3O%0a;8p{S-|k1}I-gBwyEUD3~0PkvIHQj@*?>iCQzbSvpp4 z=beu7ZEZO`q7~Qj&jJX8&IZX|l*R}(g0HUzOvh@a&gMifAHBXY~NJx(3udslznW5F*r+T3tn0J>S^;> zq{^Vr`bsT<^Vh!cA=bQNW4-k|LUeGXZx#;2!tk5J$y#9U+{+nJTB<3v~P z%Cr2)m_sosXe-SBg&#htFo(Z-f59YHjis_SSl;)HXv-l~?EOyWIl*iZ)%ESYH{Hkf z$`*7&dw~yTt6C%bgF;A-WuPnplu%y#ceM!~JICp(t7WE@xRY(mcdVD3#LEnK&t6WA z!y0Nwk_B!S!_Nwxj#0z+3To;2TMU^|YXd779xDf5&&)H0jFG)#79co7QaJMEN)_hN z?${!1Xwhlnj=I~o;7LRh?cI#ls4BdhS&!K7w*CLeM5!GO&9~iTxkMauRrWYeD$?=& zv&MuO7Ua0;lod97#1pXrE$2OcV^zXcjkWIzk2#dJ=Jwq?X%C?d#!CaU@=T2O(9RQjIueD*&62L zAc`|XpN<$@167m}5;)R(3dS=UJ1gA~dZ5K4W9h4r%CM=mmSgjIHrvoqJ?^}L|B3%d zJCY@;nkN{w=DSqeq@$)xd=I4GA8vHVbEdxenewn7h1<;KSXRUFw>B)9P{i@=S;bM% zQIh^wwk9_A05+#z}7+%Sw=?qSQD0S4&SN$`3GA zs2P18RnS($tXnPTwfY7}R>*3w<;poVb<(N^IzvkbT>4%N?rR0@d2Q6bA zk8f9gF8N$aiy&Uv9c7AJRpG>b^rZuv8?vx3{G_ME@d~oEuMJ4_Q_ubt% z%UJ%kmFY4*0iOxxSa?)>=tDzd#ehnquzv9=wF|sRs{oy@Tq`*7h!U4?WmA!zcvXMl zOcY|(y2-oZEU9;J&OdKs{`nIXU?2Pf?a?laPz=)jjGA_1CjuaMEW6)FZDS_NGP&0S z-v2m9wvOwzJS%{}d)F4EUQ%3f7LY_9e|)9{wN_R$h#DfK6tuMtcTkFd zCH@PQm-7p_UpE70&sQ1maO>YB)gNJfvTZ|E< zQG|bh?EQN~-o%x6Z&4Ch3iN7$2U?^X};9!79{rhARqTo z3X{^Bcby(on9aC!NVFkEs~T*Fg&yP<99iMeJ|xO8jv)FNY&?stwOp=yBoPv`5#rU* zR-89Wo_L`nM;;UJ$3FrByK(q7k4qJ;d`YWd53kH@x$OITC(H4!F-c#%b56>A8_mOW zenP3200q`uQ}x?S=B_XxYk7tM9>ap&)I;haNcM^d#e`Y_`os{vg8|JDN8bWz&hIE!&kjEu@?6|t zFW)_QQL1)c5@EbTjz|9V9 zeB^$-+;ODzWPj`~Sw#)ind4nL7 ziqK`9_k+-7Bf+DD$cCIGWbafripFhCQHGSj8=yUl18wR7g+rFB1R%}!(-aggL)1Y8 zg@7d2UY?z@hffaCHO92wItscn5uCEQQJclPZaNM%+_m+S-XRY)2EleDO#Jf~HFeX7 z4Y&P9cJQ+okzQ!HKx7UA&0rA=iCG>4+19Tax-d!0#8JdMnG5rk2w;lp7PL*JV^N(zhF z1yxXL6;rtK<%DvXR~wEW*lx$#c&kwt?1NCn6JPT7L!XTTZS&(N^B?;Cgv>r}1BU+D zYeVm?7Y()jU5tj`j7=)*N_ z{C?xP4mm(MAspIBJk#1lpX$m 
zp$1wS&hPr(oK2NIA1C1qemxVCTXqfs7?#btl{=PW@EkgkEOHK#Eh&GcoaAHNPwk6z zri2-Tn#*34p{7GpFGc+S-Pxf3XKylRYnrlF4AHXos;M_u{IfOLIVg6nEFf4xUl|aI zi4k_sUrw#d23*W16+#WhD=nZE+Fo1lv`l;H#eqvsAo(BCu6Gy*FSl-@w2YsQggsAt z6|`GmxFj`2h5jTW7T9>8ehnoF;d)}+FRucYzO3U5e!~W1Go!SMPBZMZ1Nyx-deQ&b9SZ z4QOZ-eUnRF+N+_ixD%Z{(wGaP{bQDt6(FGk@Z4RPn>+w+CMOOWHJC3YgVs`^QBX^; z8MZFQW-MVTXuG*M680GCMVHOTFyUJ#aD8Oo+Gxl8xcXW~1>aqs(d=W$y6d-W?9(e_HE+ur^TIpj&JY~j7|AAV1sze5}GEl|bB7d`=2qT=e$qi2oJ686a)W2>zXHFK-K z8;h(s2ix9}I2#Tv>u@8m%8E9n8@Gn00c@;5b2{U&d*A}#jUL>3RJq1ES6rZo01uIwO$hNqWOewlgY z2(DJBXl7(jpAtkPv-L*V#LPny53A}{ze9|}4c#{tmjbSv9}wNYo|S#J?s@M1`$1T{ zrRXKDgH8p6D^xptF*5wUI<>+y-FyID9Bl9{|9eoc*}8Bu)>c`V`G){Ft4}^V2W#Mx zq9Ab7`v*>BfC+W>`{?7$3nKFEo7GYSjF|yrq^re=mej#*F+Oyf29kPI?0x-;TIWuwGeQEwOG?*XoS#bxZFbEw#7=9=vN>vqo>(|m|MZdlW@$2UVgdm*MLQ+<&>yk zDGd6v>(muB-T;P?rC0~nntbVCn!iJKO5Fse)rZKwTGWcm5#~aC6our9G(Ot>kd5|* zP0wufKeYzp&$2A63go_eHQ}IX8a!4kHsSxIhw(@7nwxdZX=_B|4n4 zi@0UzZ(G_K9&YYW#PF==V|+K4t>+f4vkR-$CqB0KX2hINPxUi@l6YGv~f>J{(}I*<$wXH^u-pQcX7A z=2|(8#j3;4a?e#rLsLUS%AhfBtpX#ZZ@IMY;2BHbDkbOOEi6%vY50bc_u*KDytSyb zw+gV?pe}a}T2(Kv8K>hxGnq0g5f{-Wq~n??tG@m>thkZ>z=|+_qLP->aPJtw`WNU= zGSj-BuX4RnJEpN=)IDo*)R0iZ6Kvf*dh0FM>eNkisU8=72uW{zI&p(XC4t|BlBBCz zm^5}}uQi`&yWz9zzURE7B#F%utrmI(a_;mq_hCf@E16&=COwv z7e4Y;AA4-({WzU2TCx6I)lj&OUYU9r;nb@%^tBwdRSbdPK$n0I$Q2B*lgR@hz>Hn$fw%^fu+LzGS+Z?6VinQ8PLXmhJeUkQ2pP?<) ziT3D*2nYBVyqc>xJ|zlxOIc^Eh|)5iv+))Vi}jt(J?>Bi;6zTy_K6wzp%MF3<7eg7 z>RE=9VCBNtIIV4dkn1xS7n&Cp7k=KLH|*{4r$q3l+GHB7b}9?OR6gwo89<0GP5V8< zZR?_`6-u}jh`|#nH$BNvwf1ejdd$8?^K#)vXy?5<4iJAd7s67bnVMwdgagNv_FOjdxFWMlTGm5-gpNh0)nr8Ki9I$R zyJrtYe*gzc7X#BOSa|FVEBW2!WWj`SSW;f93%B1KS&H0qO-`bQ=z)u8A6=1&d;L5Q z=TjJbdu&k~q=ab)=Z{Xl=0z;(IF)*_S2v4kUkCb{v8R5{pvw&hS2y&{OB29Vmr6uRzdomP)AD|bSV`kki%{OSc68^l?gcFK9#D#wFXYa$E_5U#x%C};7 zn}TX{bQyaj<6u|v(d*W4Cej2)gM@cm`lDU00==D`^|x*mcnUnlXBQ;Sp-VJaTaTUWpV`f-@93ipibHF z#t_H4DFWzzA==Lv9lo%;Kv(l6s1vZ8+V zn_jP2w-K#21ln2r6j&SgV2NUYfx7jL5uVZ7Nh{;yuH;$DU;yDGvQ5|9?i3z8G3U)f3c6`M9YWZq}Zyi3yp9E4l_Gy{xFRNlFeH6s$kg(nK;n?*Mm?swR~Ubn2r zg@yzspR zfkBz**Lc3u#~Uex5ekfJ_q096c}Km*I|m1$E`6S9+M;t8+=8v94&(}^MuE3Qx;YmW z{8D=|$$iwwEJ1b3YIYa?cTZJ&53nad_j#}^Fi#ML28tQ(PC|Il-=4Q)#8$9HJO+Nsr z`EEK3Z1^-qHDcnRGug%U`zPJ%2d#(X#N(8z2C5yJlk`m7@NUkNcnPZW*>qm+UpeBg z^8Jii>!!qV=sp}O^v7gLJzSo3T z4}2kIUtV_oh1Z0MlbNOL#s)dcQNhgv$Hj~rM}%#1U*y8antlgY&Aug+(Bm*~iFch|csUJehlEmKU3Pk+xnSL?)2$6NyJi+l2$Hx2q7GF>B`l zj+8R&5~BBV8G%j*JhHHa9a62B^P$nH!Y0Y>BSd=^9WTfi|m8 z2n#ehz>kX0F&(z@HUoaYaw2)~)p(!c`IJ!9Z8sL(Fg&fzzqlh)CNu>_ps}u#kJNPw z{i<>71gLj*bcxCX6`GT}SXUmk+WY?_C=Iz(VPFw0rq(Y8)JW6ZQ;XR#R&_TEk<6*u zLNv6*lM}Mt$|PjZ{+7386~C2kFlZ=tly?`}J(1-z9;AOB^u8G%@?|mLYi_#|P9%|g zk&7o`9BScgT*m{VqTEK$Vh^zJ`CGDnY9ZWM=G_Q&fCUtO=E~t_1r?V@16Dq!fX6VegFEG`m|6>bf zDMM$EJ=P@uKq4q7MYkrfPX}2_Fd0UB=|88t@7e_?8fu+Q$UJn${ccFbIO#{LI22R^ zBUMxb1^4znSAweW>Ju z@4M;g^Ub^nFxf)4mF>LT;DPHM#iTLH5R!P4C%2d%Ur&k`G3ja|1}>nKQDo$rJ$Or> zYGxuMyQ~tY>Y=r}vev@8oa#M)n{Gj{J&iOgJ+E6v>Ooqdr#NLV^X7t+4vLC4XR+{s(~DyxR?8%A$DYi5PgO4scvhXFuCCeVQV1EhCCSpF zoeC~m-5GCP)8E58uQyoSk`X95m+gCNnO8c&7StVft|UufYR2{An{-t;?f6}v*LDp&S$@K*5jj~D|>_6GrVw&2I2H0n2zfXBQt$M0N8LW(G)3f^}qS>(0 z?+mSLbl+=+7|t8>gjwz~SLv7M&8DVn+_cJCoim)0bCUcpo-bk#DyH_U)l9OF^yzMS zt8cC@oDy(RZ?sk&uI6%zj2dou4e4+s4tHKG+uVb` z+#;gOU-Yp;!XZWD8e&ym0gTPkM7eu$qmJbT3(tXaoSR44=;%mZ*7`7OWwg|83k{P_ zX($lE9m}|OkA|;g->+BpSAO^5PjfS_)>^8o|7V0Jhbh{Qc!j|VY%^2wHKzWGa z^@xd)Y2xaK3875PeEhczSrr&9d=-$1t@FbNQd=d(hHQou)o<$$Cefs%FZ9w(qv@_7 zUDkvs=cIjIpBF7m7RJuU^NpViCT{cC9~5_iaKN}78Igj-q^MD(cR)z@cjdMIB@w># zod(X=W~+&jm}s?Dv!=o<``;ZFo~&WyX2PxgfLm0-Q;^MwF1w9#$|Mm@aczc=bGvZR 
zCv7{KCP51>WXXOaVv#~oj_ZnYPa-+mS4*6Aq2?ktC9tfGVlK{msdnZmJsJ$E;OKOa zQ+0u>l|_5pHN6l&aE$B0_m{+@KmwYjv!h*@?+&BY@tLVnH7(3<;6=XKJBQL9?1(`c zRwGkCu8Lx)*Pmv2_pn{dgTtMD2Ly>%)Cp@7^XFAldo4yYeS~ME!d%Bxi-7XbFlS40 z?ChJH1>N~d7s>nwLmhHF7M9KvL-!r|BFD%)eSHBVGWn`K)=jT2)A)V|#MNM|-wF{R z{K8{8{vGKFFKbBGf#^|Ze?Udo0-_v;U%B7N$N+O!++G|XK&xR!M*P#qbEYYTjSJ^o z3@n`_-VUqjCX#U$M_1cpy7B}CnkeBe9fTac-#^0{T$IfzKbNYT-O>`-NWkOiD{79WA^3-AGcHUi zX*yetp}Dz-F}qn)xlg2F(aMjp=SaE@OD1w~N<5$5&~`(!_7$OsNx))V+{#j`hfu6h zu^Jh$`nstWqGqS+k}sI<}tDhg#;P+I0yR`=TX0Gsdy(oZ;Q(x(+Wdu#!_ZrysqHhcSQ^ zYoM9_fds)gnrQ=KpU%r3dK(!z;A-tA*D8Q6K=@R3b>3BwmPDi?(TTxjF^G32W-}mt zp6hmj;*w}{S39!0zG3i#V&Ba8aSbm8f)YVlpmhU|BIMoK67vXv2r4_`wACW20nO!P zv-y3u>R32E^K(|VeFy8NjnbQ3kgbF8_tnRd;>5?})7Jq~%>q&r5H)}9sp}4EQw+`Z z`Dz2?_RXb(KpJ`Z%&Mb-W7$=$&N?NCyJFw(Cy4w_K94A|K4)hEF{5jlbaocT&79~tcN|BTF+Dj7Pplts5%Ix0zBndW&i)sf z~<^PYB8dTY(6f3Cpx_j~dGSyBprR}^cs zE%wx@sj}HQ`c*Dg$d}uiXR(~qq>$1f( zS+r|)K~DSaO>@2%8s!x!?#^%>FGoY)l)PHn?cb&H{~H4Txb_5po;_$L>r3Ffgr}m! z-kM7!Z#?ac%WgLq9^LvpeVqoA#+RLD2w6`v{qU);%Fqdy(V3L~=RdY4{KBvX>Kz@! z&jZ&X5kI_VO?~s6oedOlz3VmX>)|AM^>f&M^R)ZiVR)BM+3R1Q<^M-0>wtBVV=$YR zH&KcUrzW??7SafxSF(XH+MWnz)a^$5R|pb>USjC`dPwdo=KtNA^4~CMRr*=#ZgBiv zt~}V)Ge42v#%B9vYb(d2?U=mnQ(pyWKl_goIN+F{NXC?-%vYU$Vm9_P>Nb)raLG zbGV5JgKsO`vY0wX24z$1STvYdBMsZ;Q=DA%An8{`wsxTb`1mRH7llw2qs*YPL>0MH zs=xm4=e{>ncl&0nV`q(t>e2rhJ6k_cd_QPjp-3cSg@X>mV`f4Ajp@QabfI7bv+Ip` z_Hto&IOsUbZ7}^kA0w@%bDOjb101>iA5UI`-@a|q!u@3E)Uh2!Q+Fu(z{!MP4nZ`@ z8*SUi0}ZyoCtmPpFGRK15Vx#)W6($Pnbk?HM8D2HoJ~_k_&yy!NuJ}p0`NUC6J{e( z{(KJw(3R2+D$qS&_0bksS2-w3k{xYBj%Fi@L?0+)4?f7Ptbl~$i@w&amFkRt@teuT zX|+b~w66qE3w6YmT=hcO&4_sZJ?TFX7$-?FdC)9_u9#V4e_M;d&%med{nX~N9Qn^m zkJ+&CBNzR5MFdyT`-F9^No+^VX~7{4voAH8pJ9j?eyspq$Qt1L)f21Hv4|y1&DF1J zztfx&a&zNPNfr691wyRa{$ad_(KZTsh}h2UYswz;^`Oa$>MS)y#dARgRG*y$HK#Lk zn4c0Gqe(rOh@7zt3f&7%BgP)%LG|xRZ0O`Aw=8mE1-26-fE5}g2|&O9{6^l=#Duvo zZ_jzbLw28`McSk!#dmy2lBgH95}9ELT!xtcZ57wfXN^AH;VG!7uiD2i)uED0 z+okZit0FMCvuY5Vp+1QS1m}CsG@1OqN{>dwQ^-E&;}bDXFb1AnP?j<11p95Qwl+Rl zf<~U)-<1zvb7|-$8n2rJ!5)z72@`!wKaTa>kw&}Q;7>wh5I>adT#uFu-n*Y_7;9SW zepJY{m^i6vFxcFU*zGvNbB}hypiba#8?>=Uw&g+ zBE#((u5TZn>^tE**Y_7p?044pyP2!!xRsih7vD`+qYLv9&aVR?bm9{F@{~Ma# zpZ#SuaklFz#YJkkvWH?lfr`YF|3|pc0UR1X^xXT-PegiYChoHQ>yZid=$DczzW?MV zb?4E^;JzQq;BgaW@CrF_eh#rpVIU8}kWSF=IWPa`Y0VFXPh`?t@Qqi(8T&6kYZB&v zEQtKDY8V!-PZS#7noV$iGz`Y6jwHmVL!8*e;xvmqu?C&cEZIyXVH(O%$jAN%(Cpt@ z%DNlydwOcThTHKS67Emayy2M+s)8R_=Pvhz=8SyaA}WzVV#l7Ma*ZAlcrPbQECSO_@uhN2=51Hp|AK?9n16hxnk=vAYi*iH-A@Mx&+2^rW z&FQ$+7K5<WAYmu%kdXFIu^NNNg2E25>GV#52w*iwVJI~I`jR) zMiaGuFiu!o6~2_xRCc=r->rWK`GUVe@uMpmy6x(h4${)|A&13mY_`@sdH}&4fgkKm z$s^?stqRcI!o`msJs|YE^A^}a|8xCdhLjt*U+AXfrSnRRO2EBXYsa0idipi84(n%k zQg+E`X12Jtp`4C=^yA)&4zey8LOz`xaEPKodt`;d3l#xHH>so|!v_)#A&>Qk?&+cs z4&g!t<@Tn?ebPtZUPMXT@p>SJDEdCFpHrtq<8>TL?P1kr&@ncNrw@D7%e!ic(x~0> zO8x~p>u)oH#2>4p{g1RRN%s0Ov@XP%>niQgkii1CqxK}8S3fQae^|mfjd2O5%UDJI z#wR@w@euckp`6+Fq(`c|NV8lqO;W|d#sen}UpqzlN^cV3`>KmVP?aM7_2Oa2Q_Q%Q z(IB`9uZ`UX19;$@a``4By$V$b)TJYQx>lApUb1riiG1ggkI2RH%255)PQPIJLY9~X z@ErhzZ-iFz$ac<4qqIGT$hGq5exWuMF#lq8U-YFx4cdSrK+Z_IsXo1(*6-R*jXWd9 z>-4cj%JJiru++@gCAqtekH8EmpI5V=nRF1UP0_3)i$tpsXi@m@j>no!U&;nh`9w`mQ#11@mUm$tp5t6@g#}JL zA?81s84p{J%WjukNGTCgC-UaC>$W4B${+iflWDdBm4%3N=~p6F>>^c?Xg3>xo+Blt ziM)Ev6r~BnK8Ds~gH~6=AG|+U^ZX{(5)Lj_g<4Itv^E`TW@UVf+hi$&|#+ zajqM>OI*qc+>;Y9jy3Te-gm^6B`1m0kNDzXG;jK@)c{Pk1A|;2`v>(B!cdwbCIXj*jZ>c5%Ss_Rt=gcz$Jw1C0wF@; ze5$j))U_F6&{*uF1G5TG$K4U;y)WeT$n{{?k1j@r zeius@u?tnm?=|ve(CrBHhk{j7KpE)y5gvV{y~rzFAmPr0-d%|F<@L=vt$-<|D2bY*D 
z*7-o|!FRTA4hf5JpiW}Xk2-2Ho#|W&44%jy>^t*vhnEcG*8Sz1j*(Cq97&tb#Bu`eXQWv|+R;66FJraoDB-GG_OBdZM=XzBu~E!-xIg zgjX6Z97TKUe+*2k`Li1tgxrL;r!@GK0mWBOPZTKLB!U|Z@5TF5V^rj5r z;5$nQ5-kFU#Gw&c#L65=csh>bxN1<9QN0#Z>s$*7{qVF~I^;O!oc>+hBh%~&zjxha z#=tlS8wl`tPUhSC(m(;dCA!L_D{v00SGhEtW47FIoIF7JrijDM(U76xH@+y14?qGe zj+o*1tm91RG5%wHHw!5Jf#D2C%=)681BKu+;-!Izp>IkP`JFR?bgw(Ll(b9N%8mcKW&OWO=w>zuaF*WEc(FW*F_)_%wGdxMBM?L<=>Y z`bwH+wh7A^+syES^&XdQyZA1~FNzgIsXKg4#0*m>F?TfZ$5N;xclzT?8#>|7vXL$$ z>MYlk*=C10oV*5=dTdwff}tY~)s>IUQgO(}j%InO`5Z#uC5i}?G| zy$IXzK-Yk`%)EQMv1Rmu#pbLsmBvv=!!uz(LoCj>kRSQBDYRXKCcf&Zm*&g3be#n| zegQ1V_WO~VI+sNY?dTZn(c)#7iS%d3Y8m}c?@8on!-0$r-S5oy zL|xksX#*Vt#;?pk;ILb2hifw_ss_`{lC5eb4S}}xK5c>94+lsq;OQHtcO2`Qt`Cs; zqPsqy>&=|}B2*!7?~QTX-rct)4Err@PY>0{So3l2>_2Ua;ZDqg#8<4z(JSjn#ge;b zz__4JYWvQ$$5ly91Jcolf;>T-9qR{+i9Vg_qGVMf$1+)(->qJ`;}wt0D*N)aasIr9 zQVs9IeKaM(xqfCOPs0R#RGYht?4Ra0`boZ7e@^nz=!-BSSjhb}%-UGLcq71QbNU=et6T}9yA`>LqYA0xq9G=+p;^MN+!sGe%c zH!!bpI1Z^TqM*;@G-zN{Ii?2jx=n z?d;~)IR*|(qDLW(*LsH%upNPuE~a<2TzN_JU&^=?de+T~w69WF66+Q@y}WfwAg^ceIOWHb_f`tHasE$?!+^nmDis!2Rn01s3zk$1@K z^InyjXb&|qsrR~wV1J~US^js$yD$KuO*jCP_?$S-#MX+UH(7MQps1na^_rMdSA1|f zM>ox2$@{SCGkichsoFOujT3a2<<9Tu4lU%k1^(Uazv{0>JP$IYFQnJ-YL1Kx^N-&$ zkr{*ltCQvg@i98*R1(_-9x^VVxnKN+wN$^d5|Kv5DBpE)RExSR^>FT0s@X+v8HSj@ zRAj!7#QuwHsN&6E4#8r@ciopxY3+ii2rpD@=xLgdkCa`F5D8m4>Lss{BJOBZnRkyCeuoWMdPwX%LS%_lhoAkQRBlm)fQ;-;FAw%?Y zv^hRSvMoGgArvEwhlDvc_(S2@8t23#lnOJzf5dO%*yGimfg&g_G{rZd2h_WsK&u$q z*Xu>(&Ubr^eh-|`*-jqbBs1R0#1U+L9>o1o#dG6XDlO)>^n^PFpi-E3Y;6`ZMC9#^ z(KM@TgBAM+LZ9e?MmuQ7HD{vCV#6zV4`6ppXbK}1=VXkRc3u7pT%i|MssSAKWle3W z9s6KqQoTqg&zrRTA2XH2kx-YK?xB(PvOc51#0?D~uqP28*dx4bt{b)D!krizbcp-F@GtH#A^U3~qZ2z9LYey7To5 zXUt8yjf9vZl$kh=?lbOTM%axqh)GWqg>_x4tqu53bC7U4>gHP9YYd5I`}MrYbb~lP zB~1F9o+RgYB4I~l{~Gr!<8YEaWVi~ZF2F&sdHBG2B-$#NrH&}W4TCDDR{bpg)j=-Z zBKP%>&jlsnS6cvsE(WaAB)9kPcJ`u9FOy0FGC3NF|%aQI*IB6 z5J&6IBuAXgOXVf^*luV1>aF*3=*XKY7To>vr!cY+Z z_H+~obzJObzs4Ae8u)Dc&>{O|v;F>>%<1Y05UUQxj&It4(%58OVUmUH-Ab5GqhDv>NuvfS zZ3wLd_@HI`%42Wt<#8m)PT^ZgU5>a2xjySATTHu>!K`}0zL$pU6c@71VRJS|V4_VXIv6w z&rXFtmHQPIa34^q)Lt!;KXdvyIQgH;za|a$SeD|+Xoj2m7d8hD^0@sKjxZL{9OoFv zdbfFrVZYlzBH zIM5P0)x(8v(LS!8qw*Ks0Q_GSSB>jynP= z(PGHJ_v)uyMUEra(z z!Wd1IZ!%nGlC}Pmk*45-M*<-C*~d3<^T1yyQ`z%x*Sh9#nsqHctL4;8`DMk2COY;6 zeSfYgNX|~nn?pf~U@8RE~&B6GYP&NY7F%AFI!G{1~+Q zA(xj0dZdyJ@r$M^d^~YyNiIET$k*p4)m&^yqSXFEp=_>bHX zq+V{(|7ZC{`j4Y`*e_Wl9YE_uUj{F~*k4~?Av1)h*RrC32=(wG_b7<#_ECznG2)w+ z03lAoxg-H3C|zg+%+RPf;~!4e-Iq;q{34+E8!Fi3C5wT`EBt*JK9*H%TMtXHx`b2> zr}|h=S$jLk+A^AeWyIrFl##Jy*v)1zSdsBoGBOl7L2Fgi`%TcKRvJ}Sb@ixZh>Kpf z9rWCBRPb`{RFR@CZ#sswg?)olx^(bmbOP-63I(RLM=qg{ zUbyJq_^io7=SWS||CI5BeYNjFBJrVbifbJx;Xc=i*Iq=zu!Y)p;tY&tzSKv!P)%vv zJ=ynJtmJTVl#=k+QZB}>#@B5+2%_U&Jm8RP=$3<6N1xZ=<{yU8+k7Ek5_&Z|m|?O1 z?!ZSctXB9(#oUo<4&7!(2$gBbL4E^AW0K)}bG(HNAHIcp0EXRS!^ek^W0_jjHQ8no zL$p1YzRPzn6-k-bn+TdMTbVakU6)%MlAof=D7bY30KU1nE2|HNpN^&ZsatXU+N#U} zr*Ampg}I~edQ8{>Wz5hHf_VFADdKoF==vZ(^|93tmNznL}o z?SU1XpbCppn=h76r+N=vHH_>Ef<1KHSK}rE=&KeoEDwXlGKDI3vV$2f>1UE+d7Ap0 zk*=2c@!DEmPRs_2Yn8_ew?1Bk`PTl)abUMO(h>bW&NK3e!qn4Ls+D=X z`aIs(6PZ^l@@VNd_(E-VOfP7>ZO&TiUlA*uTkr}7iknn~j5Vi2f9xue=L99MokMW4 zuWpROTr8b`3DTgF!D|)A@)!Qs75Rw_CGVG=E_>DDCQfMEs&wtX>}jt%fT~Vib~bet z-YQK)yppkauIq6Ndw`v7MO3jqTy%Z!zE#cE@rPmf72UDKd}NiQa-^bWan-o0xaiO) zyNr;E4G4P}ZRYO@?cn(_o4)_UCjOt$P}@A|Iz4Q>d#bsjb5zCId3usEZ8|3Y5Ok^g z@i)iwQ>3h~FDR*UKEFisRrx?|uZ%@u$ZV~RL-`Ud zJ_by3Y#DHyOi+6IQOTi;n6U}YRXvF>iK@eBUD^(giL-Wi(S?c5 zWrs%6ol2w6N$s(6v^WQ~SYH1%&nM%hKqWLRe=t37O=C;{W8hc$&j`S`t;BvWDx`C8 z?7c1ww5SbyLM^d>91W~4Q%Ngu!rbDhM)!ku$jE{LQ4{FEJvdioEARYb-3*I1Qn-^) 
z@P?3W(v+N-SInV(X{qeo@c35syYwxi`EV1qU73tObzuE! zf-he$(b|I)y~!69i#W#&V>T)HBs(PXkql5_8SR4phRnP6P^NKd1`w5!<;aO3tyvdUM`L_yz$Sc4wjZK`N>7S`?Fv4D}60FB@m^Z1J6D zx>EDA$?d$H^iiE0--2kPXL7RB5^HMJazLyWqn`c3K5_9zei|x<4;D1$OWQa>=k;x0 zoT4j{{G{SI*2Cs^$*@Q-E%T7pC3bL!%n!X~a=ykFl`XDTjU6^V&BCr0IX>K<6kT^} zAF1B{1nm^`SI6y#LnKA&_V`e_k6gpW?nL!Nr*=Mm&6mY?_%_SW>^LOmKrkFPx=+8k zP=`)gx0~kSbS3tL^d?eqykxhO-&Ecp8#Ugac6IgD*#oa=N&xM#_<+sj71EKnYfAf- zULz`fP26p3>&wbEhf%!oTa@lWwpqoVyiUbzhvUV7-kzk9(AiKYUFW^8^V1$8VdKlz zA$qoCmXGd{MjAH5Ap62)ib83N^y4{EjhCtx@jzH2u6Cm-H8Qlm>*o_O>lGn0@9KXs zw|vh>p%8O*Tvg?h5@f?Hv^QdZaHemt^b*`|3!a||HLI;1I0$_MOaZ4;eIIXOm8W`K z@?WfU^dF)z4Y)L7x$8r=1zJ%LaP`{Zu8V*x#%Btv;9qQ$@<%cpk;!Q`;7zhB>1zW@QJ7!o+pR7=3zJ zY{P|AQjIn6cp+f3cAc=no#LQ+Jo`WECx_-9hdCtu3JYa(*tZ&it zWqa|Y?ls&jv~G8y|YFszRx{;ET#V%Y1PQ4Tg)xP z+Pqdl+?-n#>fs~ehmWF}B9~7%3qbu&D`2N0Jw{B~n%tWf0Gux~@VH{F`Mi7h%TC#f zIDHz;1MN6Sk~jQ^$55a4JtX*t!0+uX&=G2#pCx;3u?uS2iqY33zGTeO0yNsUtt2X7 zlS#oOlY=^gU(!5>22f3b0WOxI&^5rxtZV$DfzwLS!zy9bDrU7>pqA( zAPv3EbhR5rxj}|&6onOY*IW}TGbw3p3+Kxhwzi7iOZ+cvaD(kdsfkI9wH530E-XY zOYgQDSjEb>!C1KkGo|w1XnY0v)-(y*NJ-|!$t>6{_2Ar65mN28(y&8q%V)0-|CL9j z26hut^Y)Kph!Ltq!W!e@VtltAcHvlyZaU%7jAW)M9_NwqOGisIv=9F@FRVxnDkN?F zSyCc|7jqz8m#Q%%28qq%Wp^w)2Q*_|`&bjxV7_IQ;}y2QR{_+vHhLk?wU%c6yw2(F zVXv{Pty0nIFnoc&8`m1K_v!<4wi?Ti(dHFBY1d_*unO)Tg!5E3usAZi?xQvz)ZhJ* zwI+UJvFiV@_mx3$ZdtntfnW&)cSwN7CAho0duW2YJBv@*#wLyDUvhLl~u;%rt!8FtHb zTgUyVQ|Pra$QNrhSkbs|7}rHxlTb!wXb{`$wCnPKC?=qp)7*&GhD5aV(_f6+FXCS= zn(-rOJS96zk$8d&;G$o^_r1SWrMNMkXED_3tTj?T(x1&s;T3Ct@666Ft#XiYM>}jM zAXLG}w^}iq{*8UVFh&u@rmKq**s};3X^TIEmD-&v#}Qw3;#&}<6QcwPM=N5B7O6+sL!eUR*1y)e00A7vvfubvzx&6}VFpIkI`jOeV{KWHXKub)TB{ z9~*@@&7@?*r9>@P-Mcy!VyPbMQ@?z-+@;RvloYZLB7MS&RU{0b^^CPd>M;$GwY(jg z+aB`TG_;-cdgA(w-)0>Vn9yScCt|BT}h&F8x*}*TBS66>aOw3CWfOFD;`LOr&x`;_~ga$NyhvT zy;DCdq^c$VT_s?+%a{PZ=44!?<|GCk%|O;=XK=!lB@@kedipgMmg`kLhvC*5!|0$B zHM1?a6qg13iP>S7BqQEycnLfZSQctr%g``{{)O>-;)?)Jo)vGioRAL_H#v1=kT{Oh zsRyjNeRW<@s)DT`);Y&lI`S35#sbvbO#&P8R7tuGayz z5`1%y*TTG4HAMO*fYyx#)rg0Ah6rSOP!VIddHy_Pz4Y7FQi+{YigPcqMSZbMUFY6_0~S8$1e|X^4np%G(x&H1k?2qf^Ze}Y-{}%Nx zT!|0mN^vrk`cWvNX+LGLy`+R9+8PSO!7n1Npu-1VOzdK<19DcIc}%H_kO*`QEkz^Q zdy470-0n1EvyH%hWv$UVinUS5DN!Ol zn~8nfsproXBgZ1*qBF1{j{1d_T~SkNU&p^g(>8qQkf0M5Z9))#a?x`mWj3ze?w?Op zSW~lQOeDUJ$QC3?5R0FqR7*wrDZp23;0Z|DW)b?vLHJ>|>~JC7RdI-q4^hIwQL%D6 z8L;a(RN%!~(ua6g$-1eZ5m789HEo<0dfZF1a}hUjJltKRB*R*?L|VsLsy29^g@5th zS{~gZzscv;V0bAQu2G>E67*wHmH6I`{Fev`nr;T=Ujj_8f7>K4RK*ye>r5Jzg%>IW zwMh(an}!`q!x5TlhhF^pD7NcN8_JI>HJ&*}L+Ov$c&ItAi9RWj zv|EXNe<@>)XM-+YBHQ>mb+n(xot<$p`I^+1SGknf8D)Yn^90x%Q5`lRx`x$|{GL%@ z4JM@O7TRn-1&e3)XUhcBt*M%&ROg&JJYO#p9kCB%B8^gwa1Mkb&iG?1FjNDViXA@8MDwb|=`DM{2bFGIFs&INhR9ne^g+EZ!E}LxjHN@{op^60R~VZ=@ML1G)eK zCor)8d>PWed|qRLNt?Y2?xu3P>sF{^ehPaoqw5_;iX<{s!SCO2kWh^3?z3R@ko}f4iZm z8Y-vf!I@^pg3#4^6GArs=3fJpf1c>@4fycGUDxM(ieHkzgDCAQVW{c~5)PXDIq<)n zdmw)U-Lm)H)0nLf>y+Af?_daVUu{UT`~Ryh{yDSv`oGnZq@rML8&zO@=*zpcAGq%% zYJP8r|LG6v*IYC|*|EVf`rL-9zYc(kJ~EhJ?|i8X`QO0nz3*oXT!&Lv@mCy9-duBUuiOuy)UxEux~c~Y z{Q%ahOH5F<&Hj5Yfd3h4zx;kY7gh1^1fc0R+fSs2BpE*UzmUL7bOd%^nZ_tq`~if@ zk|R?jpxUB99m8knZ@~>`TcP|fthR>u`&Hrp0A1)e{v!`&L22Mmic&m9`i~SP2qn!w z*&}NW>p!yxl+XU;TER}*|ID>es`)1x^wapyGzcZ;e_|-O%Kywzzd7?yK1TxnXFmUJ zVf;z8n5{7Xk!pVnB!uE26b%0>kiS`@|J9KH1?0Nk{r>=2Su}`1M(;cP zm3hdAcHRfJFnMBB1pU9+<$hA{i?yB87T2sUtsS{+_-~p&v&WYIC69{!-j_!Di=PIT za|DgP zZVwwPIIiI?-V!@nj4^PHHCsdyo&F<~B<$4iNs4Oxpm&8OOlH*b-+B(lqR`Rf`O@te zP`M?re8yIQ;r1QEZUx@1THb7FmV zrQ&uclp3tT#7$y)BUx#{BKf`=LGcvbC-VVKtR?CLRGei^$0woFFXy!XjeA!F3xzhC zmmbt5(1awC*|0E7K0tBaZ+7fSpTR331hg7IV>&DW4XxBef0X%A-~M*%ezqjh-|X$b 
z(F548FQDYenecZXG3y+zQs}i5Sr_el+278kZx88bM7~glB^^S3ymJo&=NAqCZX?k6 zzJRNg{sIi6p>|7v!LR))XpX4byjCCJr(y2Z?+F#Cn!n)qC~x;yQJiCX6wKBV@s2#K zzsk9j;c{c@=tppLHq~02bEiL7YX!52VrG@Z8z}V7dqeb;M#u}EHt{O_mKM6 zgQA&KD3Y$fm-z$Kq+wpRfv!jp2YL-j)%agWK&}`A6`C6P(ZsA+XX)kPw>Vg7*{lh; z%8q0|jjJtgKtnp-Z}&Hg=3P}l#vilET!Dt$X)a|J{_69-v-*RE^#0-J|HCd&G>4_j z4)#lG%KPwtGa>C^gvu{Y0I2GtCqgARd4(OU5X{^~tKad)it{h>6%0hnT2s?6QYa=x z0u}~DCrf1+!P4lR-Y4kWU0r{gs4~hhw15h^L>Ribxa(NZo(R`tB^&q)|1J>W3;4rU z`?s?B;+GV&?j**i>BrdkF zpOfr_&IkG5&8PSGeC*2<@+qM65sr43$_jvmirJ$Vz11S=rvt@r)%GnB$}*c}>i(K{ z9m*G_MaYh_6C0$KYdV4S7iIE&kMSG+Pn7>|Q~$$a^_3=r?t9Eu*>z~}F5g*9UG^{W zjIV-!X!rb|+RfnB_E+&laxnehiAVQTTNHZanD}p!`MvG`CK;j5(vmBs1(ClD>oW-4!~0!}%bkSARjD#>*;t;Ntd+rbf5(oX9u8&zksP|PYhsV4rb40`_9SM7y= z*ZCr+&Vy&QjJ1S`jn_*v_vm?3*1jgw7P!Rp57qC2!)vN;P7B>!K(A-ud&ahBBCs*4 zD!LgJCrj4)`P|&DmNhNBo_6$x{}iPf!qnh`-^B$_=o<@x#LQg9GZXoEL10 z9a>(H_RqS%K+_b8UxU*QNwYkhS*^4=ymg|D^taEu2g))6ykCt9cAuj9-G+yxGovm( zJHN9;RTGIW)qbC!f0s({(mo^;(?N+*Sve|Eu3pcyJA|EJDoJfIDiH7Qw<$Ox?yOX5 za-|&q?hBoSLyY5Po?uaAP-Ka>5AusMgVZWhFi7)pPpIz51{_tX;IxAS#KLG8**?=Hej=nlGV)t(~2Ij@m`#zHa1Uj@_t zQ0shs^DCrus$+k+W$|oKx+Vs<*qryN^PAQC=Z(DNFN1G<3k&(G-5pR1JUMQ>xU4kkLPz6&3bYNWEth6RIXFVUvKp;MpQt~U6pN$}h zN=a(6j6&8LxOdmXt6AyCi5@U;oMc1PeyY@huwL?aV0+wHMZ`A%I{+7_`+0o0&ONN}$?EBY()Icu-d&v78+kXA5OnZ_ zYMV}porL4#WN!WxD*F@{{g5|lFSR;jMuqmP@RDXOdi};|)0sL???znnK9w2$ApT=B zGb+t3--cQtKK?8p(OaD<@szEu)QmFw@n6tbRzZ%L21|R8Wx5@b>&8e-n~DHh9PFXbyYPM z#rN%%bj)Rq22dI*(Ok(dkA%|WAQK5vTrSnzNl6w+Hlb_NoITD^Kqt z`jVEEHplU0+;30W?k)pwJfD6hRA4@6D*Yxm%)N=}+{OaMaBcd86iu=(K1MKyZIVezLmx(zSe^fw=)O`zHmBot=+}G-j&$C1 zjL)?;w*L0%*7|gg7dLYaC*&55CH~<&EpX+$Mh`Zq$L};gL^Ui}NmHnbTgM;(T|?Fq zJ|MmhUAvOTLj*RYD_OELPL&328)}DVnO%g%@9Fgdc`P3`iITwq#aJF1paL@gzNP{aHtsf&bJw8?!a- zm<*LN8urfUH)9Ov6Dg>jD)X$-6Rf`S(KL3+LUUZ{d~D-Q{5bUVjt{gF=`UUbt`^O> zKmJkJPJ5&S&1X&g>{qKM3f-oUa8uamRl3YLVS*%h8igvddphalvC;JDjY zHR@qe7#o=DTg0@KH8@d{i{*Oti5;YlXpg$dLPMjnRRy(`8{&$!;x#CVZ=+`ZgLI*G z_Vjk|%h!WQ$y7EDI5V!tn-Q3t)$oj-To?-8jVy=b>o~H#e5Otgd9(w1{>PRD!IP)& zMY9I^x~A~f6B_m_dX~FO^G|O>XE(wIbbIBd-1?d-|QCQ0k0vk%cSH9&AuoL&SsnOTXDNCiF9x z7~)OXo@LsS$anBvxe-4Cfr_1A&SuTMbnwZkaI5Pol5hImFuA_A7la8RRm1 zuY?#3kbWaT+`H19xze0-g4^M z91LUCXvr2m`dxQOk_r5k1>Rw`S9h_EBAzFdo@4DiXeUc9QV}zCO2>^985Dio)1s%) z&q8edR%6!aMM3&9{PIDgm>2EPQ9tPxwu!#ZD5cx_MW7~$JR!HDp9W53@!`7j;!w+% z+LNxfaz@eAl*sIc8R~cklpUYId_8{6*0;y!x|zY5Ak^0!EkEcuddd}ZE>O07!ml8! 
zy>*p*Ij8$?f3OhUhxiQ7`YtsFWPUv4#Cp|ZJ0jnxiKVYGiqG4$S3Kad?~=xZKbAKC zipXM+JfaSnviC#`a@N@x_wlGIq-2>*N$tp-g15CbAIF>sRUtX7^QzoAujX_pNTRCz z=*aE+5x`7Og(6rrRe`v-;*iZv@_@E`EHglVggfxj<|EfRUV(L*mZwh74oe(-7HAo} z2yG)YPK;CYTjdQ$ z0D;R`Wo5Bh8C8~f9)yyE=y%4pk9P^vj496HDGhIH4#~Dqt`s&p9_VWy#xPkV2zlHIdjzhsu=ww+K8SF=j!PLi zShP-p*p9v|ZfTxcR^8{PX14`nrEb3_E3;t7T1_Kau;U(6N!{hzK zWlv*hw%$n4%+(u%wX6_V9MrprV|XXHkKNE$T5#K_M0CLeq9T|bW`~HFYQ>)Eb;Rgp zbPdOK?3ZS48{Fv6m&zs6ZA!H`;!9Mo@=?6F1EGAbJ*1tj+TJ_oIb<~lISvw(FISHx zZFwWkK32U4F}fVzj^gGL)JX{i*XFD)A~|dWb>4$0)G8ToXirWO62v=JgT|&~Ky|ZK z)N;do-;pz)6%5)VLH}BFzkaL z7i)FIh0-&V0~uY0fFNaC`uWNVwbW*_JuaXHT$(b;IK+c(k0f%urnrxX2YIn^n@pla zWi!h38Cv|zTyV7Fjhh^-MnJA0vFC^%;MvsZb5@Q&w>6!G&G6NjFI}cBC8@0AE_kc` zx_<7x9>AHz#?2(13Uu29@u5Brby!__Og3&g#F_g}u}}X%6`BhLw#bcEbG23LBE>%*ut$1;E4Dxf!kqe5EWyi zCnK;z(?PQ3D(QVSIeurL71g8-eH>r@!gU(;yoKhnMKrR*l!Mi0b8j#HWaIV`^Sau* zV-~E`UwGb9+v6qv`~>wk|hE!Ey*SB8l8onC`pf5`*cmogDvM$?=W9mdCfi?Vy}5)f>xsI zD7%ggOUKiJ6(fOZX-E?!L-vRR#Cjy~vxsQ#U=M%X(zN!ju<9!Ng#% zp;{6=RnY{;82SxzvrhhLnsb4fAyBgeqO{bYf+Jjao70Bzg4anXxCme=4N(H|pME9$OGc#?@J`n(qGGH$`|> z=?L(fts@R+%8D{hiofY5cCt0NAqf_vblGT7yZL+z2)9?Wri~^GlzlqtRC#Vf9@~{+ z42)fxv8S-Hq&6s{4S!;~*|k_|wkePaI10^gI}xhcMqaOnR$`af%~z@1FMKYoWW$Th zPk%k{ekna=zmCo8ScPcA(`|BaTcNqP&2wWhOAte9IVRy1y7q9Aj=GjnNw3dkF~r>R zM-*c!k3&)2562Fn7INMD@blZnZB0F{TjK3g50Llgm4>Fl)C}gu3J9Jr6^~?C01i~W z+qt?qzKs@*`hvc)yK9GXbAzRhpFHO%#D=U&%E9%!DOzGg*e3>aoj!y&$<*8R@m|QQ zP4iDtmeU@@G&&Q#i2&k;ybyXHmwD2_EgRja)#qdWvYGw`#q|*nxgR-r9J%G!z4UYU zq|Ga=GRjU}hj-moc86Z#I)v9wkMxNnME+Y;7=clB*{Fbq&A+*sF_0U^%C6n0^%%vT z{jP5<_Y*7!tv5uIoSt5X)3 z>%kFIMDJx6I$UM5f^zvZ%-i9)e;4$%h<|>G{)4n=yi^TieE*|ne&ZsRjFnRHQ4&45 z*44M!!;5iPip7l4nwte}%M&)aQ+xnFJ^(fNiv8N9_CQr4j=_@ZDg0VzELq(v%sM(@ zhf{rYF7&Boc+6Zx+h8JrVO%+JJdKwOr&h%AKwbWdzqoPE*)UHRa1h>m_a2qjpgPT6J%-xB{V{w9$6WBaB${sEubx5AbV9PQ_^@3jEsCr3fjcO(23 z+bSn}v+P3qY`r%cd~NohCLsAnNUNwSK^rkIo7_JCI=8DH@lN%fHslEu;6X*IuChht z)n8f++Bl+*(|_KtRVMDrO}{sYxyNl^#WooKN$jS&ETC+pH`ZI>#TRZa((@>`h+4KI zBJ#s7K86MH2s~_&KLq{M*E_y1+!jjRiMU)}CDXPU6JdKAi#hYr*t$hUGo6jH z4Id@;Yt|+$uk$iee{LSQTceuzWGGkSX*wyv+Bf9Bn)PfN^GIoEB-8Xfzslz2sq0}a zH|#?ukTYg7;0@|-ChTv8NBJz=<5hw`GxV6_C^IV%k9yR9$Tc5jCZDRyR+i>~%`lT} z_SnIC)03uPyyPX{-sl4OrF4-p=XPYid;h8Z>`pa_HG=SB#G-3w`>E+XmSh~{zV(A` zx2t&odc~l~d({rD_6tJ)$S1N664Kk|MRTti&V?HSyi^V_pQmqgaMA{U${k<9*l^^o zWr=kE2|>9HnDCL0SKMN6f+23(Xcot8v$cuv0#2`g!4(>I8YxjcCNXz(U|POUFz-87s7d;bCu0x1o?Mr%w!n?7x;Q1ZTk?B)lvHm+1@Cm44L?Rb3^d~s zE!iR13J;^)tSgVhi95C@c8Z zwjp=<7SRr_B{BCP3vx+lM&XXK@7@#SY~T-|+>253=nL`evQ0m&J>N55?k}_m7C9YS z_CrgGoDlt9usc!D#cr)yo0P3F3!g7HYu>PJFn9}e`<##-Wrudy=*!uMN#-_`_5p#Wko|6B#m^^(q z*!CPJd%N_(Pa}vWLG;}qqJ$Uo-Nl^y?$SW|&pY>cw9~9dFW`22i3I^QVri1 zi?@}GMKPGqIly^SFo?F*K&is)%z&24pA^Zjb!Q|Ky)5XgYHDjGyb z@hTgf?X1Ec2{?TYh|x4J>Uv^=9~m%P`sQ5{S{WCt)-=^8=v}6T66e%qbP8xE@U_q{ zMjLQv#xeaG!8g^V2aq`}X6qSd7aKizOUeanhEN4%2&>TjXVAk$SR^mq$fL3};wmbc zOvL@qkOZe4fs^hZkl|H?=+<^@kupeC!dmL~?V#WVu^F;*pyqihvD@cda)(v_w&PlL ztr^geZMDZ4?l)D@DSVoo#OB9KpEm@BH;p6@L(5-2Bt`Z%DDpQJ)gLpGeZ1F7Q0l3Q zBn=Yd&C^%I^|--*ChN18#5CuTz%+NiK}$-Ail!B->(KOOE>3z%XB+D|n_cEdgS&pz zT3l6Zr;Rh7S9r|8WdgSJ1FauiHPLuG3`?_Dw ze6v+ISVc}tXzsXqZ03j$=GubSyLHB_R9FmO2X#^MqR4j-!7pwaG#2f~D3eHBG6|$) zl09s+M@zF_+Jy#r>y}z7=1!h^G*BFCci?Ogk>%bk6m_@u*N01-HY*(0lm6I0QWbMs z7FhCD9A9WLZQu~CjD7ZYxF0%?B-IE#|2srTj+`M*bgK76l6_7GJ@0( zu<07X6kk7#b8D;|M6|}_ud|ap$ezC%jARtO@rbOC&3_BZ>{+wU-ZKJF>{(xBb39nY2{MC$M@|D@HoX>5h$QY>;H-8J@=yvr&Uu^I#M}h11)& zWSC4PX%S&kgZT|o@12C;uNN55Y33!72pLEKys%U)4NsX~0t}b}+}v-c$nx!J%7Yu% z)D+_byC(N@+Jn>8wP@rj@6Y*I5I}pQ>~YoY-GFO>(?}Sk>Wfo(!iVbDu?LjU z+lREbOI(A;^2b_fyeLxE)ZWWlsF!~5(!C#?nf^vfx?&d%06T2AIB>3be 
zh?3LmNN4iSL4v`kZ$JroBYE`qNB&0TKvKO2<;eL@RQv~V1^@}-9x;F6liwxc*Y-{#`98`ul)X)JMHLf1!rIrHx;jNIe7s)LN_c;J-^L0#Xc5Dk%Tk1F!hK z0!k!*$DHT?T`CHYlJ++j_;+Rg>YG+PAZIQr=l^#p1|So^$L_QKFMa^r;!gxXr!!vb rKf>+T?b5{m1K1yM`hNwO9|m_fkugx@?K`EC2rkE|$V` literal 0 HcmV?d00001 diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 83cc1777b35f6..82f9b18c1e2eb 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -113,13 +113,6 @@ want to clone your fork to your machine:: This creates the directory `pandas-yourname` and connects your repository to the upstream (main project) *pandas* repository. -The testing suite will run automatically on Travis-CI and Appveyor once your -pull request is submitted. However, if you wish to run the test suite on a -branch prior to submitting the pull request, then Travis-CI and/or AppVeyor -need to be hooked up to your GitHub repository. Instructions for doing so -are `here `__ for -Travis-CI and `here `__ for AppVeyor. - Creating a branch ----------------- @@ -432,7 +425,8 @@ Building master branch documentation When pull requests are merged into the *pandas* ``master`` branch, the main parts of the documentation are also built by Travis-CI. These docs are then hosted `here -`__. +`__, see also +the :ref:`Continuous Integration ` section. Contributing to the code base ============================= @@ -444,8 +438,9 @@ Code standards -------------- Writing good code is not just about what you write. It is also about *how* you -write it. During testing on Travis-CI, several tools will be run to check your -code for stylistic errors. Generating any warnings will cause the test to fail. +write it. During :ref:`Continuous Integration ` testing, several +tools will be run to check your code for stylistic errors. +Generating any warnings will cause the test to fail. Thus, good style is a requirement for submitting code to *pandas*. In addition, because a lot of people use our library, it is important that we @@ -467,7 +462,8 @@ Here are *some* of the more common ``cpplint`` issues: - we restrict line-length to 80 characters to promote readability - every header file must include a header guard to avoid name collisions if re-included -Travis-CI will run the `cpplint `_ tool +:ref:`Continuous Integration `. will run the +`cpplint `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself:: @@ -514,7 +510,8 @@ the more common ``PEP8`` issues: - we restrict line-length to 79 characters to promote readability - passing arguments should have spaces after commas, e.g. ``foo(arg1, arg2, kw1='bar')`` -Travis-CI will run the `flake8 `_ tool +:ref:`Continuous Integration ` will run +the `flake8 `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself on the diff:: @@ -542,6 +539,35 @@ existing code, so don't break it if at all possible. If you think breakage is r clearly state why as part of the pull request. Also, be careful when changing method signatures and add deprecation warnings where needed. +.. _contributing.ci: + +Testing Thru Continuous Integration +----------------------------------- + +The pandas testing suite will run automatically on Travis-CI, Appveyor, and Circle CI +continuous integration services, once your pull request is submitted. 
+However, if you wish to run the test suite on a branch prior to submitting the pull request, +then Travis-CI, Appveyor and/or CircleCI need to be hooked up to your GitHub repository. +Instructions for doing so are `here `__ for +Travis-CI, `here `__ for Appveyor, and +`here `__ for CircleCI. + +A pull-request will be considered for merging when you have an all 'green' build. See +this example. + +.. image:: _static/ci.png + + +.. note:: + + Pushing to *your* branch will cancel any non-currently-running tests for that + same pull-request for Appveyor. For Travis CI, you can enable the auto-cancel feature + `here `__ and + for CircleCI `here `__. + +.. _contributing.tdd: + + Test-driven development/code writing ------------------------------------ @@ -875,12 +901,8 @@ updated. Pushing them to GitHub again is done by:: git push -f origin shiny-new-feature This will automatically update your pull request with the latest code and restart the -Travis-CI tests. +:ref:`Continuous Integration ` tests. -If your pull request is related to the ``pandas.io.gbq`` module, please see -the section on :ref:`Running Google BigQuery Integration Tests -` to configure a Google BigQuery service -account for your pull request on Travis-CI. Delete your merged branch (optional) ------------------------------------ From cd51bdd27423fab8a69431dec5dabf4b6bf56c44 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 15:49:00 -0400 Subject: [PATCH 331/353] DOC: add section on how to use parametrize to contributing.rst (#15883) closes #15608 --- doc/source/contributing.rst | 132 +++++++++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 24 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 82f9b18c1e2eb..467d6456d60cd 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -51,14 +51,9 @@ Bug reports must: ... ``` -#. Include the full version string of *pandas* and its dependencies. In versions - of *pandas* after 0.12 you can use a built in function:: - - >>> from pandas.util.print_versions import show_versions - >>> show_versions() - - and in *pandas* 0.13.1 onwards:: +#. Include the full version string of *pandas* and its dependencies. You can use the built in function:: + >>> import pandas as pd >>> pd.show_versions() #. Explain why the current behavior is wrong/not desired and what you expect instead. @@ -209,7 +204,7 @@ At this point you can easily do an *in-place* install, as detailed in the next s Creating a Windows development environment ------------------------------------------ -To build on Windows, you need to have compilers installed to build the extensions. You will need to install the appropriate Visual Studio compilers, VS 2008 for Python 2.7, VS 2010 for 3.4, and VS 2015 for Python 3.5. +To build on Windows, you need to have compilers installed to build the extensions. You will need to install the appropriate Visual Studio compilers, VS 2008 for Python 2.7, VS 2010 for 3.4, and VS 2015 for Python 3.5 and 3.6. For Python 2.7, you can install the ``mingw`` compiler which will work equivalently to VS 2008:: @@ -219,7 +214,7 @@ or use the `Microsoft Visual Studio VC++ compiler for Python `__. Read the references below as there may be various gotchas during the installation. -For Python 3.5, you can download and install the `Visual Studio 2015 Community Edition `__. +For Python 3.5 and 3.6, you can download and install the `Visual Studio 2015 Community Edition `__. 
Here are some references and blogs: @@ -544,26 +539,26 @@ signatures and add deprecation warnings where needed. Testing Thru Continuous Integration ----------------------------------- -The pandas testing suite will run automatically on Travis-CI, Appveyor, and Circle CI -continuous integration services, once your pull request is submitted. +The *pandas* testing suite will run automatically on `Travis-CI `__, +`Appveyor `__, and `Circle CI `__ continuous integration +services, once your pull request is submitted. However, if you wish to run the test suite on a branch prior to submitting the pull request, -then Travis-CI, Appveyor and/or CircleCI need to be hooked up to your GitHub repository. -Instructions for doing so are `here `__ for -Travis-CI, `here `__ for Appveyor, and -`here `__ for CircleCI. +then the continuous integration services need to be hooked to your GitHub repository. Instructions are here +for `Travis-CI `__, +`Appveyor `__ , and `CircleCI `__. -A pull-request will be considered for merging when you have an all 'green' build. See -this example. +A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing, +then you will get a red 'X', where you can click thru to see the individual failed tests. +This is an example of a green build. .. image:: _static/ci.png - .. note:: - Pushing to *your* branch will cancel any non-currently-running tests for that - same pull-request for Appveyor. For Travis CI, you can enable the auto-cancel feature - `here `__ and - for CircleCI `here `__. + Each time you push to *your* fork, a *new* run of the tests will trigger on the CI. Appveyor will auto-cancel + any non-currently-running tests for that same pull-request. You can enable the auto-cancel feature for + `Travis-CI here `__ and + for `CircleCI here `__. .. _contributing.tdd: @@ -620,8 +615,96 @@ the expected correct result:: assert_frame_equal(pivoted, expected) +How to use ``parametrize`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`pytest `__ has a nice feature `parametrize `__ to allow +testing of many cases in a concise way that enables an easy-to-read syntax. + +.. note:: + + .. code-block:: python + + *pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. + + class TestReallyCoolFeature(tm.TestCase): + .... + + Going forward we are moving to a more *functional* style, please see below. + + +Here is an example of a self-contained set of tests that illustrate multiple features that we like to use. + +- functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters +- using ``parametrize``: allow testing of multiple cases +- ``fixture``, code for object construction, on a per-test basis +- using bare ``assert`` for scalars and truth-testing +- ``tm.assert_series_equal`` (and its counter part ``tm.assert_frame_equal``), for pandas object comparisons. +- the typical pattern of constructing an ``expected`` and comparing versus the ``result`` + +We would name this file ``test_cool_feature.py`` and put in an appropriate place in the ``pandas/tests/`` sturcture. + +.. 
code-block:: python + + import pytest + import numpy as np + import pandas as pd + from pandas.util import testing as tm + + @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + def test_dtypes(dtype): + assert str(np.dtype(dtype)) == dtype + + @pytest.fixture + def series(): + return pd.Series([1, 2, 3]) + + @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + def dtype(request): + return request.param + + def test_series(series, dtype): + result = series.astype(dtype) + assert result.dtype == dtype + + expected = pd.Series([1, 2, 3], dtype=dtype) + tm.assert_series_equal(result, expected) + + +A test run of this yields + +.. code-block:: shell + + ((pandas) bash-3.2$ pytest test_cool_feature.py -v + =========================== test session starts =========================== + platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0 + collected 8 items + + tester.py::test_dtypes[int8] PASSED + tester.py::test_dtypes[int16] PASSED + tester.py::test_dtypes[int32] PASSED + tester.py::test_dtypes[int64] PASSED + tester.py::test_series[int8] PASSED + tester.py::test_series[int16] PASSED + tester.py::test_series[int32] PASSED + tester.py::test_series[int64] PASSED + +Tests that we have ``parametrized`` are now accessible via the test name, for example we could run these with ``-k int8`` to sub-select *only* those tests which match ``int8``. + + +.. code-block:: shell + + ((pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8 + =========================== test session starts =========================== + platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0 + collected 8 items + + test_cool_feature.py::test_dtypes[int8] PASSED + test_cool_feature.py::test_series[int8] PASSED + + Running the test suite -~~~~~~~~~~~~~~~~~~~~~~ +---------------------- The tests can then be run directly inside your Git clone (without having to install *pandas*) by typing:: @@ -675,7 +758,8 @@ Furthermore one can run with an imported pandas to run tests similarly. Running the performance test suite -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +---------------------------------- + Performance matters and it is worth considering whether your code has introduced performance regressions. *pandas* is in the process of migrating to `asv benchmarks `__ From ff652a5abafee88cbd858c12cc06dd60e73a6647 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 3 Apr 2017 16:45:41 -0400 Subject: [PATCH 332/353] BUG: Patch handling no NA values in TextFileReader When cleaning `na_values` during initialization of `TextFileReader`, we return a `list` whenever we specify that `na_values` should be empty. However, the rest of the code expects a `set`. Closes #15835. 
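
A minimal sketch of the case this fixes, adapted from the regression test added
below (the inline CSV string and the use of ``StringIO`` are illustrative only)::

    from io import StringIO

    import pandas as pd

    data = "a,1\nb,2"

    # keep_default_na=False asks for an empty set of NA values; before this
    # patch the cleaned na_values came back as a list rather than a set, which
    # broke read_csv when an index column was also requested (gh-15835).
    out = pd.read_csv(StringIO(data), keep_default_na=False, index_col=0)
    print(out)
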
Author: gfyoung Closes #15881 from gfyoung/keep-default-na-excel and squashes the following commits: 0bb6f64 [gfyoung] BUG: Patch handling no NA values in TextFileReader --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 2 +- pandas/tests/io/parser/na_values.py | 11 ++++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 63aea96ef3369..fd7744158829f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -995,6 +995,7 @@ I/O - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) - Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) - Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 30b88de91ef76..0080ded1ac03d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2890,7 +2890,7 @@ def _clean_na_values(na_values, keep_default_na=True): if keep_default_na: na_values = _NA_VALUES else: - na_values = [] + na_values = set() na_fvalues = set() elif isinstance(na_values, dict): na_values = na_values.copy() # Prevent aliasing. diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 2cbd7cdedf2ab..cf29dbdfef49d 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -11,7 +11,7 @@ import pandas.io.parsers as parsers import pandas.util.testing as tm -from pandas import DataFrame, MultiIndex +from pandas import DataFrame, Index, MultiIndex from pandas.compat import StringIO, range @@ -303,3 +303,12 @@ def test_na_values_uint64(self): expected = DataFrame([[str(2**63), 1], ['', 2]]) out = self.read_csv(StringIO(data), header=None) tm.assert_frame_equal(out, expected) + + def test_empty_na_values_no_default_with_index(self): + # see gh-15835 + data = "a,1\nb,2" + + expected = DataFrame({'1': [2]}, index=Index(["b"], name="a")) + out = self.read_csv(StringIO(data), keep_default_na=False, index_col=0) + + tm.assert_frame_equal(out, expected) From eedcc8fd493158be3d88cf2aa139914a7b21c349 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 17:37:33 -0400 Subject: [PATCH 333/353] DOC: whatsnew cleaning --- doc/source/whatsnew/v0.20.0.txt | 40 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fd7744158829f..107b682a86d00 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1,7 +1,7 @@ .. 
_whatsnew_0200: -v0.20.0 (????, 2017) --------------------- +v0.20.0 (April ??, 2017) +------------------------ This is a major release from 0.19 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all @@ -9,12 +9,13 @@ users upgrade to this version. Highlights include: -- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) - The ``.ix`` indexer has been deprecated, see :ref:`here ` -- Switched the test framework to `pytest`_ (:issue:`13097`) +- Improved user API when accessing levels in ``.groupby()``, see :ref:`here ` +- Improved support for UInt64 dtypes, see :ref:`here ` - A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here ` - -.. _pytest: http://doc.pytest.org/en/latest/ +- Support for S3 handling now uses ``s3fs``, see :ref:`here ` +- Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` +- Switched the test framework to use `pytest `__ (:issue:`13097`) Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -542,7 +543,7 @@ S3 File Handling ^^^^^^^^^^^^^^^^ pandas now uses `s3fs `_ for handling S3 connections. This shouldn't break -any code. However, since s3fs is not a required dependency, you will need to install it separately, like ``boto`` +any code. However, since ``s3fs`` is not a required dependency, you will need to install it separately, like ``boto`` in prior versions of pandas. (:issue:`11915`). .. _whatsnew_0200.api_breaking.partial_string_indexing: @@ -776,9 +777,9 @@ New Behavior: Index.intersection and inner join now preserve the order of the left Index ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -`:meth:Index.intersection` now preserves the order of the calling ``Index`` (left) +:meth:`Index.intersection` now preserves the order of the calling ``Index`` (left) instead of the other ``Index`` (right) (:issue:`15582`). This affects the inner -joins (`:meth:DataFrame.join` and `:func:merge`) and the ``.align`` methods. +joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` methods. - ``Index.intersection`` @@ -844,8 +845,6 @@ Other API Changes - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`) -- Reorganization of timeseries development tests (:issue:`14854`) -- Reorganization of date converter tests (:issue:`15707`) - Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) - ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) - ``.merge()`` and ``.join()`` on ``category`` dtype columns will now preserve the category dtype when possible (:issue:`10409`) @@ -860,6 +859,16 @@ Other API Changes - ``NaT`` will now returns ``NaT`` for ``tz_localize`` and ``tz_convert`` methods (:issue:`15830`) +.. 
_whatsnew_0200.develop: + +Development Changes +~~~~~~~~~~~~~~~~~~~ + +- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) +- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) +- Reorganization of timeseries tests (:issue:`14854`) +- Reorganization of date converter tests (:issue:`15707`) + .. _whatsnew_0200.deprecations: Deprecations @@ -915,7 +924,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Improved performance of ``pd.wide_to_long()`` (:issue:`14779`) -- Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) +- Improved performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`) @@ -1000,7 +1009,8 @@ I/O - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) -- Bug in ``.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) +- Bug in ``.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) +- Bug in ``pd.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) - Bug in ``pd.read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) - Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) @@ -1011,7 +1021,6 @@ I/O - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) -- Bug in ``pd.to_json()`` for the C engine where rollover was not correctly handled for case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) Plotting ^^^^^^^^ @@ -1026,7 +1035,7 @@ Groupby/Resample/Rolling - Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`) -- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`) +- Bug in groupby operations with ``timedelta64`` when passing ``numeric_only=False`` (:issue:`5724`) - Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) - Bug in 
``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) - Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) @@ -1073,6 +1082,5 @@ Other - Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) -- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) - Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) - Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) From da0523a346abd9575ab05746e242ec67c1c442d4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Apr 2017 17:49:22 -0400 Subject: [PATCH 334/353] API: expose pandas.errors closes #14800 Author: Jeff Reback Closes #15541 from jreback/exceptions and squashes the following commits: e5fbdc8 [Jeff Reback] give nicer deprecation / message on infer_dtype moving ab4525b [Jeff Reback] typo on pandas.errors in whatsnew d636ef7 [Jeff Reback] document removed exceptions 3dc4b9a [Jeff Reback] more docs for exceptions 2bb1fbd [Jeff Reback] remove AmbiguousIndexError, completely unused 5754630 [Jeff Reback] fix doc-string 35d225f [Jeff Reback] more examples e91901d [Jeff Reback] DOC: better docs on infer_type 7e8432d [Jeff Reback] remove need for PandasError sub-class 92b2fdc [Jeff Reback] corrections 991fbb4 [Jeff Reback] API: expose pandas.errors eec40cd [Jeff Reback] add pandas.api.lib add infer_dtype to pandas.api.lib --- doc/source/whatsnew/v0.20.0.txt | 26 +++++ pandas/__init__.py | 3 +- pandas/_libs/src/inference.pyx | 110 +++++++++++++++--- pandas/api/lib/__init__.py | 5 + pandas/compat/numpy/function.py | 2 +- pandas/computation/align.py | 7 +- pandas/core/common.py | 30 +---- pandas/core/frame.py | 4 +- pandas/core/indexing.py | 2 +- pandas/core/ops.py | 4 +- pandas/core/panel.py | 8 +- pandas/errors/__init__.py | 57 +++++++++ pandas/indexes/multi.py | 6 +- pandas/io/common.py | 45 ++----- pandas/io/excel.py | 4 +- pandas/io/html.py | 3 +- pandas/io/packers.py | 2 +- pandas/io/parsers.py | 5 +- pandas/io/parsers.pyx | 6 +- pandas/io/pytables.py | 3 +- pandas/lib.py | 5 +- pandas/tests/api/test_api.py | 83 +------------ pandas/tests/api/test_lib.py | 10 ++ pandas/tests/api/test_types.py | 83 +++++++++++++ pandas/tests/computation/test_eval.py | 5 +- .../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/frame/test_constructors.py | 8 +- pandas/tests/frame/test_to_csv.py | 2 +- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/indexing/test_ix.py | 2 +- pandas/tests/indexing/test_multiindex.py | 2 +- pandas/tests/io/parser/common.py | 3 +- pandas/tests/io/parser/dialect.py | 2 +- pandas/tests/io/parser/dtypes.py | 2 +- pandas/tests/io/parser/skiprows.py | 2 +- pandas/tests/io/parser/test_unsupported.py | 2 +- pandas/tests/io/test_common.py | 18 --- pandas/tests/io/test_packers.py | 2 +- pandas/tests/test_errors.py | 50 ++++++++ pandas/tests/test_window.py | 2 +- pandas/tests/tseries/test_resample.py | 2 +- pandas/tseries/index.py | 10 +- pandas/tslib.py | 2 +- pandas/util/depr_module.py | 15 ++- setup.py | 2 + 47 files changed, 420 insertions(+), 236 deletions(-) create mode 100644 pandas/api/lib/__init__.py create mode 100644 pandas/errors/__init__.py 
create mode 100644 pandas/tests/api/test_lib.py create mode 100644 pandas/tests/api/test_types.py create mode 100644 pandas/tests/test_errors.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 107b682a86d00..74fe7916523c5 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -76,6 +76,28 @@ Commonly called 'unix epoch' or POSIX time. pd.to_datetime([1, 2, 3], unit='D') +.. _whatsnew_0200.enhancements.errors: + +pandas errors +^^^^^^^^^^^^^ + +We are adding a standard public location for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously +these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings +will be removed from the ``*.common`` locations in a future release. (:issue:`15541`) + +The following are now part of this API: + +.. code-block:: python + + ['DtypeWarning', + 'EmptyDataError', + 'OutOfBoundsDatetime', + 'ParserError', + 'ParserWarning', + 'PerformanceWarning', + 'UnsortedIndexError', + 'UnsupportedFunctionCall'] + .. _whatsnew_0200.enhancements.groupby_access: Groupby Enhancements @@ -858,6 +880,10 @@ Other API Changes - ``NaT`` will now correctly return ``np.nan`` for ``Timedelta`` and ``Period`` accessors such as ``days`` and ``quarter`` (:issue:`15782`) - ``NaT`` will now returns ``NaT`` for ``tz_localize`` and ``tz_convert`` methods (:issue:`15830`) +- ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``PandasError``, if called with scalar inputs and not axes (:issue:`15541`) + +- ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``pandas.core.common.PandasError``, if called with scalar inputs and not axes; The exception ``PandasError`` is removed as well. (:issue:`15541`) +- The exception ``pandas.core.common.AmbiguousIndexError`` is removed as it is not referenced (:issue:`15541`) .. _whatsnew_0200.develop: diff --git a/pandas/__init__.py b/pandas/__init__.py index 5c7c9d44c5d10..1bc85899fb89f 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -62,7 +62,8 @@ json = _DeprecatedModule(deprmod='pandas.json', deprmodto='pandas.io.json.libjson') parser = _DeprecatedModule(deprmod='pandas.parser', deprmodto='pandas.io.libparsers') -lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib') +lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib', + moved={'infer_dtype': 'pandas.api.lib.infer_dtype'}) tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib') # use the closest tagged version if possible diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 933fc8fb1cc9b..b0fb7048f154c 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -218,9 +218,91 @@ cdef _try_infer_map(v): return None -def infer_dtype(object _values): +def infer_dtype(object value): """ - we are coercing to an ndarray here + Effeciently infer the type of a passed val, or list-like + array of values. Return a string describing the type. + + Parameters + ---------- + value : scalar, list, ndarray, or pandas type + + Returns + ------- + string describing the common type of the input data. 
+ Results can include: + + - string + - unicode + - bytes + - floating + - integer + - mixed-integer + - mixed-integer-float + - complex + - categorical + - boolean + - datetime64 + - datetime + - date + - timedelta64 + - timedelta + - time + - period + - mixed + + Raises + ------ + TypeError if ndarray-like but cannot infer the dtype + + Notes + ----- + - 'mixed' is the catchall for anything that is not otherwise + specialized + - 'mixed-integer-float' are floats and integers + - 'mixed-integer' are integers mixed with non-integers + + Examples + -------- + >>> infer_dtype(['foo', 'bar']) + 'string' + + >>> infer_dtype([b'foo', b'bar']) + 'bytes' + + >>> infer_dtype([1, 2, 3]) + 'integer' + + >>> infer_dtype([1, 2, 3.5]) + 'mixed-integer-float' + + >>> infer_dtype([1.0, 2.0, 3.5]) + 'floating' + + >>> infer_dtype(['a', 1]) + 'mixed-integer' + + >>> infer_dtype([True, False]) + 'boolean' + + >>> infer_dtype([True, False, np.nan]) + 'mixed' + + >>> infer_dtype([pd.Timestamp('20130101')]) + 'datetime' + + >>> infer_dtype([datetime.date(2013, 1, 1)]) + 'date' + + >>> infer_dtype([np.datetime64('2013-01-01')]) + 'datetime64' + + >>> infer_dtype([datetime.timedelta(0, 1, 1)]) + 'timedelta' + + >>> infer_dtype(pd.Series(list('aabc')).astype('category')) + 'categorical' + """ cdef: @@ -229,27 +311,27 @@ def infer_dtype(object _values): ndarray values bint seen_pdnat = False, seen_val = False - if isinstance(_values, np.ndarray): - values = _values - elif hasattr(_values, 'dtype'): + if isinstance(value, np.ndarray): + values = value + elif hasattr(value, 'dtype'): # this will handle ndarray-like # e.g. categoricals try: - values = getattr(_values, '_values', getattr( - _values, 'values', _values)) + values = getattr(value, '_values', getattr( + value, 'values', value)) except: - val = _try_infer_map(_values) - if val is not None: - return val + value = _try_infer_map(value) + if value is not None: + return value # its ndarray like but we can't handle - raise ValueError("cannot infer type for {0}".format(type(_values))) + raise ValueError("cannot infer type for {0}".format(type(value))) else: - if not isinstance(_values, list): - _values = list(_values) - values = list_to_object_array(_values) + if not isinstance(value, list): + value = list(value) + values = list_to_object_array(value) values = getattr(values, 'values', values) val = _try_infer_map(values) diff --git a/pandas/api/lib/__init__.py b/pandas/api/lib/__init__.py new file mode 100644 index 0000000000000..c86bfc6148655 --- /dev/null +++ b/pandas/api/lib/__init__.py @@ -0,0 +1,5 @@ +# flake8: noqa + +""" public toolkit API """ + +from pandas._libs.lib import infer_dtype diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index f448a9aad04c6..1dd22795533fc 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -21,7 +21,7 @@ from numpy import ndarray from pandas.util.validators import (validate_args, validate_kwargs, validate_args_and_kwargs) -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall from pandas.types.common import is_integer, is_bool from pandas.compat import OrderedDict diff --git a/pandas/computation/align.py b/pandas/computation/align.py index 4e12d58a4ab85..b4c80f4d493af 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -9,7 +9,8 @@ import pandas as pd from pandas import compat -import pandas.core.common as com +from pandas.errors import PerformanceWarning +from pandas.core.common 
import flatten from pandas.computation.common import _result_type_many @@ -101,7 +102,7 @@ def _align_core(terms): 'than an order of magnitude on term {1!r}, ' 'by more than {2:.4g}; performance may ' 'suffer'.format(axis, terms[i].name, ordm), - category=pd.core.common.PerformanceWarning, + category=PerformanceWarning, stacklevel=6) if transpose: @@ -121,7 +122,7 @@ def _align(terms): """Align a set of terms""" try: # flatten the parse tree (a nested list, really) - terms = list(com.flatten(terms)) + terms = list(flatten(terms)) except TypeError: # can't iterate so it must just be a constant or single variable if isinstance(terms.value, pd.core.generic.NDFrame): diff --git a/pandas/core/common.py b/pandas/core/common.py index 93e24dce8b623..bf4acf1fbf257 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -20,6 +20,10 @@ from pandas.api import types from pandas.types import common +# compat +from pandas.errors import ( # noqa + PerformanceWarning, UnsupportedFunctionCall, UnsortedIndexError) + # back-compat of public API # deprecate these functions m = sys.modules['pandas.core.common'] @@ -73,14 +77,6 @@ def array_equivalent(*args, **kwargs): return missing.array_equivalent(*args, **kwargs) -class PandasError(Exception): - pass - - -class PerformanceWarning(Warning): - pass - - class SettingWithCopyError(ValueError): pass @@ -89,24 +85,6 @@ class SettingWithCopyWarning(Warning): pass -class AmbiguousIndexError(PandasError, KeyError): - pass - - -class UnsupportedFunctionCall(ValueError): - pass - - -class UnsortedIndexError(KeyError): - """ Error raised when attempting to get a slice of a MultiIndex - and the index has not been lexsorted. Subclass of `KeyError`. - - .. versionadded:: 0.20.0 - - """ - pass - - class AbstractMethodError(NotImplementedError): """Raise this error instead of NotImplementedError for abstract methods while keeping compatibility with Python 2 and Python 3. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ffae22447cc65..237af0f85e866 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -56,7 +56,7 @@ is_named_tuple) from pandas.types.missing import isnull, notnull -from pandas.core.common import (PandasError, _try_sort, +from pandas.core.common import (_try_sort, _default_index, _values_from_object, _maybe_box_datetimelike, @@ -347,7 +347,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, mgr = self._init_ndarray(values, index, columns, dtype=dtype, copy=False) else: - raise PandasError('DataFrame constructor not properly called!') + raise ValueError('DataFrame constructor not properly called!') NDFrame.__init__(self, mgr, fastpath=True) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 61a847ccf1523..9e22bdd5facc4 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1140,7 +1140,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz) Going by Zen of Python? - "In the face of ambiguity, refuse the temptation to guess." + 'In the face of ambiguity, refuse the temptation to guess.' raise AmbiguousIndexError with integer labels? 
- No, prefer label-based indexing """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 5dac8a7e4d2da..9e777fd94de66 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -21,8 +21,8 @@ from pandas.compat import bind_method import pandas.core.missing as missing -from pandas.core.common import (_values_from_object, _maybe_match_name, - PerformanceWarning) +from pandas.errors import PerformanceWarning +from pandas.core.common import _values_from_object, _maybe_match_name from pandas.types.missing import notnull, isnull from pandas.types.common import (needs_i8_conversion, is_datetimelike_v_numeric, diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 5ab3c44b175fe..9e95023ccb359 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -21,7 +21,7 @@ from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) from pandas.compat.numpy import function as nv -from pandas.core.common import PandasError, _try_sort, _default_index +from pandas.core.common import _try_sort, _default_index from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -174,7 +174,7 @@ def _init_data(self, data, copy, dtype, **kwargs): copy=False) copy = False else: # pragma: no cover - raise PandasError('Panel constructor not properly called!') + raise ValueError('Panel constructor not properly called!') NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype) @@ -1150,8 +1150,8 @@ def _construct_return_type(self, result, axes=None): return self._constructor_sliced( result, **self._extract_axes_for_slice(self, axes)) - raise PandasError('invalid _construct_return_type [self->%s] ' - '[result->%s]' % (self, result)) + raise ValueError('invalid _construct_return_type [self->%s] ' + '[result->%s]' % (self, result)) def _wrap_result(self, result, axis): axis = self._get_axis_name(axis) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py new file mode 100644 index 0000000000000..f6719e7be421b --- /dev/null +++ b/pandas/errors/__init__.py @@ -0,0 +1,57 @@ +# flake8: noqa + +""" expose public exceptions & warnings """ + +from pandas._libs.tslib import OutOfBoundsDatetime + + +class PerformanceWarning(Warning): + """ + Warnings shown when there is a possible performance + impact. + """ + +class UnsupportedFunctionCall(ValueError): + """ + If attempting to call a numpy function on a pandas + object. For example using ``np.cumsum(groupby_object)``. + """ + +class UnsortedIndexError(KeyError): + """ + Error raised when attempting to get a slice of a MultiIndex + and the index has not been lexsorted. Subclass of `KeyError`. + + .. versionadded:: 0.20.0 + + """ + + +class ParserError(ValueError): + """ + Exception that is thrown by an error is encountered in `pd.read_csv` + """ + + +class DtypeWarning(Warning): + """ + Warning that is raised for a dtype incompatiblity. 
This is + can happen whenever `pd.read_csv` encounters non- + uniform dtypes in a column(s) of a given CSV file + """ + + +class EmptyDataError(ValueError): + """ + Exception that is thrown in `pd.read_csv` (by both the C and + Python engines) when empty data or header is encountered + """ + + +class ParserWarning(Warning): + """ + Warning that is raised in `pd.read_csv` whenever it is necessary + to change parsers (generally from 'c' to 'python') contrary to the + one specified by the user due to lack of support or functionality for + parsing particular attributes of a CSV file with the requsted engine + """ diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index e6ae0605d4758..f12b10ae682fa 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -19,12 +19,10 @@ is_list_like, is_scalar) from pandas.types.missing import isnull, array_equivalent +from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, - is_null_slice, - PerformanceWarning, - UnsortedIndexError) - + is_null_slice) import pandas.core.base as base from pandas.util.decorators import (Appender, cache_readonly, diff --git a/pandas/io/common.py b/pandas/io/common.py index e42d218d7925f..8bc7217db87f9 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -12,6 +12,14 @@ from pandas.core.common import AbstractMethodError from pandas.types.common import is_number +# compat +from pandas.errors import (ParserError, DtypeWarning, # noqa + EmptyDataError, ParserWarning) + +# gh-12665: Alias for now and remove later. +CParserError = ParserError + + try: from s3fs import S3File need_text_wrapping = (BytesIO, S3File) @@ -69,43 +77,6 @@ def urlopen(*args, **kwargs): _VALID_URLS.discard('') -class ParserError(ValueError): - """ - Exception that is thrown by an error is encountered in `pd.read_csv` - """ - pass - - -# gh-12665: Alias for now and remove later. -CParserError = ParserError - - -class DtypeWarning(Warning): - """ - Warning that is raised whenever `pd.read_csv` encounters non- - uniform dtypes in a column(s) of a given CSV file - """ - pass - - -class EmptyDataError(ValueError): - """ - Exception that is thrown in `pd.read_csv` (by both the C and - Python engines) when empty data or header is encountered - """ - pass - - -class ParserWarning(Warning): - """ - Warning that is raised in `pd.read_csv` whenever it is necessary - to change parsers (generally from 'c' to 'python') contrary to the - one specified by the user due to lack of support or functionality for - parsing particular attributes of a CSV file with the requsted engine - """ - pass - - class BaseIterator(object): """Subclass this and provide a "__next__()" method to obtain an iterator. Useful only when the object being iterated is non-reusable (e.g. 
OK for a diff --git a/pandas/io/excel.py b/pandas/io/excel.py index d324855bc2f4d..6d136869fc73f 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -15,9 +15,9 @@ from pandas.core.frame import DataFrame from pandas.io.parsers import TextParser +from pandas.errors import EmptyDataError from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, - EmptyDataError, get_filepath_or_buffer, - _NA_VALUES) + get_filepath_or_buffer, _NA_VALUES) from pandas.tseries.period import Period from pandas.io.json import libjson from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, diff --git a/pandas/io/html.py b/pandas/io/html.py index 8a3709dba2176..7b58e612de2df 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -13,7 +13,8 @@ import numpy as np from pandas.types.common import is_list_like -from pandas.io.common import (EmptyDataError, _is_url, urlopen, +from pandas.errors import EmptyDataError +from pandas.io.common import (_is_url, urlopen, parse_url, _validate_header_arg) from pandas.io.parsers import TextParser from pandas.compat import (lrange, lmap, u, string_types, iteritems, diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 4662e8b635d3f..ca5a27ee5b68e 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -59,7 +59,7 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning from pandas.io.common import get_filepath_or_buffer from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 0080ded1ac03d..b624d2cc0c7ad 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -29,10 +29,11 @@ from pandas.core import algorithms from pandas.core.common import AbstractMethodError from pandas.io.date_converters import generic_parser +from pandas.errors import ParserWarning, ParserError, EmptyDataError from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, _get_handle, UnicodeReader, UTF8Recoder, - BaseIterator, ParserError, EmptyDataError, - ParserWarning, _NA_VALUES, _infer_compression) + BaseIterator, + _NA_VALUES, _infer_compression) from pandas.tseries import tools from pandas.util.decorators import Appender diff --git a/pandas/io/parsers.pyx b/pandas/io/parsers.pyx index 3728cda559050..4053e726d0a04 100644 --- a/pandas/io/parsers.pyx +++ b/pandas/io/parsers.pyx @@ -13,12 +13,12 @@ from cpython cimport (PyObject, PyBytes_FromString, PyUnicode_Check, PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport PyObject, Py_XDECREF -from pandas.io.common import (ParserError, DtypeWarning, - EmptyDataError, ParserWarning) +from pandas.errors import (ParserError, DtypeWarning, + EmptyDataError, ParserWarning) # Import CParserError as alias of ParserError for backwards compatibility. # Ultimately, we want to remove this import. See gh-12665 and gh-14479. 
-from pandas.io.common import CParserError +CParserError = ParserError cdef extern from "Python.h": object PyUnicode_FromString(char *v) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f75a4761e0948..9b525b76b0f17 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -32,7 +32,8 @@ from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.base import StringMixin from pandas.formats.printing import adjoin, pprint_thing -from pandas.core.common import _asarray_tuplesafe, PerformanceWarning +from pandas.errors import PerformanceWarning +from pandas.core.common import _asarray_tuplesafe from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical, _factorize_from_iterables from pandas.core.internals import (BlockManager, make_block, diff --git a/pandas/lib.py b/pandas/lib.py index 6c26627a97de3..859a78060fcc1 100644 --- a/pandas/lib.py +++ b/pandas/lib.py @@ -2,6 +2,7 @@ import warnings warnings.warn("The pandas.lib module is deprecated and will be " - "removed in a future version. Please import from " - "the pandas._libs.lib instead", FutureWarning, stacklevel=2) + "removed in a future version. These are private functions " + "and can be accessed from pandas._libs.lib instead", + FutureWarning, stacklevel=2) from pandas._libs.lib import * diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 73222c246fc70..7d1308d67668e 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,12 +1,9 @@ # -*- coding: utf-8 -*- from warnings import catch_warnings -import numpy as np import pandas as pd -from pandas.core import common as com from pandas import api -from pandas.api import types from pandas.util import testing as tm @@ -33,7 +30,7 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', - 'indexes', 'formats', 'pandas', + 'indexes', 'formats', 'errors', 'pandas', 'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 'io'] @@ -129,80 +126,6 @@ def test_api(self): self.check(api, self.allowed) -class TestTypes(Base, tm.TestCase): - - allowed = ['is_any_int_dtype', 'is_bool', 'is_bool_dtype', - 'is_categorical', 'is_categorical_dtype', 'is_complex', - 'is_complex_dtype', 'is_datetime64_any_dtype', - 'is_datetime64_dtype', 'is_datetime64_ns_dtype', - 'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal', - 'is_extension_type', 'is_float', 'is_float_dtype', - 'is_floating_dtype', 'is_int64_dtype', 'is_integer', - 'is_integer_dtype', 'is_number', 'is_numeric_dtype', - 'is_object_dtype', 'is_scalar', 'is_sparse', - 'is_string_dtype', 'is_signed_integer_dtype', - 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', - 'is_unsigned_integer_dtype', 'is_period', - 'is_period_dtype', 'is_re', 'is_re_compilable', - 'is_dict_like', 'is_iterator', - 'is_list_like', 'is_hashable', - 'is_named_tuple', 'is_sequence', - 'pandas_dtype'] - - def test_types(self): - - self.check(types, self.allowed) - - def check_deprecation(self, fold, fnew): - with tm.assert_produces_warning(DeprecationWarning): - try: - result = fold('foo') - expected = fnew('foo') - self.assertEqual(result, expected) - except TypeError: - self.assertRaises(TypeError, - lambda: fnew('foo')) - except AttributeError: - self.assertRaises(AttributeError, - lambda: fnew('foo')) - - def test_deprecation_core_common(self): - - # test that we are in fact deprecating - # the pandas.core.common introspectors - for t in self.allowed: - 
self.check_deprecation(getattr(com, t), getattr(types, t)) - - def test_deprecation_core_common_array_equivalent(self): - - with tm.assert_produces_warning(DeprecationWarning): - com.array_equivalent(np.array([1, 2]), np.array([1, 2])) - - def test_deprecation_core_common_moved(self): - - # these are in pandas.types.common - l = ['is_datetime_arraylike', - 'is_datetime_or_timedelta_dtype', - 'is_datetimelike', - 'is_datetimelike_v_numeric', - 'is_datetimelike_v_object', - 'is_datetimetz', - 'is_int_or_datetime_dtype', - 'is_period_arraylike', - 'is_string_like', - 'is_string_like_dtype'] - - from pandas.types import common as c - for t in l: - self.check_deprecation(getattr(com, t), getattr(c, t)) - - def test_removed_from_core_common(self): - - for t in ['is_null_datelike_scalar', - 'ensure_float']: - self.assertRaises(AttributeError, lambda: getattr(com, t)) - - class TestDatetoolsDeprecation(tm.TestCase): def test_deprecation_access_func(self): @@ -264,11 +187,11 @@ class TestLib(tm.TestCase): def test_deprecation_access_func(self): with catch_warnings(record=True): - pd.lib.infer_dtype + pd.lib.infer_dtype('foo') class TestTSLib(tm.TestCase): def test_deprecation_access_func(self): with catch_warnings(record=True): - pd.tslib.Timestamp + pd.tslib.Timestamp('20160101') diff --git a/pandas/tests/api/test_lib.py b/pandas/tests/api/test_lib.py new file mode 100644 index 0000000000000..db2c68c6197d7 --- /dev/null +++ b/pandas/tests/api/test_lib.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- + +from warnings import catch_warnings +import pandas # noqa + + +def test_moved_infer_dtype(): + with catch_warnings(record=True): + e = pandas.lib.infer_dtype('foo') + assert e is not None diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py new file mode 100644 index 0000000000000..686de4a196034 --- /dev/null +++ b/pandas/tests/api/test_types.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from pandas.core import common as com +from pandas.api import types +from pandas.util import testing as tm + +from .test_api import Base + + +class TestTypes(Base, tm.TestCase): + + allowed = ['is_any_int_dtype', 'is_bool', 'is_bool_dtype', + 'is_categorical', 'is_categorical_dtype', 'is_complex', + 'is_complex_dtype', 'is_datetime64_any_dtype', + 'is_datetime64_dtype', 'is_datetime64_ns_dtype', + 'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal', + 'is_extension_type', 'is_float', 'is_float_dtype', + 'is_floating_dtype', 'is_int64_dtype', 'is_integer', + 'is_integer_dtype', 'is_number', 'is_numeric_dtype', + 'is_object_dtype', 'is_scalar', 'is_sparse', + 'is_string_dtype', 'is_signed_integer_dtype', + 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', + 'is_unsigned_integer_dtype', 'is_period', + 'is_period_dtype', 'is_re', 'is_re_compilable', + 'is_dict_like', 'is_iterator', + 'is_list_like', 'is_hashable', + 'is_named_tuple', 'is_sequence', + 'pandas_dtype'] + + def test_types(self): + + self.check(types, self.allowed) + + def check_deprecation(self, fold, fnew): + with tm.assert_produces_warning(DeprecationWarning): + try: + result = fold('foo') + expected = fnew('foo') + self.assertEqual(result, expected) + except TypeError: + self.assertRaises(TypeError, + lambda: fnew('foo')) + except AttributeError: + self.assertRaises(AttributeError, + lambda: fnew('foo')) + + def test_deprecation_core_common(self): + + # test that we are in fact deprecating + # the pandas.core.common introspectors + for t in self.allowed: + self.check_deprecation(getattr(com, t), 
getattr(types, t)) + + def test_deprecation_core_common_array_equivalent(self): + + with tm.assert_produces_warning(DeprecationWarning): + com.array_equivalent(np.array([1, 2]), np.array([1, 2])) + + def test_deprecation_core_common_moved(self): + + # these are in pandas.types.common + l = ['is_datetime_arraylike', + 'is_datetime_or_timedelta_dtype', + 'is_datetimelike', + 'is_datetimelike_v_numeric', + 'is_datetimelike_v_object', + 'is_datetimetz', + 'is_int_or_datetime_dtype', + 'is_period_arraylike', + 'is_string_like', + 'is_string_like_dtype'] + + from pandas.types import common as c + for t in l: + self.check_deprecation(getattr(com, t), getattr(c, t)) + + def test_removed_from_core_common(self): + + for t in ['is_null_datelike_scalar', + 'ensure_float']: + self.assertRaises(AttributeError, lambda: getattr(com, t)) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 81e9b7c77a81b..97ed88b1dc22b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -10,6 +10,7 @@ from pandas.types.common import is_list_like, is_scalar import pandas as pd from pandas.core import common as com +from pandas.errors import PerformanceWarning from pandas import DataFrame, Series, Panel, date_range from pandas.util.testing import makeCustomDataframe as mkdf @@ -1023,7 +1024,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): df = DataFrame(randn(1000, 10)) s = Series(randn(10000)) if engine == 'numexpr': - seen = pd.core.common.PerformanceWarning + seen = PerformanceWarning else: seen = False @@ -1045,7 +1046,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): is_python_engine = engine == 'python' if not is_python_engine: - wrn = pd.core.common.PerformanceWarning + wrn = PerformanceWarning else: wrn = False diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 839ceb5368240..7ed2bfb601eb8 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -16,7 +16,7 @@ assert_frame_equal, assertRaisesRegexp) -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning import pandas.util.testing as tm from pandas.tests.frame.common import TestData diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ba7e45d7e66fb..1ab292649a973 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -20,9 +20,7 @@ from pandas import (DataFrame, Index, Series, isnull, MultiIndex, Timedelta, Timestamp, date_range) -from pandas.core.common import PandasError import pandas as pd -import pandas.core.common as com import pandas._libs.lib as lib import pandas.util.testing as tm @@ -774,7 +772,7 @@ def test_constructor_more(self): # corner, silly # TODO: Fix this Exception to be better... 
- with tm.assertRaisesRegexp(PandasError, 'constructor not ' + with tm.assertRaisesRegexp(ValueError, 'constructor not ' 'properly called'): DataFrame((1, 2, 3)) @@ -1242,8 +1240,8 @@ def test_constructor_single_value(self): dtype=object), index=[1, 2], columns=['a', 'c'])) - self.assertRaises(com.PandasError, DataFrame, 'a', [1, 2]) - self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a', 'c']) + self.assertRaises(ValueError, DataFrame, 'a', [1, 2]) + self.assertRaises(ValueError, DataFrame, 'a', columns=['a', 'c']) with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): DataFrame('a', [1, 2], ['a', 'c'], float) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index e49dfffc48803..927b9f6a48718 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -8,7 +8,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) -from pandas.io.common import ParserError +from pandas.errors import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) import pandas as pd diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 83502434e6053..c17c98c5448be 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -8,7 +8,7 @@ from pandas import (date_range, bdate_range, Timestamp, isnull, Index, MultiIndex, DataFrame, Series, concat, Panel) -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall, PerformanceWarning from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, assert_index_equal, assertRaisesRegexp) @@ -3475,7 +3475,7 @@ def test_groupby_multiindex_not_lexsorted(self): tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) expected = lexsorted_df.groupby('a').mean() - with tm.assert_produces_warning(com.PerformanceWarning): + with tm.assert_produces_warning(PerformanceWarning): result = not_lexsorted_df.groupby('a').mean() tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 4681879d708c4..4be9999982f12 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,7 +7,7 @@ import pandas as pd import pandas._libs.tslib as tslib import pandas.util.testing as tm -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning from pandas.tseries.index import cdate_range from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 0c274b2f6c4ff..470526043234f 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -15,7 +15,7 @@ from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, compat, date_range, period_range) from pandas.compat import PY3, long, lrange, lzip, range, u -from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.indexes.base import InvalidIndexError from pandas._libs import lib from pandas._libs.lib import Timestamp diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index e68e8015a2f39..b12d1eb97f88b 100644 --- 
a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -9,7 +9,7 @@ from pandas.compat import lrange from pandas import Series, DataFrame, option_context, MultiIndex from pandas.util import testing as tm -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning class TestIX(tm.TestCase): diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index ed943202872a7..1fc0a87764b94 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -5,7 +5,7 @@ from pandas import (Panel, Series, MultiIndex, DataFrame, Timestamp, Index, date_range) from pandas.util import testing as tm -from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.tests.indexing.common import _mklbl diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 2c8bca490f274..7faf485b65d10 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -19,7 +19,8 @@ from pandas import compat from pandas.compat import (StringIO, BytesIO, PY3, range, lrange, u) -from pandas.io.common import DtypeWarning, EmptyDataError, URLError +from pandas.errors import DtypeWarning, EmptyDataError +from pandas.io.common import URLError from pandas.io.parsers import TextFileReader, TextParser diff --git a/pandas/tests/io/parser/dialect.py b/pandas/tests/io/parser/dialect.py index ee50cf812f72e..82871628e54d6 100644 --- a/pandas/tests/io/parser/dialect.py +++ b/pandas/tests/io/parser/dialect.py @@ -9,7 +9,7 @@ from pandas import DataFrame from pandas.compat import StringIO -from pandas.io.common import ParserWarning +from pandas.errors import ParserWarning import pandas.util.testing as tm diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index fa95c18c4d7a9..8066718363803 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -12,7 +12,7 @@ from pandas import DataFrame, Series, Index, MultiIndex, Categorical from pandas.compat import StringIO from pandas.types.dtypes import CategoricalDtype -from pandas.io.common import ParserWarning +from pandas.errors import ParserWarning class DtypeTests(object): diff --git a/pandas/tests/io/parser/skiprows.py b/pandas/tests/io/parser/skiprows.py index c53e6a1579267..cb1b656e42be2 100644 --- a/pandas/tests/io/parser/skiprows.py +++ b/pandas/tests/io/parser/skiprows.py @@ -12,7 +12,7 @@ import pandas.util.testing as tm from pandas import DataFrame -from pandas.io.common import EmptyDataError +from pandas.errors import EmptyDataError from pandas.compat import StringIO, range, lrange diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 48dd5d4ba506b..14146a3ad1e9a 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,7 +13,7 @@ import pandas.util.testing as tm from pandas.compat import StringIO -from pandas.io.common import ParserError +from pandas.errors import ParserError from pandas.io.parsers import read_csv, read_table diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 3c980cae3351a..c08d235b07c9e 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -11,7 +11,6 @@ from pandas.compat import is_platform_windows, StringIO from pandas import read_csv, concat -import pandas as pd try: from 
pathlib import Path @@ -89,23 +88,6 @@ def test_iterator(self): tm.assert_frame_equal(first, expected.iloc[[0]]) tm.assert_frame_equal(concat(it), expected.iloc[1:]) - def test_error_rename(self): - # see gh-12665 - try: - raise common.CParserError() - except common.ParserError: - pass - - try: - raise common.ParserError() - except common.CParserError: - pass - - try: - raise common.ParserError() - except pd.parser.CParserError: - pass - class TestMMapWrapper(tm.TestCase): diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index efa8587d64657..1b6b0fc62f913 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -10,7 +10,7 @@ from pandas.compat import u, PY3 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, period_range, Index, Categorical) -from pandas.core.common import PerformanceWarning +from pandas.errors import PerformanceWarning from pandas.io.packers import to_msgpack, read_msgpack import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py new file mode 100644 index 0000000000000..aabce7ecb7066 --- /dev/null +++ b/pandas/tests/test_errors.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- + +import pytest +import pandas # noqa +import pandas as pd + + +@pytest.mark.parametrize( + "exc", ['UnsupportedFunctionCall', 'UnsortedIndexError', + 'OutOfBoundsDatetime', + 'ParserError', 'PerformanceWarning', 'DtypeWarning', + 'EmptyDataError', 'ParserWarning']) +def test_exception_importable(exc): + from pandas import errors + e = getattr(errors, exc) + assert e is not None + + # check that we can raise on them + with pytest.raises(e): + raise e() + + +def test_catch_oob(): + from pandas import errors + + try: + pd.Timestamp('15000101') + except errors.OutOfBoundsDatetime: + pass + + +def test_error_rename(): + # see gh-12665 + from pandas.errors import ParserError + from pandas.io.common import CParserError + + try: + raise CParserError() + except ParserError: + pass + + try: + raise ParserError() + except CParserError: + pass + + try: + raise ParserError() + except pd.parser.CParserError: + pass diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index fe03d7886e661..ceb12c6c03074 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -16,7 +16,7 @@ import pandas.core.window as rwindow import pandas.tseries.offsets as offsets from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall import pandas.util.testing as tm from pandas.compat import range, zip, PY3 diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index 57a655b0b7610..57e5a1631f8e8 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -14,7 +14,7 @@ from pandas.types.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS from pandas.tseries.frequencies import to_offset diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9123131a6dccf..8fa842a836051 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -25,8 
+25,8 @@ from pandas.types.missing import isnull import pandas.types.concat as _concat -from pandas.core.common import (_values_from_object, _maybe_box, - PerformanceWarning) +from pandas.errors import PerformanceWarning +from pandas.core.common import _values_from_object, _maybe_box from pandas.core.index import Index, Int64Index, Float64Index from pandas.indexes.base import _index_shared_docs @@ -618,8 +618,7 @@ def _has_same_tz(self, other): def _cached_range(cls, start=None, end=None, periods=None, offset=None, name=None): if start is None and end is None: - # I somewhat believe this should never be raised externally and - # therefore should be a `PandasError` but whatever... + # I somewhat believe this should never be raised externally raise TypeError('Must specify either start or end.') if start is not None: start = Timestamp(start) @@ -630,8 +629,7 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None, 'Must either specify period or provide both start and end.') if offset is None: - # This can't happen with external-facing code, therefore - # PandasError + # This can't happen with external-facing code raise TypeError('Must provide offset.') drc = _daterange_cache diff --git a/pandas/tslib.py b/pandas/tslib.py index 3ecbffa20700d..3d96dc496c0de 100644 --- a/pandas/tslib.py +++ b/pandas/tslib.py @@ -3,6 +3,6 @@ import warnings warnings.warn("The pandas.tslib module is deprecated and will be " "removed in a future version. Please import from " - "the pandas._libs.tslib instead", FutureWarning, stacklevel=2) + "the pandas or pandas.errors instead", FutureWarning, stacklevel=2) from pandas._libs.tslib import (Timestamp, Timedelta, NaT, OutOfBoundsDatetime) diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index af7faf9dd96c8..0885c81ce2757 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -18,14 +18,19 @@ class _DeprecatedModule(object): be used when needed. removals : objects or methods in module that will no longer be accessible once module is removed. + moved : dict, optional + dictionary of function name -> new location for moved + objects """ - def __init__(self, deprmod, deprmodto=None, removals=None): + def __init__(self, deprmod, deprmodto=None, removals=None, + moved=None): self.deprmod = deprmod self.deprmodto = deprmodto self.removals = removals if self.removals is not None: self.removals = frozenset(self.removals) + self.moved = moved # For introspection purposes. 
self.self_dir = frozenset(dir(self.__class__)) @@ -60,6 +65,14 @@ def __getattr__(self, name): "{deprmod}.{name} is deprecated and will be removed in " "a future version.".format(deprmod=self.deprmod, name=name), FutureWarning, stacklevel=2) + elif self.moved is not None and name in self.moved: + warnings.warn( + "{deprmod} is deprecated and will be removed in " + "a future version.\nYou can access {name} in {moved}".format( + deprmod=self.deprmod, + name=name, + moved=self.moved[name]), + FutureWarning, stacklevel=2) else: deprmodto = self.deprmodto if deprmodto is None: diff --git a/setup.py b/setup.py index 1b471f76ac5e6..96b25f7427370 100755 --- a/setup.py +++ b/setup.py @@ -631,11 +631,13 @@ def pxd(name): packages=['pandas', 'pandas.api', 'pandas.api.types', + 'pandas.api.lib', 'pandas.compat', 'pandas.compat.numpy', 'pandas.computation', 'pandas.core', 'pandas.indexes', + 'pandas.errors', 'pandas.io', 'pandas.io.json', 'pandas.io.sas', From faf6401dd41c1469be50e360cba071555205e219 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 4 Apr 2017 14:29:38 +0200 Subject: [PATCH 335/353] DOC fixes in contributing.rst (#15887) --- doc/source/contributing.rst | 14 +++++++------- pandas/core/series.py | 2 +- pandas/io/json/normalize.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 467d6456d60cd..8af7de688a2ae 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -536,10 +536,10 @@ signatures and add deprecation warnings where needed. .. _contributing.ci: -Testing Thru Continuous Integration +Testing With Continuous Integration ----------------------------------- -The *pandas* testing suite will run automatically on `Travis-CI `__, +The *pandas* test suite will run automatically on `Travis-CI `__, `Appveyor `__, and `Circle CI `__ continuous integration services, once your pull request is submitted. However, if you wish to run the test suite on a branch prior to submitting the pull request, @@ -548,14 +548,14 @@ for `Travis-CI `__, `Appveyor `__ , and `CircleCI `__. A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing, -then you will get a red 'X', where you can click thru to see the individual failed tests. +then you will get a red 'X', where you can click through to see the individual failed tests. This is an example of a green build. .. image:: _static/ci.png .. note:: - Each time you push to *your* fork, a *new* run of the tests will trigger on the CI. Appveyor will auto-cancel + Each time you push to *your* fork, a *new* run of the tests will be triggered on the CI. Appveyor will auto-cancel any non-currently-running tests for that same pull-request. You can enable the auto-cancel feature for `Travis-CI here `__ and for `CircleCI here `__. @@ -623,12 +623,12 @@ testing of many cases in a concise way that enables an easy-to-read syntax. .. note:: - .. code-block:: python + *pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. - *pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class, inheriting from ``tm.TestCase``. + .. code-block:: python class TestReallyCoolFeature(tm.TestCase): - .... + .... Going forward we are moving to a more *functional* style, please see below. 
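A minimal sketch of the functional test style referred to above, assuming pytest is the test runner; the test name and the parametrized dtypes are illustrative only and are not part of the patch:

.. code-block:: python

    import pytest
    import numpy as np
    import pandas as pd


    @pytest.mark.parametrize('dtype', ['int64', 'float64'])
    def test_really_cool_feature(dtype):
        # a plain module-level function parametrized by pytest,
        # rather than a method on a tm.TestCase subclass
        result = pd.Series([1, 2, 3], dtype=dtype)
        assert result.dtype == np.dtype(dtype)

Such functions are collected and run directly by pytest, so no class wrapper or unittest-style setUp is needed.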
diff --git a/pandas/core/series.py b/pandas/core/series.py index bcd58ea791083..1aaa106d2c68f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -80,7 +80,7 @@ If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series', optional_by='', - versionadded_to_excel='\n.. versionadded:: 0.20.0\n') + versionadded_to_excel='\n .. versionadded:: 0.20.0\n') def _coerce_method(converter): diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 518e0bc2064e2..401d8d9ead2b8 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -114,10 +114,10 @@ def json_normalize(data, record_path=None, meta=None, meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' - * ignore : will ignore KeyError if keys listed in meta are not - always present - * raise : will raise KeyError if keys listed in meta are not - always present + * 'ignore' : will ignore KeyError if keys listed in meta are not + always present + * 'raise' : will raise KeyError if keys listed in meta are not + always present .. versionadded:: 0.20.0 From e50d397efe31404802c55637793ea97850ac4e84 Mon Sep 17 00:00:00 2001 From: Aleksey Bilogur Date: Tue, 4 Apr 2017 13:34:11 -0400 Subject: [PATCH 336/353] API: add top-level melt function as method to DataFrame xref #12640 xref #14876 Author: Aleksey Bilogur Closes #15521 from ResidentMario/12640 and squashes the following commits: 1657246 [Aleksey Bilogur] two doc changes 28a38f2 [Aleksey Bilogur] tweak whatsnew entry. 5f306a9 [Aleksey Bilogur] +whatsnew ff895fe [Aleksey Bilogur] Add tests, update docs. 11f3fe4 [Aleksey Bilogur] rm stray debug. 3cbbed5 [Aleksey Bilogur] Melt docstring. d54dc2f [Aleksey Bilogur] +pd.DataFrame.melt. --- doc/source/api.rst | 1 + doc/source/reshaping.rst | 11 ++-- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/frame.py | 104 ++++++++++++++++++++++++++++++++ pandas/core/reshape.py | 96 ++--------------------------- pandas/tests/test_reshape.py | 102 +++++++++++++++++++------------ 6 files changed, 182 insertions(+), 133 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index dfeaf8e60feb1..24bad7d515305 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -933,6 +933,7 @@ Reshaping, sorting, transposing DataFrame.swaplevel DataFrame.stack DataFrame.unstack + DataFrame.melt DataFrame.T DataFrame.to_panel DataFrame.to_xarray diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index eccaa9474bf6d..2c5aae133d4d9 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -265,8 +265,8 @@ the right thing: Reshaping by Melt ----------------- -The :func:`~pandas.melt` function is useful to massage a -DataFrame into a format where one or more columns are identifier variables, +The top-level :func:``melt` and :func:`~DataFrame.melt` functions are useful to +massage a DataFrame into a format where one or more columns are identifier variables, while all other columns, considered measured variables, are "unpivoted" to the row axis, leaving just two non-identifier columns, "variable" and "value". 
The names of those columns can be customized by supplying the ``var_name`` and @@ -281,10 +281,11 @@ For instance, 'height' : [5.5, 6.0], 'weight' : [130, 150]}) cheese - pd.melt(cheese, id_vars=['first', 'last']) - pd.melt(cheese, id_vars=['first', 'last'], var_name='quantity') + cheese.melt(id_vars=['first', 'last']) + cheese.melt(id_vars=['first', 'last'], var_name='quantity') -Another way to transform is to use the ``wide_to_long`` panel data convenience function. +Another way to transform is to use the ``wide_to_long`` panel data convenience +function. .. ipython:: python diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 74fe7916523c5..355dceba1b953 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -324,6 +324,7 @@ Other Enhancements - ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) - ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). +- ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`). - ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). - ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 237af0f85e866..3980bf6cdbc09 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4051,6 +4051,110 @@ def unstack(self, level=-1, fill_value=None): from pandas.core.reshape import unstack return unstack(self, level, fill_value) + _shared_docs['melt'] = (""" + "Unpivots" a DataFrame from wide format to long format, optionally + leaving identifier variables set. + + This function is useful to massage a DataFrame into a format where one + or more columns are identifier variables (`id_vars`), while all other + columns, considered measured variables (`value_vars`), are "unpivoted" to + the row axis, leaving just two non-identifier columns, 'variable' and + 'value'. + + %(versionadded)s + Parameters + ---------- + frame : DataFrame + id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. + value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. + var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. + value_name : scalar, default 'value' + Name to use for the 'value' column. + col_level : int or string, optional + If columns are a MultiIndex then use this level to melt. + + See also + -------- + %(other)s + pivot_table + DataFrame.pivot + + Examples + -------- + >>> import pandas as pd + >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, + ... 'B': {0: 1, 1: 3, 2: 5}, + ... 'C': {0: 2, 1: 4, 2: 6}}) + >>> df + A B C + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 3 a C 2 + 4 b C 4 + 5 c C 6 + + The names of 'variable' and 'value' columns can be customized: + + >>> %(caller)sid_vars=['A'], value_vars=['B'], + ... 
var_name='myVarname', value_name='myValname') + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + + If you have multi-index columns: + + >>> df.columns = [list('ABC'), list('DEF')] + >>> df + A B C + D E F + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value + 0 a B E 1 + 1 b B E 3 + 2 c B E 5 + + """) + + @Appender(_shared_docs['melt'] % + dict(caller='df.melt(', + versionadded='.. versionadded:: 0.20.0\n', + other='melt')) + def melt(self, id_vars=None, value_vars=None, var_name=None, + value_name='value', col_level=None): + from pandas.core.reshape import melt + return melt(self, id_vars=id_vars, value_vars=value_vars, + var_name=var_name, value_name=value_name, + col_level=col_level) + # ---------------------------------------------------------------------- # Time series-related diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 2822d98b7c906..c7e06d63fbda9 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -28,6 +28,8 @@ import pandas.core.algorithms as algos from pandas._libs import algos as _algos, reshape as _reshape +from pandas.core.frame import _shared_docs +from pandas.util.decorators import Appender from pandas.core.index import MultiIndex, _get_na_value @@ -701,98 +703,12 @@ def _convert_level_number(level_num, columns): return result +@Appender(_shared_docs['melt'] % + dict(caller='pd.melt(df, ', + versionadded="", + other='DataFrame.melt')) def melt(frame, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None): - """ - "Unpivots" a DataFrame from wide format to long format, optionally leaving - identifier variables set. - - This function is useful to massage a DataFrame into a format where one - or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to - the row axis, leaving just two non-identifier columns, 'variable' and - 'value'. - - Parameters - ---------- - frame : DataFrame - id_vars : tuple, list, or ndarray, optional - Column(s) to use as identifier variables. - value_vars : tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. - var_name : scalar - Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. - value_name : scalar, default 'value' - Name to use for the 'value' column. - col_level : int or string, optional - If columns are a MultiIndex then use this level to melt. - - See also - -------- - pivot_table - DataFrame.pivot - - Examples - -------- - >>> import pandas as pd - >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, - ... 'B': {0: 1, 1: 3, 2: 5}, - ... 'C': {0: 2, 1: 4, 2: 6}}) - >>> df - A B C - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> pd.melt(df, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 3 a C 2 - 4 b C 4 - 5 c C 6 - - The names of 'variable' and 'value' columns can be customized: - - >>> pd.melt(df, id_vars=['A'], value_vars=['B'], - ... 
var_name='myVarname', value_name='myValname') - A myVarname myValname - 0 a B 1 - 1 b B 3 - 2 c B 5 - - If you have multi-index columns: - - >>> df.columns = [list('ABC'), list('DEF')] - >>> df - A B C - D E F - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> pd.melt(df, col_level=0, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> pd.melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')]) - (A, D) variable_0 variable_1 value - 0 a B E 1 - 1 b B E 3 - 2 c B E 5 - - """ # TODO: what about the existing index? if id_vars is not None: if not is_list_like(id_vars): diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 7ba743a6c425c..ee255c1863b41 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -30,23 +30,46 @@ def setUp(self): self.df1.columns = [list('ABC'), list('abc')] self.df1.columns.names = ['CAP', 'low'] - def test_default_col_names(self): + def test_top_level_method(self): result = melt(self.df) self.assertEqual(result.columns.tolist(), ['variable', 'value']) - result1 = melt(self.df, id_vars=['id1']) + def test_method_signatures(self): + tm.assert_frame_equal(self.df.melt(), + melt(self.df)) + + tm.assert_frame_equal(self.df.melt(id_vars=['id1', 'id2'], + value_vars=['A', 'B']), + melt(self.df, + id_vars=['id1', 'id2'], + value_vars=['A', 'B'])) + + tm.assert_frame_equal(self.df.melt(var_name=self.var_name, + value_name=self.value_name), + melt(self.df, + var_name=self.var_name, + value_name=self.value_name)) + + tm.assert_frame_equal(self.df1.melt(col_level=0), + melt(self.df1, col_level=0)) + + def test_default_col_names(self): + result = self.df.melt() + self.assertEqual(result.columns.tolist(), ['variable', 'value']) + + result1 = self.df.melt(id_vars=['id1']) self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value' ]) - result2 = melt(self.df, id_vars=['id1', 'id2']) + result2 = self.df.melt(id_vars=['id1', 'id2']) self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value']) def test_value_vars(self): - result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') + result3 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A') self.assertEqual(len(result3), 10) - result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) + result4 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B']) expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, @@ -65,8 +88,8 @@ def test_value_vars_types(self): columns=['id1', 'id2', 'variable', 'value']) for type_ in (tuple, list, np.array): - result = melt(self.df, id_vars=['id1', 'id2'], - value_vars=type_(('A', 'B'))) + result = self.df.melt(id_vars=['id1', 'id2'], + value_vars=type_(('A', 'B'))) tm.assert_frame_equal(result, expected) def test_vars_work_with_multiindex(self): @@ -77,7 +100,7 @@ def test_vars_work_with_multiindex(self): 'value': self.df1[('B', 'b')], }, columns=[('A', 'a'), 'CAP', 'low', 'value']) - result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) + result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')]) tm.assert_frame_equal(result, expected) def test_tuple_vars_fail_with_multiindex(self): @@ -92,26 +115,26 @@ def test_tuple_vars_fail_with_multiindex(self): for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), (tuple_a, tuple_b)): with tm.assertRaisesRegexp(ValueError, r'MultiIndex'): - melt(self.df1, id_vars=id_vars, value_vars=value_vars) + 
self.df1.melt(id_vars=id_vars, value_vars=value_vars) def test_custom_var_name(self): - result5 = melt(self.df, var_name=self.var_name) + result5 = self.df.melt(var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) - result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name) + result6 = self.df.melt(id_vars=['id1'], var_name=self.var_name) self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) - result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) + result7 = self.df.melt(id_vars=['id1', 'id2'], var_name=self.var_name) self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value']) - result8 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', - var_name=self.var_name) + result8 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name) self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value']) - result9 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - var_name=self.var_name) + result9 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name) expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A'] * 10 + ['B'] * 10, @@ -121,24 +144,24 @@ def test_custom_var_name(self): tm.assert_frame_equal(result9, expected9) def test_custom_value_name(self): - result10 = melt(self.df, value_name=self.value_name) + result10 = self.df.melt(value_name=self.value_name) self.assertEqual(result10.columns.tolist(), ['variable', 'val']) - result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) + result11 = self.df.melt(id_vars=['id1'], value_name=self.value_name) self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) - result12 = melt(self.df, id_vars=['id1', 'id2'], - value_name=self.value_name) + result12 = self.df.melt(id_vars=['id1', 'id2'], + value_name=self.value_name) self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val']) - result13 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', - value_name=self.value_name) + result13 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + value_name=self.value_name) self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) - result14 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - value_name=self.value_name) + result14 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + value_name=self.value_name) expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, @@ -150,26 +173,29 @@ def test_custom_value_name(self): def test_custom_var_and_value_name(self): - result15 = melt(self.df, var_name=self.var_name, - value_name=self.value_name) + result15 = self.df.melt(var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result15.columns.tolist(), ['var', 'val']) - result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, - value_name=self.value_name) + result16 = self.df.melt(id_vars=['id1'], var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) - result17 = melt(self.df, id_vars=['id1', 'id2'], - var_name=self.var_name, value_name=self.value_name) + result17 = self.df.melt(id_vars=['id1', 'id2'], + var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val' ]) - result18 = melt(self.df, id_vars=['id1', 'id2'], 
value_vars='A', - var_name=self.var_name, value_name=self.value_name) + result18 = self.df.melt(id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val' ]) - result19 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], - var_name=self.var_name, value_name=self.value_name) + result19 = self.df.melt(id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name, + value_name=self.value_name) expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A'] * 10 + ['B'] * 10, @@ -181,17 +207,17 @@ def test_custom_var_and_value_name(self): df20 = self.df.copy() df20.columns.name = 'foo' - result20 = melt(df20) + result20 = df20.melt() self.assertEqual(result20.columns.tolist(), ['foo', 'value']) def test_col_level(self): - res1 = melt(self.df1, col_level=0) - res2 = melt(self.df1, col_level='CAP') + res1 = self.df1.melt(col_level=0) + res2 = self.df1.melt(col_level='CAP') self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) self.assertEqual(res2.columns.tolist(), ['CAP', 'value']) def test_multiindex(self): - res = pd.melt(self.df1) + res = self.df1.melt() self.assertEqual(res.columns.tolist(), ['CAP', 'low', 'value']) From e0b60c07295a92eb760c38870c5f8c40e412f7dc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 4 Apr 2017 18:02:28 -0400 Subject: [PATCH 337/353] BUG: Bug in DataFrame construction with nulls and datetimes in a list like closes #15869 Author: Jeff Reback Closes #15892 from jreback/construct and squashes the following commits: 6bf2148 [Jeff Reback] fix perf 7fcd4e5 [Jeff Reback] BUG: Bug in DataFrame construction with nulls and datetimes in a list-like --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/_libs/src/inference.pyx | 79 +++++++++++++++--- pandas/tests/frame/test_constructors.py | 9 ++ pandas/tests/frame/test_misc_api.py | 12 ++- pandas/tests/series/test_constructors.py | 8 ++ pandas/types/cast.py | 102 ++++++++++++----------- 6 files changed, 149 insertions(+), 62 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 355dceba1b953..2e1cc396287ce 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -997,6 +997,7 @@ Conversion - Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) - Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) - Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) +- Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index b0fb7048f154c..33c05f302dd94 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -439,31 +439,86 @@ def infer_dtype(object value): return 'mixed' -cpdef bint is_possible_datetimelike_array(object arr): - # determine if we have a possible datetimelike (or null-like) array +cpdef object infer_datetimelike_array(object arr): + """ + infer if we have a datetime or timedelta array + - date: we have *only* date and maybe strings, nulls + - datetime: we have *only* datetimes and maybe strings, nulls + - timedelta: we have *only* timedeltas and maybe strings, nulls + - nat: we do not have *any* date, datetimes or timedeltas, but do have + at least a NaT + - mixed: other objects (strings or actual objects) + + Parameters + ---------- + arr : object array + + Returns + ------- + string: {datetime, timedelta, date, nat, mixed} + + """ + cdef: Py_ssize_t i, n = len(arr) - bint seen_timedelta = 0, seen_datetime = 0 + bint seen_timedelta = 0, seen_date = 0, seen_datetime = 0 + bint seen_nat = 0 + list objs = [] object v for i in range(n): v = arr[i] if util.is_string_object(v): - continue + objs.append(v) + + if len(objs) == 3: + break + elif util._checknull(v): - continue - elif is_datetime(v): - seen_datetime=1 - elif is_timedelta(v): - seen_timedelta=1 + # nan or None + pass + elif v is NaT: + seen_nat = 1 + elif is_datetime(v) or util.is_datetime64_object(v): + # datetime, or np.datetime64 + seen_datetime = 1 + elif is_date(v): + seen_date = 1 + elif is_timedelta(v) or util.is_timedelta64_object(v): + # timedelta, or timedelta64 + seen_timedelta = 1 else: - return False - return seen_datetime or seen_timedelta + return 'mixed' + + if seen_date and not (seen_datetime or seen_timedelta): + return 'date' + elif seen_datetime and not seen_timedelta: + return 'datetime' + elif seen_timedelta and not seen_datetime: + return 'timedelta' + elif seen_nat: + return 'nat' + + # short-circuit by trying to + # actually convert these strings + # this is for performance as we don't need to try + # convert *every* string array + if len(objs): + try: + tslib.array_to_datetime(objs, errors='raise') + return 'datetime' + except: + pass + + # we are *not* going to infer from strings + # for timedelta as too much ambiguity + + return 'mixed' cdef inline bint is_null_datetimelike(v): # determine if we have a null for a timedelta/datetime (or integer - # versions)x + # versions) if util._checknull(v): return True elif v is NaT: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1ab292649a973..6d28d3b4dfcd5 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1366,6 +1366,15 @@ def test_constructor_with_datetimes(self): .reset_index(drop=True), 'b': i_no_tz}) tm.assert_frame_equal(df, expected) + def test_constructor_datetimes_with_nulls(self): + # gh-15869 + for arr in [np.array([None, None, None, None, + datetime.now(), None]), + np.array([None, None, datetime.now(), None])]: + result = DataFrame(arr).get_dtype_counts() + expected = Series({'datetime64[ns]': 1}) + 
tm.assert_series_equal(result, expected) + def test_constructor_for_list_with_dtypes(self): # TODO(wesm): unused intname = np.dtype(np.int_).name # noqa diff --git a/pandas/tests/frame/test_misc_api.py b/pandas/tests/frame/test_misc_api.py index 42427df90401d..50fa0dca6bf04 100644 --- a/pandas/tests/frame/test_misc_api.py +++ b/pandas/tests/frame/test_misc_api.py @@ -12,7 +12,7 @@ from numpy.random import randn import numpy as np -from pandas import DataFrame, Series +from pandas import DataFrame, Series, date_range, timedelta_range import pandas as pd from pandas.util.testing import (assert_almost_equal, @@ -328,6 +328,16 @@ def test_empty_nonzero(self): self.assertTrue(df.empty) self.assertTrue(df.T.empty) + def test_with_datetimelikes(self): + + df = DataFrame({'A': date_range('20130101', periods=10), + 'B': timedelta_range('1 day', periods=10)}) + t = df.T + + result = t.get_dtype_counts() + expected = Series({'object': 10}) + tm.assert_series_equal(result, expected) + def test_inplace_return_self(self): # re #1893 diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 24e4355fa9f9a..dbe2db67359f3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -327,6 +327,14 @@ def test_constructor_datelike_coercion(self): result = df.loc['216'] self.assertTrue(result.dtype == object) + def test_constructor_datetimes_with_nulls(self): + # gh-15869 + for arr in [np.array([None, None, None, None, + datetime.now(), None]), + np.array([None, None, datetime.now(), None])]: + result = Series(arr) + assert result.dtype == 'M8[ns]' + def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 985e5b9f95831..580ce12de3333 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -748,8 +748,6 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): this is pretty strict in that a datetime/timedelta is REQUIRED in addition to possible nulls/string likes - ONLY strings are NOT datetimelike - Parameters ---------- value : np.array / Series / Index / list-like @@ -770,64 +768,70 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): if not is_list_like(v): v = [v] v = np.array(v, copy=False) + + # we only care about object dtypes + if not is_object_dtype(v): + return value + shape = v.shape if not v.ndim == 1: v = v.ravel() - if len(v): - - def _try_datetime(v): - # safe coerce to datetime64 - try: - v = tslib.array_to_datetime(v, errors='raise') - except ValueError: + if not len(v): + return value - # we might have a sequence of the same-datetimes with tz's - # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype - try: - from pandas import to_datetime - return to_datetime(v) - except: - pass + def try_datetime(v): + # safe coerce to datetime64 + try: + v = tslib.array_to_datetime(v, errors='raise') + except ValueError: + # we might have a sequence of the same-datetimes with tz's + # if so coerce to a DatetimeIndex; if they are not the same, + # then these stay as object dtype + try: + from pandas import to_datetime + return to_datetime(v) except: pass - return v.reshape(shape) + except: + pass - def _try_timedelta(v): - # safe coerce to timedelta64 + return v.reshape(shape) - # will try first with a string & object conversion - from pandas import to_timedelta - try: - return to_timedelta(v)._values.reshape(shape) - except: - return v - - # do a quick 
inference for perf - sample = v[:min(3, len(v))] - inferred_type = lib.infer_dtype(sample) - - if (inferred_type in ['datetime', 'datetime64'] or - (convert_dates and inferred_type in ['date'])): - value = _try_datetime(v) - elif inferred_type in ['timedelta', 'timedelta64']: - value = _try_timedelta(v) - - # It's possible to have nulls intermixed within the datetime or - # timedelta. These will in general have an inferred_type of 'mixed', - # so have to try both datetime and timedelta. - - # try timedelta first to avoid spurious datetime conversions - # e.g. '00:00:01' is a timedelta but technically is also a datetime - elif inferred_type in ['mixed']: - - if lib.is_possible_datetimelike_array(_ensure_object(v)): - value = _try_timedelta(v) - if lib.infer_dtype(value) in ['mixed']: - value = _try_datetime(v) + def try_timedelta(v): + # safe coerce to timedelta64 + + # will try first with a string & object conversion + from pandas import to_timedelta + try: + return to_timedelta(v)._values.reshape(shape) + except: + return v + + inferred_type = lib.infer_datetimelike_array(_ensure_object(v)) + + if inferred_type == 'date' and convert_dates: + value = try_datetime(v) + elif inferred_type == 'datetime': + value = try_datetime(v) + elif inferred_type == 'timedelta': + value = try_timedelta(v) + elif inferred_type == 'nat': + + # if all NaT, return as datetime + if isnull(v).all(): + value = try_datetime(v) + else: + + # We have at least a NaT and a string + # try timedelta first to avoid spurious datetime conversions + # e.g. '00:00:01' is a timedelta but + # technically is also a datetime + value = try_timedelta(v) + if lib.infer_dtype(value) in ['mixed']: + value = try_datetime(v) return value From ca8ef494df8c841ccfde779b5b120ffea218ed46 Mon Sep 17 00:00:00 2001 From: the-nose-knows Date: Tue, 4 Apr 2017 15:32:46 -0700 Subject: [PATCH 338/353] ENH: Citing source in README file (#15856) * Citing source in README file For GH users who strictly or heavily use the web-view instead of a local Git, having a direct link is handy, as it does not require downloading the PDF _if_ the user wanted to go to the source of it directly. It's an alternative that allows those interested in more uploads similar to this PDF from the same author(s). * jorisvandenbossche's feedback I re-read the PDF and made sure the wording reflected the content presented. I also changed the source-citing so that is more friendly for .TXT files instead of Markdown or unspecified. * Update README.txt * English enhancement Improved sentence structure for English speakers. --- doc/cheatsheet/README.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/cheatsheet/README.txt b/doc/cheatsheet/README.txt index e2f6ec042e9cc..d32fe5bcd05a6 100644 --- a/doc/cheatsheet/README.txt +++ b/doc/cheatsheet/README.txt @@ -2,3 +2,7 @@ The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013. To create the PDF version, within Powerpoint, simply do a "Save As" and pick "PDF' as the format. +This cheat sheet was inspired by the RstudioData Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2]. 
+ +[1]: https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf +[2]: http://www.princetonoptimization.com/ From 0a3706780feb77f241715ffcdebb14ad7d678d3d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 5 Apr 2017 12:59:17 +0200 Subject: [PATCH 339/353] DEPR: correct locations to access public tslib objects (#15897) --- pandas/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 1bc85899fb89f..83ad85e3e292b 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -64,7 +64,11 @@ parser = _DeprecatedModule(deprmod='pandas.parser', deprmodto='pandas.io.libparsers') lib = _DeprecatedModule(deprmod='pandas.lib', deprmodto='pandas._libs.lib', moved={'infer_dtype': 'pandas.api.lib.infer_dtype'}) -tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib') +tslib = _DeprecatedModule(deprmod='pandas.tslib', deprmodto='pandas._libs.tslib', + moved={'Timestamp': 'pandas.Timestamp', + 'Timedelta': 'pandas.Timedelta', + 'NaT': 'pandas.NaT', + 'OutOfBoundsDatetime': 'pandas.errors.OutOfBoundsDatetime'}) # use the closest tagged version if possible from ._version import get_versions From dbc1654fb1604b99c1b4fe31a26b5548ea623565 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 5 Apr 2017 15:15:37 -0400 Subject: [PATCH 340/353] TST: better testing of Series.nlargest/nsmallest xref #15299 Author: Jeff Reback Closes #15902 from jreback/series_n and squashes the following commits: 657eac8 [Jeff Reback] TST: better testing of Series.nlargest/nsmallest --- pandas/core/algorithms.py | 56 ++++++-- pandas/tests/series/test_analytics.py | 180 +++++++++++++++----------- 2 files changed, 151 insertions(+), 85 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a62d290277443..99ef76e0f4812 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -12,6 +12,7 @@ from pandas.types.common import (is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, + is_complex_dtype, is_categorical_dtype, is_extension_type, is_datetimetz, @@ -40,6 +41,44 @@ from pandas._libs.tslib import iNaT +# --------------- # +# dtype access # +# --------------- # + +def _ensure_data_view(values): + """ + helper routine to ensure that our data is of the correct + input dtype for lower-level routines + + Parameters + ---------- + values : array-like + """ + + if needs_i8_conversion(values): + values = values.view(np.int64) + elif is_period_arraylike(values): + from pandas.tseries.period import PeriodIndex + values = PeriodIndex(values).asi8 + elif is_categorical_dtype(values): + values = values.values.codes + elif isinstance(values, (ABCSeries, ABCIndex)): + values = values.values + + if is_signed_integer_dtype(values): + values = _ensure_int64(values) + elif is_unsigned_integer_dtype(values): + values = _ensure_uint64(values) + elif is_complex_dtype(values): + values = _ensure_float64(values) + elif is_float_dtype(values): + values = _ensure_float64(values) + else: + values = _ensure_object(values) + + return values + + # --------------- # # top-level algos # # --------------- # @@ -867,9 +906,7 @@ def nsmallest(arr, n, keep='first'): narr = len(arr) n = min(n, narr) - sdtype = str(arr.dtype) - arr = arr.view(_dtype_map.get(sdtype, sdtype)) - + arr = _ensure_data_view(arr) kth_val = algos.kth_smallest(arr.copy(), n - 1) return _finalize_nsmallest(arr, kth_val, n, keep, narr) @@ -880,8 +917,7 @@ def nlargest(arr, n, keep='first'): Note: 
Fails silently with NaN. """ - sdtype = str(arr.dtype) - arr = arr.view(_dtype_map.get(sdtype, sdtype)) + arr = _ensure_data_view(arr) return nsmallest(-arr, n, keep=keep) @@ -910,9 +946,10 @@ def select_n_series(series, n, keep, method): nordered : Series """ dtype = series.dtype - if not issubclass(dtype.type, (np.integer, np.floating, np.datetime64, - np.timedelta64)): - raise TypeError("Cannot use method %r with dtype %s" % (method, dtype)) + if not ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or + needs_i8_conversion(dtype)): + raise TypeError("Cannot use method '{method}' with " + "dtype {dtype}".format(method=method, dtype=dtype)) if keep not in ('first', 'last'): raise ValueError('keep must be either "first", "last"') @@ -964,9 +1001,6 @@ def _finalize_nsmallest(arr, kth_val, n, keep, narr): return inds -_dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'} - - # ------- # # helpers # # ------- # diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b747a680c17dd..732142f1bce9a 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1381,80 +1381,6 @@ def test_is_monotonic(self): self.assertFalse(s.is_monotonic) self.assertTrue(s.is_monotonic_decreasing) - def test_nsmallest_nlargest(self): - # float, int, datetime64 (use i8), timedelts64 (same), - # object that are numbers, object that are strings - - base = [3, 2, 1, 2, 5] - - s_list = [ - Series(base, dtype='int8'), - Series(base, dtype='int16'), - Series(base, dtype='int32'), - Series(base, dtype='int64'), - Series(base, dtype='float32'), - Series(base, dtype='float64'), - Series(base, dtype='uint8'), - Series(base, dtype='uint16'), - Series(base, dtype='uint32'), - Series(base, dtype='uint64'), - Series(base).astype('timedelta64[ns]'), - Series(pd.to_datetime(['2003', '2002', '2001', '2002', '2005'])), - ] - - raising = [ - Series([3., 2, 1, 2, '5'], dtype='object'), - Series([3., 2, 1, 2, 5], dtype='object'), - # not supported on some archs - # Series([3., 2, 1, 2, 5], dtype='complex256'), - Series([3., 2, 1, 2, 5], dtype='complex128'), - ] - - for r in raising: - dt = r.dtype - msg = "Cannot use method 'n(larg|small)est' with dtype %s" % dt - args = 2, len(r), 0, -1 - methods = r.nlargest, r.nsmallest - for method, arg in product(methods, args): - with tm.assertRaisesRegexp(TypeError, msg): - method(arg) - - for s in s_list: - - assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]]) - assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) - - empty = s.iloc[0:0] - assert_series_equal(s.nsmallest(0), empty) - assert_series_equal(s.nsmallest(-1), empty) - assert_series_equal(s.nlargest(0), empty) - assert_series_equal(s.nlargest(-1), empty) - - assert_series_equal(s.nsmallest(len(s)), s.sort_values()) - assert_series_equal(s.nsmallest(len(s) + 1), s.sort_values()) - assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]]) - assert_series_equal(s.nlargest(len(s) + 1), - s.iloc[[4, 0, 1, 3, 2]]) - - s = Series([3., np.nan, 1, 2, 5]) - assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]]) - assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]]) - - msg = 'keep must be either "first", "last"' - with tm.assertRaisesRegexp(ValueError, msg): - s.nsmallest(keep='invalid') - with tm.assertRaisesRegexp(ValueError, msg): - s.nlargest(keep='invalid') - - # GH 13412 - s = Series([1, 4, 3, 2], index=[0, 0, 1, 1]) - result = s.nlargest(3) - expected = s.sort_values(ascending=False).head(3) - 
assert_series_equal(result, expected) - result = s.nsmallest(3) - expected = s.sort_values().head(3) - assert_series_equal(result, expected) - def test_sort_index_level(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) s = Series([1, 2], mi) @@ -1729,3 +1655,109 @@ def test_value_counts_categorical_not_ordered(self): index=exp_idx, name='xxx') tm.assert_series_equal(s.value_counts(normalize=True), exp) tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + +@pytest.fixture +def s_main_dtypes(): + df = pd.DataFrame( + {'datetime': pd.to_datetime(['2003', '2002', + '2001', '2002', + '2005']), + 'datetimetz': pd.to_datetime( + ['2003', '2002', + '2001', '2002', + '2005']).tz_localize('US/Eastern'), + 'timedelta': pd.to_timedelta(['3d', '2d', '1d', + '2d', '5d'])}) + + for dtype in ['int8', 'int16', 'int32', 'int64', + 'float32', 'float64', + 'uint8', 'uint16', 'uint32', 'uint64']: + df[dtype] = Series([3, 2, 1, 2, 5], dtype=dtype) + + return df + + +class TestNLargestNSmallest(object): + + @pytest.mark.parametrize( + "r", [Series([3., 2, 1, 2, '5'], dtype='object'), + Series([3., 2, 1, 2, 5], dtype='object'), + # not supported on some archs + # Series([3., 2, 1, 2, 5], dtype='complex256'), + Series([3., 2, 1, 2, 5], dtype='complex128'), + Series(list('abcde'), dtype='category'), + Series(list('abcde'))]) + def test_error(self, r): + dt = r.dtype + msg = ("Cannot use method 'n(larg|small)est' with " + "dtype {dt}".format(dt=dt)) + args = 2, len(r), 0, -1 + methods = r.nlargest, r.nsmallest + for method, arg in product(methods, args): + with tm.assertRaisesRegexp(TypeError, msg): + method(arg) + + @pytest.mark.parametrize( + "s", + [v for k, v in s_main_dtypes().iteritems()]) + def test_nsmallest_nlargest(self, s): + # float, int, datetime64 (use i8), timedelts64 (same), + # object that are numbers, object that are strings + + assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]]) + assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) + + empty = s.iloc[0:0] + assert_series_equal(s.nsmallest(0), empty) + assert_series_equal(s.nsmallest(-1), empty) + assert_series_equal(s.nlargest(0), empty) + assert_series_equal(s.nlargest(-1), empty) + + assert_series_equal(s.nsmallest(len(s)), s.sort_values()) + assert_series_equal(s.nsmallest(len(s) + 1), s.sort_values()) + assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]]) + assert_series_equal(s.nlargest(len(s) + 1), + s.iloc[[4, 0, 1, 3, 2]]) + + def test_misc(self): + + s = Series([3., np.nan, 1, 2, 5]) + assert_series_equal(s.nlargest(), s.iloc[[4, 0, 3, 2]]) + assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]]) + + msg = 'keep must be either "first", "last"' + with tm.assertRaisesRegexp(ValueError, msg): + s.nsmallest(keep='invalid') + with tm.assertRaisesRegexp(ValueError, msg): + s.nlargest(keep='invalid') + + # GH 15297 + s = Series([1] * 5, index=[1, 2, 3, 4, 5]) + expected_first = Series([1] * 3, index=[1, 2, 3]) + expected_last = Series([1] * 3, index=[5, 4, 3]) + + result = s.nsmallest(3) + assert_series_equal(result, expected_first) + + result = s.nsmallest(3, keep='last') + assert_series_equal(result, expected_last) + + result = s.nlargest(3) + assert_series_equal(result, expected_first) + + result = s.nlargest(3, keep='last') + assert_series_equal(result, expected_last) + + @pytest.mark.parametrize('n', range(1, 5)) + def test_n(self, n): + + # GH 13412 + s = Series([1, 4, 3, 2], index=[0, 0, 1, 1]) + result = s.nlargest(n) + expected = s.sort_values(ascending=False).head(n) + 
assert_series_equal(result, expected) + + result = s.nsmallest(n) + expected = s.sort_values().head(n) + assert_series_equal(result, expected) From e4e87ec55765d31e59e97d89c71ed5a3fa2f3d38 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 5 Apr 2017 15:16:40 -0400 Subject: [PATCH 341/353] ENH: Add file buffer validation to I/O ops 1) Allows for more uniform handling of invalid file buffers to our `read_*` functions. 2) Adds a ton of new documentation to `inference.py` Closes #15337. xref #15895. Author: gfyoung Closes #15894 from gfyoung/validate-file-like and squashes the following commits: 5a8f8da [gfyoung] DOC: Document all of inference.py 81103f7 [gfyoung] ENH: Add file buffer validation to I/O ops --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/common.py | 23 +- pandas/io/excel.py | 5 +- pandas/tests/api/test_types.py | 2 +- pandas/tests/io/parser/common.py | 17 ++ pandas/tests/types/test_inference.py | 16 +- pandas/types/api.py | 1 + pandas/types/inference.py | 328 +++++++++++++++++++++++++-- 8 files changed, 361 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2e1cc396287ce..cbb4d32cc5edb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1033,6 +1033,7 @@ I/O - Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) - Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) +- Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) - Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. 
(:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) diff --git a/pandas/io/common.py b/pandas/io/common.py index 8bc7217db87f9..8ee6ded67f790 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,7 +10,7 @@ from pandas import compat from pandas.formats.printing import pprint_thing from pandas.core.common import AbstractMethodError -from pandas.types.common import is_number +from pandas.types.common import is_number, is_file_like # compat from pandas.errors import (ParserError, DtypeWarning, # noqa @@ -197,9 +197,19 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, encoding=encoding, compression=compression) - # It is a pathlib.Path/py.path.local or string + # Convert pathlib.Path/py.path.local or string filepath_or_buffer = _stringify_path(filepath_or_buffer) - return _expand_user(filepath_or_buffer), None, compression + + if isinstance(filepath_or_buffer, (compat.string_types, + compat.binary_type, + mmap.mmap)): + return _expand_user(filepath_or_buffer), None, compression + + if not is_file_like(filepath_or_buffer): + msg = "Invalid file path or buffer object type: {_type}" + raise ValueError(msg.format(_type=type(filepath_or_buffer))) + + return filepath_or_buffer, None, compression def file_path_to_url(path): @@ -416,6 +426,9 @@ def __init__(self, f): def __getattr__(self, name): return getattr(self.mmap, name) + def __iter__(self): + return self + def __next__(self): newline = self.mmap.readline() @@ -433,6 +446,10 @@ def __next__(self): return newline +if not compat.PY3: + MMapWrapper.next = lambda self: self.__next__() + + class UTF8Recoder(BaseIterator): """ diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 6d136869fc73f..737141f11d7d1 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -243,9 +243,8 @@ def __init__(self, io, **kwds): # to get_filepath_or_buffer() if _is_url(io): io = _urlopen(io) - # Deal with S3 urls, path objects, etc. 
Will convert them to - # buffer or path string - io, _, _ = get_filepath_or_buffer(io) + elif not isinstance(io, (ExcelFile, xlrd.Book)): + io, _, _ = get_filepath_or_buffer(io) if engine == 'xlrd' and isinstance(io, xlrd.Book): self.book = io diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 686de4a196034..f3fd6332417a1 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -24,7 +24,7 @@ class TestTypes(Base, tm.TestCase): 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', 'is_unsigned_integer_dtype', 'is_period', 'is_period_dtype', 'is_re', 'is_re_compilable', - 'is_dict_like', 'is_iterator', + 'is_dict_like', 'is_iterator', 'is_file_like', 'is_list_like', 'is_hashable', 'is_named_tuple', 'is_sequence', 'pandas_dtype'] diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 7faf485b65d10..36d5f2dd5274b 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1678,3 +1678,20 @@ def test_file_handles(self): if PY3: self.assertFalse(m.closed) m.close() + + def test_invalid_file_buffer(self): + # see gh-15337 + + class InvalidBuffer(object): + pass + + msg = "Invalid file path or buffer object type" + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(InvalidBuffer()) + + if PY3: + from unittest import mock + + with tm.assertRaisesRegexp(ValueError, msg): + self.read_csv(mock.Mock()) diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index b41df0da45234..de3a2ca35a7f5 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -17,7 +17,7 @@ from pandas import (Series, Index, DataFrame, Timedelta, DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical) -from pandas.compat import u, PY2, lrange +from pandas.compat import u, PY2, PY3, StringIO, lrange from pandas.types import inference from pandas.types.common import (is_timedelta64_dtype, is_timedelta64_ns_dtype, @@ -78,6 +78,20 @@ def test_is_dict_like(): assert not inference.is_dict_like(f) +def test_is_file_like(): + is_file = inference.is_file_like + + data = StringIO("data") + assert is_file(data) + + data = [1, 2, 3] + assert not is_file(data) + + if PY3: + from unittest import mock + assert not is_file(mock.Mock()) + + def test_is_named_tuple(): passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) diff --git a/pandas/types/api.py b/pandas/types/api.py index c809cb3614a8c..e78514ce77822 100644 --- a/pandas/types/api.py +++ b/pandas/types/api.py @@ -52,6 +52,7 @@ is_re_compilable, is_dict_like, is_iterator, + is_file_like, is_list_like, is_hashable, is_named_tuple, diff --git a/pandas/types/inference.py b/pandas/types/inference.py index d8e3b3ee7329b..91418677c6b19 100644 --- a/pandas/types/inference.py +++ b/pandas/types/inference.py @@ -4,7 +4,7 @@ import re import numpy as np from numbers import Number -from pandas.compat import (string_types, text_type, +from pandas.compat import (PY2, string_types, text_type, string_and_binary_types) from pandas._libs import lib @@ -22,28 +22,211 @@ def is_number(obj): + """ + Check if the object is a number. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_number : bool + Whether `obj` is a number or not. 
+ + Examples + -------- + >>> is_number(1) + True + >>> is_number("foo") + False + """ + return isinstance(obj, (Number, np.number)) def is_string_like(obj): + """ + Check if the object is a string. + + Parameters + ---------- + obj : The object to check. + + Examples + -------- + >>> is_string_like("foo") + True + >>> is_string_like(1) + False + + Returns + ------- + is_str_like : bool + Whether `obj` is a string or not. + """ + return isinstance(obj, (text_type, string_types)) -def _iterable_not_string(x): - return (isinstance(x, collections.Iterable) and - not isinstance(x, string_types)) +def _iterable_not_string(obj): + """ + Check if the object is an iterable but not a string. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_iter_not_string : bool + Whether `obj` is a non-string iterable. + + Examples + -------- + >>> _iterable_not_string([1, 2, 3]) + True + >>> _iterable_not_string("foo") + False + >>> _iterable_not_string(1) + False + """ + + return (isinstance(obj, collections.Iterable) and + not isinstance(obj, string_types)) def is_iterator(obj): - # python 3 generators have __next__ instead of next - return hasattr(obj, 'next') or hasattr(obj, '__next__') + """ + Check if the object is an iterator. + + For example, lists are considered iterators + but not strings or datetime objects. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_iter : bool + Whether `obj` is an iterator. + + Examples + -------- + >>> is_iterator([1, 2, 3]) + True + >>> is_iterator(datetime(2017, 1, 1)) + False + >>> is_iterator("foo") + False + >>> is_iterator(1) + False + """ + + if not hasattr(obj, '__iter__'): + return False + + if PY2: + return hasattr(obj, 'next') + else: + # Python 3 generators have + # __next__ instead of next + return hasattr(obj, '__next__') + + +def is_file_like(obj): + """ + Check if the object is a file-like object. + + For objects to be considered file-like, they must + be an iterator AND have the following four methods: + + 1) read + 2) write + 3) seek + 4) tell + + Note: file-like objects must be iterable, but + iterable objects need not be file-like. + + .. versionadded:: 0.20.0 + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_file_like : bool + Whether `obj` has file-like properties. + + Examples + -------- + >>> buffer(StringIO("data")) + >>> is_file_like(buffer) + True + >>> is_file_like([1, 2, 3]) + False + """ + + file_attrs = ('read', 'write', 'seek', 'tell') + + for attr in file_attrs: + if not hasattr(obj, attr): + return False + + if not is_iterator(obj): + return False + + return True def is_re(obj): + """ + Check if the object is a regex pattern instance. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_regex : bool + Whether `obj` is a regex pattern. + + Examples + -------- + >>> is_re(re.compile(".*")) + True + >>> is_re("foo") + False + """ + return isinstance(obj, re._pattern_type) def is_re_compilable(obj): + """ + Check if the object can be compiled into a regex pattern instance. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_regex_compilable : bool + Whether `obj` can be compiled as a regex pattern. 
+ + Examples + -------- + >>> is_re_compilable(".*") + True + >>> is_re_compilable(1) + False + """ + try: re.compile(obj) except TypeError: @@ -52,21 +235,95 @@ def is_re_compilable(obj): return True -def is_list_like(arg): - return (hasattr(arg, '__iter__') and - not isinstance(arg, string_and_binary_types)) +def is_list_like(obj): + """ + Check if the object is list-like. + + Objects that are considered list-like are for example Python + lists, tuples, sets, NumPy arrays, and Pandas Series. + + Strings and datetime objects, however, are not considered list-like. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_list_like : bool + Whether `obj` has list-like properties. + + Examples + -------- + >>> is_list_like([1, 2, 3]) + True + >>> is_list_like({1, 2, 3}) + True + >>> is_list_like(datetime(2017, 1, 1)) + False + >>> is_list_like("foo") + False + >>> is_list_like(1) + False + """ + + return (hasattr(obj, '__iter__') and + not isinstance(obj, string_and_binary_types)) + +def is_dict_like(obj): + """ + Check if the object is dict-like. -def is_dict_like(arg): - return hasattr(arg, '__getitem__') and hasattr(arg, 'keys') + Parameters + ---------- + obj : The object to check. + Returns + ------- + is_dict_like : bool + Whether `obj` has dict-like properties. -def is_named_tuple(arg): - return isinstance(arg, tuple) and hasattr(arg, '_fields') + Examples + -------- + >>> is_dict_like({1: 2}) + True + >>> is_dict_like([1, 2, 3]) + False + """ + + return hasattr(obj, '__getitem__') and hasattr(obj, 'keys') + + +def is_named_tuple(obj): + """ + Check if the object is a named tuple. + Parameters + ---------- + obj : The object to check. -def is_hashable(arg): - """Return True if hash(arg) will succeed, False otherwise. + Returns + ------- + is_named_tuple : bool + Whether `obj` is a named tuple. + + Examples + -------- + >>> Point = namedtuple("Point", ["x", "y"]) + >>> p = Point(1, 2) + >>> + >>> is_named_tuple(p) + True + >>> is_named_tuple((1, 2)) + False + """ + + return isinstance(obj, tuple) and hasattr(obj, '_fields') + + +def is_hashable(obj): + """Return True if hash(obj) will succeed, False otherwise. Some types will pass a test against collections.Hashable but fail when they are actually hashed with hash(). @@ -82,25 +339,48 @@ def is_hashable(arg): >>> is_hashable(a) False """ - # unfortunately, we can't use isinstance(arg, collections.Hashable), which - # can be faster than calling hash, because numpy scalars on Python 3 fail - # this test + # Unfortunately, we can't use isinstance(obj, collections.Hashable), which + # can be faster than calling hash. That is because numpy scalars on Python + # 3 fail this test. - # reconsider this decision once this numpy bug is fixed: + # Reconsider this decision once this numpy bug is fixed: # https://github.com/numpy/numpy/issues/5562 try: - hash(arg) + hash(obj) except TypeError: return False else: return True -def is_sequence(x): +def is_sequence(obj): + """ + Check if the object is a sequence of objects. + String types are not included as sequences here. + + Parameters + ---------- + obj : The object to check. + + Returns + ------- + is_sequence : bool + Whether `obj` is a sequence of objects. + + Examples + -------- + >>> l = [1, 2, 3] + >>> + >>> is_sequence(l) + True + >>> is_sequence(iter(l)) + False + """ + try: - iter(x) - len(x) # it has a length - return not isinstance(x, string_and_binary_types) + iter(obj) # Can iterate over it. + len(obj) # Has a length associated with it. 
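+        # strings and bytes satisfy both checks above, so they are excluded explicitly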
+ return not isinstance(obj, string_and_binary_types) except (TypeError, AttributeError): return False From ba30e3a2e376035549b009079d44ba5ca7a4c48f Mon Sep 17 00:00:00 2001 From: alexandercbooth Date: Wed, 5 Apr 2017 16:47:07 -0500 Subject: [PATCH 342/353] BUG: addresses #14855 by fixing color kwarg conflict - [x] closes #14855 - [x] tests passed - [x] passes ``git diff upstream/master | flake8 --diff`` Author: alexandercbooth This patch had conflicts when merged, resolved by Committer: Tom Augspurger Closes #14871 from alexandercbooth/fix-color-scatterm-bug and squashes the following commits: 3245f09b9 [alexandercbooth] DOC: moving whatsnew entry to 0.20.0 8ff5f51f1 [alexandercbooth] BUG: addresses #14855 by fixing color kwarg conflict --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/plotting/test_misc.py | 6 ++++++ pandas/tools/plotting.py | 8 +++----- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index cbb4d32cc5edb..ad190671cbbdc 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1056,6 +1056,7 @@ Plotting - Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) +- Bug in ``pd.scatter_matrix()`` could accept either ``color`` or ``c``, but not both (:issue:`14855`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 11f00386ec592..812f039f1a2c7 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -76,6 +76,12 @@ def scat(**kwds): _check_plot_works(scat, diagonal='hist') with tm.assert_produces_warning(UserWarning): _check_plot_works(scat, range_padding=.1) + with tm.assert_produces_warning(UserWarning): + _check_plot_works(scat, color='rgb') + with tm.assert_produces_warning(UserWarning): + _check_plot_works(scat, c='rgb') + with tm.assert_produces_warning(UserWarning): + _check_plot_works(scat, facecolor='rgb') def scat2(x, y, by=None, ax=None, figsize=None): return plotting.scatter_plot(df, x, y, by, ax, figsize=None) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index d311b0e6d83eb..f70a2b0b22140 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -349,7 +349,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) >>> scatter_matrix(df, alpha=0.2) """ - import matplotlib.pyplot as plt df = frame._get_numeric_data() n = df.columns.size @@ -367,8 +366,8 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, hist_kwds = hist_kwds or {} density_kwds = density_kwds or {} - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) + # GH 14855 + kwds.setdefault('edgecolors', 'none') boundaries_list = [] for a in df.columns: @@ -2864,8 +2863,7 @@ def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, """ import matplotlib.pyplot as plt - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwargs.setdefault('c', plt.rcParams['patch.facecolor']) + kwargs.setdefault('edgecolors', 'none') def plot_group(group, ax): xvals = group[x].values From 1fbdc23def1cc91280c508ac5b7806ced579b264 Mon Sep 17 00:00:00 
2001 From: Tong SHEN Date: Thu, 6 Apr 2017 15:07:58 +0800 Subject: [PATCH 343/353] DOC: Fix a typo in travis.yml (#15915) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d864b755541de..e5e05ed26da56 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ python: 3.5 # set NOCACHE-true # To delete caches go to https://travis-ci.org/OWNER/REPOSITORY/caches or run # travis cache --delete inside the project directory from the travis command line client -# The cash directories will be deleted if anything in ci/ changes in a commit +# The cache directories will be deleted if anything in ci/ changes in a commit cache: ccache: true directories: From b070d519c94bda36e116327b6cf854d8e9888308 Mon Sep 17 00:00:00 2001 From: Baurzhan Muftakhidinov Date: Thu, 6 Apr 2017 16:34:24 +0500 Subject: [PATCH 344/353] Fix a docstring typo in _fill_mi_header (#15918) [ci skip] --- pandas/io/excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 737141f11d7d1..7f2f0cf4943b8 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -571,7 +571,7 @@ def _fill_mi_header(row, control_row): ---------- row : list List of items in a single row. - constrol_row : list of boolean + control_row : list of boolean Helps to determine if particular column is in same parent index as the previous value. Used to stop propagation of empty cells between different indexes. From 763197c3422d46b8e4cc807d58a63c6be6a9a288 Mon Sep 17 00:00:00 2001 From: Tong SHEN Date: Thu, 6 Apr 2017 19:35:59 +0800 Subject: [PATCH 345/353] DOC: Fix a typo in indexing.rst (#15916) * DOC: Fix a typo in indexing.rst * more typos fixed --- doc/source/indexing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index bc8997b313053..f988fb7cd6806 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -69,7 +69,7 @@ Different Choices for Indexing .. versionadded:: 0.11.0 Object selection has had a number of user-requested additions in order to -support more explicit location based indexing. pandas now supports three types +support more explicit location based indexing. Pandas now supports three types of multi-axis indexing. - ``.loc`` is primarily label based, but may also be used with a boolean array. ``.loc`` will raise ``KeyError`` when the items are not found. Allowed inputs are: @@ -401,7 +401,7 @@ Selection By Position This is sometimes called ``chained assignment`` and should be avoided. See :ref:`Returning a View versus Copy ` -pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely python and numpy slicing. These are ``0-based`` indexing. When slicing, the start bounds is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise a ``IndexError``. +Pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely python and numpy slicing. These are ``0-based`` indexing. When slicing, the start bounds is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise an ``IndexError``. The ``.iloc`` attribute is the primary access method. 
The following are valid inputs: From a0b089e1feee4e132d274271215d867295fc091a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 6 Apr 2017 09:31:31 -0400 Subject: [PATCH 346/353] BUG: Standardize malformed row handling in Python engine (#15913) Closes gh-15910. --- doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/io/parsers.py | 87 +++++++++++--------- pandas/tests/io/parser/c_parser_only.py | 9 ++ pandas/tests/io/parser/python_parser_only.py | 18 ++-- 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index ad190671cbbdc..462341d3d692d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -365,6 +365,7 @@ Other Enhancements - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) - ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) +- ``pd.read_csv()`` will now raise a ``csv.Error`` error whenever an end-of-file character is encountered in the middle of a data row (:issue:`15913`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations @@ -1034,7 +1035,8 @@ I/O - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) - Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) - Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) -- Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) +- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) +- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) - Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b624d2cc0c7ad..a85f9cda50879 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2469,26 +2469,7 @@ def _next_line(self): next(self.data) while True: - try: - orig_line = next(self.data) - except csv.Error as e: - msg = str(e) - - if 'NULL byte' in str(e): - msg = ('NULL byte detected. This byte ' - 'cannot be processed in Python\'s ' - 'native csv library at the moment, ' - 'so please pass in engine=\'c\' instead') - - if self.skipfooter > 0: - reason = ('Error could possibly be due to ' - 'parsing errors in the skipped footer rows ' - '(the skipfooter keyword is only applied ' - 'after Python\'s csv library has parsed ' - 'all rows).') - msg += '. 
' + reason - - raise csv.Error(msg) + orig_line = self._next_iter_line() line = self._check_comments([orig_line])[0] self.pos += 1 if (not self.skip_blank_lines and @@ -2510,6 +2491,43 @@ def _next_line(self): self.buf.append(line) return line + def _next_iter_line(self, **kwargs): + """ + Wrapper around iterating through `self.data` (CSV source). + + When a CSV error is raised, we check for specific + error messages that allow us to customize the + error message displayed to the user. + + Parameters + ---------- + kwargs : Keyword arguments used to customize the error message. + """ + + try: + return next(self.data) + except csv.Error as e: + msg = str(e) + + if 'NULL byte' in msg: + msg = ('NULL byte detected. This byte ' + 'cannot be processed in Python\'s ' + 'native csv library at the moment, ' + 'so please pass in engine=\'c\' instead') + elif 'newline inside string' in msg: + msg = ('EOF inside string starting with ' + 'line ' + str(kwargs['row_num'])) + + if self.skipfooter > 0: + reason = ('Error could possibly be due to ' + 'parsing errors in the skipped footer rows ' + '(the skipfooter keyword is only applied ' + 'after Python\'s csv library has parsed ' + 'all rows).') + msg += '. ' + reason + + raise csv.Error(msg) + def _check_comments(self, lines): if self.comment is None: return lines @@ -2688,7 +2706,6 @@ def _rows_to_cols(self, content): return zipped_content def _get_lines(self, rows=None): - source = self.data lines = self.buf new_rows = None @@ -2703,14 +2720,14 @@ def _get_lines(self, rows=None): rows -= len(self.buf) if new_rows is None: - if isinstance(source, list): - if self.pos > len(source): + if isinstance(self.data, list): + if self.pos > len(self.data): raise StopIteration if rows is None: - new_rows = source[self.pos:] - new_pos = len(source) + new_rows = self.data[self.pos:] + new_pos = len(self.data) else: - new_rows = source[self.pos:self.pos + rows] + new_rows = self.data[self.pos:self.pos + rows] new_pos = self.pos + rows # Check for stop rows. n.b.: self.skiprows is a set. 
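The hunks above and below are easiest to read as a single wrap-and-rethrow refactor: every call site that used to pull rows straight from the csv reader (and carry its own error-message logic) now goes through ``_next_iter_line``, which becomes the one place where low-level ``csv.Error`` messages are reworded. A minimal standalone sketch of that pattern, for illustration only (the ``reader``, ``row_num`` and ``skipfooter`` names are placeholders, not the pandas API):

    import csv

    def next_row(reader, row_num, skipfooter=0):
        # Fetch the next parsed row, translating known low-level
        # csv errors into more actionable messages.
        try:
            return next(reader)
        except csv.Error as err:
            msg = str(err)
            if 'NULL byte' in msg:
                msg = ("NULL byte detected; the Python csv reader "
                       "cannot handle it, try engine='c' instead")
            elif 'newline inside string' in msg:
                msg = 'EOF inside string starting with line %d' % row_num
            if skipfooter > 0:
                msg += (' (the error may be in the footer rows, which '
                        'skipfooter removes only after parsing)')
            raise csv.Error(msg)

Callers are then left with only ``StopIteration`` to handle, which is what the final ``_get_lines`` hunk below relies on.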
@@ -2726,21 +2743,17 @@ def _get_lines(self, rows=None): try: if rows is not None: for _ in range(rows): - new_rows.append(next(source)) + new_rows.append(next(self.data)) lines.extend(new_rows) else: rows = 0 + while True: - try: - new_rows.append(next(source)) - rows += 1 - except csv.Error as inst: - if 'newline inside string' in str(inst): - row_num = str(self.pos + rows) - msg = ('EOF inside string starting with ' - 'line ' + row_num) - raise Exception(msg) - raise + new_row = self._next_iter_line( + row_num=self.pos + rows) + new_rows.append(new_row) + rows += 1 + except StopIteration: if self.skiprows: new_rows = [row for i, row in enumerate(new_rows) diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index ffbd904843bfc..837b7a7922d75 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -408,3 +408,12 @@ def test_large_difference_in_columns(self): expected = DataFrame([row.split(',')[0] for row in rows]) tm.assert_frame_equal(result, expected) + + def test_data_after_quote(self): + # see gh-15910 + + data = 'a\n1\n"b"a' + result = self.read_csv(StringIO(data)) + expected = DataFrame({'a': ['1', 'ba']}) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index bd76070933c47..36356315419c4 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -225,15 +225,17 @@ def test_multi_char_sep_quotes(self): def test_skipfooter_bad_row(self): # see gh-13879 + # see gh-15910 - data = 'a,b,c\ncat,foo,bar\ndog,foo,"baz' msg = 'parsing errors in the skipped footer rows' - with tm.assertRaisesRegexp(csv.Error, msg): - self.read_csv(StringIO(data), skipfooter=1) - - # We expect no match, so there should be an assertion - # error out of the inner context manager. - with tm.assertRaises(AssertionError): + for data in ('a\n1\n"b"a', + 'a,b,c\ncat,foo,bar\ndog,foo,"baz'): with tm.assertRaisesRegexp(csv.Error, msg): - self.read_csv(StringIO(data)) + self.read_csv(StringIO(data), skipfooter=1) + + # We expect no match, so there should be an assertion + # error out of the inner context manager. 
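+            # (without skipfooter the same rows either parse or raise a different message)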
+ with tm.assertRaises(AssertionError): + with tm.assertRaisesRegexp(csv.Error, msg): + self.read_csv(StringIO(data)) From c1122523ede85340b042b83b629731db8176378f Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Thu, 6 Apr 2017 09:38:11 -0400 Subject: [PATCH 347/353] BUG: Fix nsmallest/nlargest With Identical Values closes #15297 Author: Roger Thomas Closes #15299 from RogerThomas/fix_nsmallest_nlargest_with_n_identical_values and squashes the following commits: d3964f8 [Roger Thomas] Fix nsmallest/nlargest With Identical Values --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/algorithms.py | 75 +++++++++- pandas/tests/frame/test_analytics.py | 199 ++++++++++++++++++--------- 3 files changed, 200 insertions(+), 75 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 462341d3d692d..cb9e2496757ef 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1097,6 +1097,7 @@ Reshaping - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) +- Bug in ``DataFrame.nsmallest`` and ``DataFrame.nlargest`` where identical values resulted in duplicated rows (:issue:`15297`) Numeric ^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 99ef76e0f4812..80664a9ba3019 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -931,6 +931,15 @@ def select_n_slow(dropped, n, keep, method): _select_methods = {'nsmallest': nsmallest, 'nlargest': nlargest} +def _is_valid_dtype_n_method(dtype): + """ + Helper function to determine if dtype is valid for + nsmallest/nlargest methods + """ + return ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or + needs_i8_conversion(dtype)) + + def select_n_series(series, n, keep, method): """Implement n largest/smallest for pandas Series @@ -946,8 +955,7 @@ def select_n_series(series, n, keep, method): nordered : Series """ dtype = series.dtype - if not ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or - needs_i8_conversion(dtype)): + if not _is_valid_dtype_n_method(dtype): raise TypeError("Cannot use method '{method}' with " "dtype {dtype}".format(method=method, dtype=dtype)) @@ -981,14 +989,67 @@ def select_n_frame(frame, columns, n, method, keep): ------- nordered : DataFrame """ - from pandas.core.series import Series + from pandas import Int64Index if not is_list_like(columns): columns = [columns] columns = list(columns) - ser = getattr(frame[columns[0]], method)(n, keep=keep) - if isinstance(ser, Series): - ser = ser.to_frame() - return ser.merge(frame, on=columns[0], left_index=True)[frame.columns] + for column in columns: + dtype = frame[column].dtype + if not _is_valid_dtype_n_method(dtype): + raise TypeError(( + "Column {column!r} has dtype {dtype}, cannot use method " + "{method!r} with this dtype" + ).format(column=column, dtype=dtype, method=method)) + + def get_indexer(current_indexer, other_indexer): + """Helper function to concat `current_indexer` and `other_indexer` + depending on `method` + """ + if method == 'nsmallest': + return current_indexer.append(other_indexer) + else: + return other_indexer.append(current_indexer) + + # Below we save and reset the index in case index contains duplicates + original_index = 
frame.index + cur_frame = frame = frame.reset_index(drop=True) + cur_n = n + indexer = Int64Index([]) + + for i, column in enumerate(columns): + + # For each column we apply method to cur_frame[column]. If it is the + # last column in columns, or if the values returned are unique in + # frame[column] we save this index and break + # Otherwise we must save the index of the non duplicated values + # and set the next cur_frame to cur_frame filtered on all duplcicated + # values (#GH15297) + series = cur_frame[column] + values = getattr(series, method)(cur_n, keep=keep) + is_last_column = len(columns) - 1 == i + if is_last_column or values.nunique() == series.isin(values).sum(): + + # Last column in columns or values are unique in series => values + # is all that matters + indexer = get_indexer(indexer, values.index) + break + + duplicated_filter = series.duplicated(keep=False) + duplicated = values[duplicated_filter] + non_duplicated = values[~duplicated_filter] + indexer = get_indexer(indexer, non_duplicated.index) + + # Must set cur frame to include all duplicated values to consider for + # the next column, we also can reduce cur_n by the current length of + # the indexer + cur_frame = cur_frame[series.isin(duplicated)] + cur_n = n - len(indexer) + + frame = frame.take(indexer) + + # Restore the index on frame + frame.index = original_index.take(indexer) + return frame def _finalize_nsmallest(arr, kth_val, n, keep, narr): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index aa15e9fbab4cc..dda52bbc536c9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -7,11 +7,12 @@ import sys import pytest +from string import ascii_lowercase from numpy import nan from numpy.random import randn import numpy as np -from pandas.compat import lrange +from pandas.compat import lrange, product from pandas import (compat, isnull, notnull, DataFrame, Series, MultiIndex, date_range, Timestamp) import pandas as pd @@ -1120,73 +1121,6 @@ def __nonzero__(self): self.assertTrue(r1.all()) # ---------------------------------------------------------------------- - # Top / bottom - - def test_nlargest(self): - # GH10393 - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10])}) - result = df.nlargest(5, 'a') - expected = df.sort_values('a', ascending=False).head(5) - tm.assert_frame_equal(result, expected) - - def test_nlargest_multiple_columns(self): - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10]), - 'c': np.random.permutation(10).astype('float64')}) - result = df.nlargest(5, ['a', 'b']) - expected = df.sort_values(['a', 'b'], ascending=False).head(5) - tm.assert_frame_equal(result, expected) - - def test_nsmallest(self): - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10])}) - result = df.nsmallest(5, 'a') - expected = df.sort_values('a').head(5) - tm.assert_frame_equal(result, expected) - - def test_nsmallest_multiple_columns(self): - from string import ascii_lowercase - df = pd.DataFrame({'a': np.random.permutation(10), - 'b': list(ascii_lowercase[:10]), - 'c': np.random.permutation(10).astype('float64')}) - result = df.nsmallest(5, ['a', 'c']) - expected = df.sort_values(['a', 'c']).head(5) - tm.assert_frame_equal(result, expected) - - def test_nsmallest_nlargest_duplicate_index(self): - # GH 13412 - df = 
pd.DataFrame({'a': [1, 2, 3, 4], - 'b': [4, 3, 2, 1], - 'c': [0, 1, 2, 3]}, - index=[0, 0, 1, 1]) - result = df.nsmallest(4, 'a') - expected = df.sort_values('a').head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, 'a') - expected = df.sort_values('a', ascending=False).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nsmallest(4, ['a', 'c']) - expected = df.sort_values(['a', 'c']).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nsmallest(4, ['c', 'a']) - expected = df.sort_values(['c', 'a']).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, ['a', 'c']) - expected = df.sort_values(['a', 'c'], ascending=False).head(4) - tm.assert_frame_equal(result, expected) - - result = df.nlargest(4, ['c', 'a']) - expected = df.sort_values(['c', 'a'], ascending=False).head(4) - tm.assert_frame_equal(result, expected) - # ---------------------------------------------------------------------- # Isin def test_isin(self): @@ -1965,3 +1899,132 @@ def test_dot(self): with tm.assertRaisesRegexp(ValueError, 'aligned'): df.dot(df2) + + +@pytest.fixture +def df_duplicates(): + return pd.DataFrame({'a': [1, 2, 3, 4, 4], + 'b': [1, 1, 1, 1, 1], + 'c': [0, 1, 2, 5, 4]}, + index=[0, 0, 1, 1, 1]) + + +@pytest.fixture +def df_strings(): + return pd.DataFrame({'a': np.random.permutation(10), + 'b': list(ascii_lowercase[:10]), + 'c': np.random.permutation(10).astype('float64')}) + + +@pytest.fixture +def df_main_dtypes(): + return pd.DataFrame( + {'group': [1, 1, 2], + 'int': [1, 2, 3], + 'float': [4., 5., 6.], + 'string': list('abc'), + 'category_string': pd.Series(list('abc')).astype('category'), + 'category_int': [7, 8, 9], + 'datetime': pd.date_range('20130101', periods=3), + 'datetimetz': pd.date_range('20130101', + periods=3, + tz='US/Eastern'), + 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, + columns=['group', 'int', 'float', 'string', + 'category_string', 'category_int', + 'datetime', 'datetimetz', + 'timedelta']) + + +class TestNLargestNSmallest(object): + + dtype_error_msg_template = ("Column {column!r} has dtype {dtype}, cannot " + "use method {method!r} with this dtype") + + # ---------------------------------------------------------------------- + # Top / bottom + @pytest.mark.parametrize( + 'method, n, order', + product(['nsmallest', 'nlargest'], range(1, 11), + [['a'], + ['c'], + ['a', 'b'], + ['a', 'c'], + ['b', 'a'], + ['b', 'c'], + ['a', 'b', 'c'], + ['c', 'a', 'b'], + ['c', 'b', 'a'], + ['b', 'c', 'a'], + ['b', 'a', 'c'], + + # dups! 
+ ['b', 'c', 'c'], + + ])) + def test_n(self, df_strings, method, n, order): + # GH10393 + df = df_strings + if 'b' in order: + + error_msg = self.dtype_error_msg_template.format( + column='b', method=method, dtype='object') + with tm.assertRaisesRegexp(TypeError, error_msg): + getattr(df, method)(n, order) + else: + ascending = method == 'nsmallest' + result = getattr(df, method)(n, order) + expected = df.sort_values(order, ascending=ascending).head(n) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'method, columns', + product(['nsmallest', 'nlargest'], + product(['group'], ['category_string', 'string']) + )) + def test_n_error(self, df_main_dtypes, method, columns): + df = df_main_dtypes + error_msg = self.dtype_error_msg_template.format( + column=columns[1], method=method, dtype=df[columns[1]].dtype) + with tm.assertRaisesRegexp(TypeError, error_msg): + getattr(df, method)(2, columns) + + def test_n_all_dtypes(self, df_main_dtypes): + df = df_main_dtypes + df.nsmallest(2, list(set(df) - {'category_string', 'string'})) + df.nlargest(2, list(set(df) - {'category_string', 'string'})) + + def test_n_identical_values(self): + # GH15297 + df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]}) + + result = df.nlargest(3, 'a') + expected = pd.DataFrame( + {'a': [1] * 3, 'b': [1, 2, 3]}, index=[0, 1, 2] + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(3, 'a') + expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 'n, order', + product([1, 2, 3, 4, 5], + [['a', 'b', 'c'], + ['c', 'b', 'a'], + ['a'], + ['b'], + ['a', 'b'], + ['c', 'b']])) + def test_n_duplicate_index(self, df_duplicates, n, order): + # GH 13412 + + df = df_duplicates + result = df.nsmallest(n, order) + expected = df.sort_values(order).head(n) + tm.assert_frame_equal(result, expected) + + result = df.nlargest(n, order) + expected = df.sort_values(order, ascending=False).head(n) + tm.assert_frame_equal(result, expected) From 4502e82083f4e253630588665a4fc6002c4f32ed Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 6 Apr 2017 12:41:07 -0400 Subject: [PATCH 348/353] TST: skip decimal conversion tests on 32-bit (#15922) xref #15865 --- pandas/tests/io/json/test_pandas.py | 5 ++++- pandas/tests/io/json/test_ujson.py | 26 ++++++-------------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 8fc8ecbdf8abc..a24e8cdaf0273 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 import pytest -from pandas.compat import range, lrange, StringIO, OrderedDict +from pandas.compat import (range, lrange, StringIO, + OrderedDict, is_platform_32bit) import os import numpy as np @@ -380,6 +381,8 @@ def test_frame_from_json_nones(self): unser = read_json(df.to_json(), dtype=False) self.assertTrue(np.isnan(unser[2][0])) + @pytest.mark.skipif(is_platform_32bit(), + reason="not compliant on 32-bit, xref #15865") def test_frame_to_json_float_precision(self): df = pd.DataFrame([dict(a_float=0.95)]) encoded = df.to_json(double_precision=1) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index c2cbbe1ca65ab..dcfa939f84d7e 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -8,8 +8,6 @@ import simplejson as json import math import pytest -import 
platform -import sys import time import datetime import calendar @@ -25,18 +23,14 @@ import pandas.util.testing as tm -def _skip_if_python_ver(skip_major, skip_minor=None): - major, minor = sys.version_info[:2] - if major == skip_major and (skip_minor is None or minor == skip_minor): - pytest.skip("skipping Python version %d.%d" % (major, minor)) - - json_unicode = (json.dumps if compat.PY3 else partial(json.dumps, encoding="utf-8")) class UltraJSONTests(TestCase): + @pytest.mark.skipif(compat.is_platform_32bit(), + reason="not compliant on 32-bit, xref #15865") def test_encodeDecimal(self): sut = decimal.Decimal("1337.1337") encoded = ujson.encode(sut, double_precision=15) @@ -153,10 +147,9 @@ def test_decimalDecodeTestPrecise(self): decoded = ujson.decode(encoded, precise_float=True) self.assertEqual(sut, decoded) + @pytest.mark.skipif(compat.is_platform_windows() and not compat.PY3, + reason="buggy on win-64 for py2") def test_encodeDoubleTinyExponential(self): - if compat.is_platform_windows() and not compat.PY3: - pytest.skip("buggy on win-64 for py2") - num = 1e-40 self.assertEqual(num, ujson.decode(ujson.encode(num))) num = 1e-100 @@ -275,8 +268,6 @@ def test_encodeUnicodeConversion2(self): self.assertEqual(dec, json.loads(enc)) def test_encodeUnicodeSurrogatePair(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf0\x90\x8d\x86" enc = ujson.encode(input) dec = ujson.decode(enc) @@ -285,8 +276,6 @@ def test_encodeUnicodeSurrogatePair(self): self.assertEqual(dec, json.loads(enc)) def test_encodeUnicode4BytesUTF8(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf0\x91\x80\xb0TRAILINGNORMAL" enc = ujson.encode(input) dec = ujson.decode(enc) @@ -295,8 +284,6 @@ def test_encodeUnicode4BytesUTF8(self): self.assertEqual(dec, json.loads(enc)) def test_encodeUnicode4BytesUTF8Highest(self): - _skip_if_python_ver(2, 5) - _skip_if_python_ver(2, 6) input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL" enc = ujson.encode(input) @@ -462,7 +449,6 @@ def test_datetime_units(self): self.assertRaises(ValueError, ujson.encode, val, date_unit='foo') def test_encodeToUTF8(self): - _skip_if_python_ver(2, 5) input = "\xe6\x97\xa5\xd1\x88" enc = ujson.encode(input, ensure_ascii=False) dec = ujson.decode(enc) @@ -696,8 +682,8 @@ def test_decodeNumericIntNeg(self): input = "-31337" self.assertEqual(-31337, ujson.decode(input)) + @pytest.mark.skipif(compat.PY3, reason="only PY2") def test_encodeUnicode4BytesUTF8Fail(self): - _skip_if_python_ver(3) input = "\xfd\xbf\xbf\xbf\xbf\xbf" try: enc = ujson.encode(input) # noqa @@ -1029,7 +1015,7 @@ def testIntMax(self): num = np.uint32(np.iinfo(np.uint32).max) self.assertEqual(np.uint32(ujson.decode(ujson.encode(num))), num) - if platform.architecture()[0] != '32bit': + if not compat.is_platform_32bit(): num = np.int64(np.iinfo(np.int64).max) self.assertEqual(np.int64(ujson.decode(ujson.encode(num))), num) From 0cfc08cf4584e8442c84c30d53f1dceafeac5abf Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 6 Apr 2017 20:16:55 -0400 Subject: [PATCH 349/353] CLN: algos (#15929) * CLN: clean up select_n algos * CLN: clean ensure_data closes #15903 * return ndtype, so can eliminate special cases * unique * fixups --- pandas/core/algorithms.py | 942 ++++++++++++++---------------- pandas/core/frame.py | 10 +- pandas/core/series.py | 6 +- pandas/tests/test_algos.py | 21 +- pandas/tests/types/test_dtypes.py | 1 + pandas/types/common.py | 2 + pandas/types/dtypes.py | 2 + 7 files changed, 471 insertions(+), 513 deletions(-) diff --git 
a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 80664a9ba3019..244f882f2c103 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -8,30 +8,22 @@ from pandas import compat, _np_version_under1p8 from pandas.types.cast import maybe_promote -from pandas.types.generic import ABCSeries, ABCIndex -from pandas.types.common import (is_unsigned_integer_dtype, - is_signed_integer_dtype, - is_integer_dtype, - is_complex_dtype, - is_categorical_dtype, - is_extension_type, - is_datetimetz, - is_period_dtype, - is_period_arraylike, - is_numeric_dtype, - is_float_dtype, - is_bool_dtype, - needs_i8_conversion, - is_categorical, - is_datetime64_dtype, - is_timedelta64_dtype, - is_scalar, - _ensure_platform_int, - _ensure_object, - _ensure_float64, - _ensure_uint64, - _ensure_int64, - is_list_like) +from pandas.types.generic import (ABCSeries, ABCIndex, + ABCIndexClass, ABCCategorical) +from pandas.types.common import ( + is_unsigned_integer_dtype, is_signed_integer_dtype, + is_integer_dtype, is_complex_dtype, + is_categorical_dtype, is_sparse, + is_period_dtype, + is_numeric_dtype, is_float_dtype, + is_bool_dtype, needs_i8_conversion, + is_categorical, is_datetimetz, + is_datetime64_any_dtype, is_datetime64tz_dtype, + is_timedelta64_dtype, + is_scalar, is_list_like, + _ensure_platform_int, _ensure_object, + _ensure_float64, _ensure_uint64, + _ensure_int64) from pandas.compat.numpy import _np_version_under1p10 from pandas.types.missing import isnull @@ -45,40 +37,190 @@ # dtype access # # --------------- # -def _ensure_data_view(values): +def _ensure_data(values, dtype=None): """ - helper routine to ensure that our data is of the correct + routine to ensure that our data is of the correct input dtype for lower-level routines + This will coerce: + - ints -> int64 + - uint -> uint64 + - bool -> uint64 (TODO this should be uint8) + - datetimelike -> i8 + - datetime64tz -> i8 (in local tz) + - categorical -> codes + Parameters ---------- values : array-like + dtype : pandas_dtype, optional + coerce to this dtype + + Returns + ------- + (ndarray, pandas_dtype, algo dtype as a string) + """ - if needs_i8_conversion(values): - values = values.view(np.int64) - elif is_period_arraylike(values): - from pandas.tseries.period import PeriodIndex - values = PeriodIndex(values).asi8 - elif is_categorical_dtype(values): - values = values.values.codes - elif isinstance(values, (ABCSeries, ABCIndex)): - values = values.values - - if is_signed_integer_dtype(values): + if (needs_i8_conversion(values) or + is_period_dtype(dtype) or + is_datetime64_any_dtype(dtype) or + is_timedelta64_dtype(dtype)): + if is_period_dtype(values) or is_period_dtype(dtype): + from pandas import PeriodIndex + values = PeriodIndex(values) + dtype = values.dtype + elif is_timedelta64_dtype(values) or is_timedelta64_dtype(dtype): + from pandas import TimedeltaIndex + values = TimedeltaIndex(values) + dtype = values.dtype + else: + # Datetime + from pandas import DatetimeIndex + values = DatetimeIndex(values) + dtype = values.dtype + + return values.asi8, dtype, 'int64' + + elif is_categorical_dtype(values) or is_categorical_dtype(dtype): + values = getattr(values, 'values', values) + values = values.codes + dtype = 'category' + + # we are actually coercing to int64 + # until our algos suppport int* directly (not all do) values = _ensure_int64(values) - elif is_unsigned_integer_dtype(values): - values = _ensure_uint64(values) - elif is_complex_dtype(values): - values = _ensure_float64(values) - elif 
is_float_dtype(values): - values = _ensure_float64(values) - else: + + return values, dtype, 'int64' + + values = np.asarray(values) + + try: + if is_bool_dtype(values) or is_bool_dtype(dtype): + # we are actually coercing to uint64 + # until our algos suppport uint8 directly (see TODO) + values = values.astype('uint64') + dtype = 'bool' + ndtype = 'uint64' + elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): + values = _ensure_int64(values) + ndtype = dtype = 'int64' + elif (is_unsigned_integer_dtype(values) or + is_unsigned_integer_dtype(dtype)): + values = _ensure_uint64(values) + ndtype = dtype = 'uint64' + elif is_complex_dtype(values) or is_complex_dtype(dtype): + values = _ensure_float64(values) + ndtype = dtype = 'float64' + elif is_float_dtype(values) or is_float_dtype(dtype): + values = _ensure_float64(values) + ndtype = dtype = 'float64' + else: + values = _ensure_object(values) + ndtype = dtype = 'object' + + except (TypeError, ValueError): + # if we are trying to coerce to a dtype + # and it is incompat this will fall thru to here values = _ensure_object(values) + ndtype = dtype = 'object' + + return values, dtype, ndtype + + +def _reconstruct_data(values, dtype, original): + """ + reverse of _ensure_data + + Parameters + ---------- + values : ndarray + dtype : pandas_dtype + original : ndarray-like + + Returns + ------- + Index for extension types, otherwise ndarray casted to dtype + + """ + from pandas import Index + if is_categorical_dtype(dtype): + pass + elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): + values = Index(original)._shallow_copy(values, name=None) + elif dtype is not None: + values = values.astype(dtype) return values +def _ensure_arraylike(values): + """ + ensure that we are arraylike if not already + """ + if not isinstance(values, (np.ndarray, ABCCategorical, + ABCIndexClass, ABCSeries)): + values = np.array(values) + return values + + +_hashtables = { + 'float64': (htable.Float64HashTable, htable.Float64Vector), + 'uint64': (htable.UInt64HashTable, htable.UInt64Vector), + 'int64': (htable.Int64HashTable, htable.Int64Vector), + 'string': (htable.StringHashTable, htable.ObjectVector), + 'object': (htable.PyObjectHashTable, htable.ObjectVector) +} + + +def _get_hashtable_algo(values): + """ + Parameters + ---------- + values : arraylike + + Returns + ------- + tuples(hashtable class, + vector class, + values, + dtype, + ndtype) + """ + values, dtype, ndtype = _ensure_data(values) + + if ndtype == 'object': + + # its cheaper to use a String Hash Table than Object + if lib.infer_dtype(values) in ['string']: + ndtype = 'string' + else: + ndtype = 'object' + + htable, table = _hashtables[ndtype] + return (htable, table, values, dtype, ndtype) + + +def _get_data_algo(values, func_map): + + if is_categorical_dtype(values): + values = values._values_for_rank() + + values, dtype, ndtype = _ensure_data(values) + if ndtype == 'object': + + # its cheaper to use a String Hash Table than Object + if lib.infer_dtype(values) in ['string']: + try: + f = func_map['string'] + except KeyError: + pass + + f = func_map.get(ndtype, func_map['object']) + + return f, values + + # --------------- # # top-level algos # # --------------- # @@ -104,92 +246,41 @@ def match(to_match, values, na_sentinel=-1): match : ndarray of integers """ values = com._asarray_tuplesafe(values) - if issubclass(values.dtype.type, string_types): - values = np.array(values, dtype='O') - - f = lambda htype, caster: _match_object(to_match, values, htype, caster) - result = 
_hashtable_algo(f, values, np.int64) + htable, _, values, dtype, ndtype = _get_hashtable_algo(values) + to_match, _, _ = _ensure_data(to_match, dtype) + table = htable(min(len(to_match), 1000000)) + table.map_locations(values) + result = table.lookup(to_match) if na_sentinel != -1: # replace but return a numpy array # use a Series because it handles dtype conversions properly - from pandas.core.series import Series + from pandas import Series result = Series(result.ravel()).replace(-1, na_sentinel).values.\ reshape(result.shape) return result -def _match_object(values, index, table_type, type_caster): - values = type_caster(values) - index = type_caster(index) - table = table_type(min(len(index), 1000000)) - table.map_locations(index) - return table.lookup(values) - - -def unique(values): - """ - Compute unique values (not necessarily sorted) efficiently from input array - of values - - Parameters - ---------- - values : array-like - - Returns - ------- - uniques - """ - values = com._asarray_tuplesafe(values) - - f = lambda htype, caster: _unique_object(values, htype, caster) - return _hashtable_algo(f, values) - - -def _unique_object(values, table_type, type_caster): - values = type_caster(values) - table = table_type(min(len(values), 1000000)) - uniques = table.unique(values) - return type_caster(uniques) - - def unique1d(values): """ Hash table-based unique """ - if np.issubdtype(values.dtype, np.floating): - table = htable.Float64HashTable(len(values)) - uniques = np.array(table.unique(_ensure_float64(values)), - dtype=np.float64) - elif np.issubdtype(values.dtype, np.datetime64): - table = htable.Int64HashTable(len(values)) - uniques = table.unique(_ensure_int64(values)) - uniques = uniques.view('M8[ns]') - elif np.issubdtype(values.dtype, np.timedelta64): - table = htable.Int64HashTable(len(values)) - uniques = table.unique(_ensure_int64(values)) - uniques = uniques.view('m8[ns]') - elif np.issubdtype(values.dtype, np.signedinteger): - table = htable.Int64HashTable(len(values)) - uniques = table.unique(_ensure_int64(values)) - elif np.issubdtype(values.dtype, np.unsignedinteger): - table = htable.UInt64HashTable(len(values)) - uniques = table.unique(_ensure_uint64(values)) - else: - - # its cheaper to use a String Hash Table than Object - if lib.infer_dtype(values) in ['string']: - table = htable.StringHashTable(len(values)) - else: - table = htable.PyObjectHashTable(len(values)) + values = _ensure_arraylike(values) + original = values + htable, _, values, dtype, ndtype = _get_hashtable_algo(values) - uniques = table.unique(_ensure_object(values)) + table = htable(len(values)) + uniques = table.unique(values) + uniques = _reconstruct_data(uniques, dtype, original) return uniques +unique = unique1d + + def isin(comps, values): """ Compute the isin boolean array @@ -213,38 +304,11 @@ def isin(comps, values): " to isin(), you passed a " "[{0}]".format(type(values).__name__)) - from pandas import DatetimeIndex, TimedeltaIndex, PeriodIndex - if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = np.array(list(values), dtype='object') - if needs_i8_conversion(comps): - if is_period_dtype(values): - comps = PeriodIndex(comps) - values = PeriodIndex(values) - elif is_timedelta64_dtype(comps): - comps = TimedeltaIndex(comps) - values = TimedeltaIndex(values) - else: - comps = DatetimeIndex(comps) - values = DatetimeIndex(values) - - values = values.asi8 - comps = comps.asi8 - elif is_bool_dtype(comps): - - try: - comps = np.asarray(comps).view('uint8') - values = 
np.asarray(values).view('uint8') - except TypeError: - # object array conversion will fail - pass - elif is_numeric_dtype(comps): - comps = np.asarray(comps) - values = np.asarray(values) - else: - comps = np.asarray(comps).astype(object) - values = np.asarray(values).astype(object) + comps, dtype, _ = _ensure_data(comps) + values, _, _ = _ensure_data(values, dtype=dtype) # GH11232 # work-around for numpy < 1.8 and comparisions on py3 @@ -396,53 +460,32 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ - from pandas import Index, Series, DatetimeIndex, PeriodIndex - - # handling possibilities here - # - for a numpy datetimelike simply view as i8 then cast back - # - bool handled as uint8 then cast back - # - for an extension datetimelike view as i8 then - # reconstruct from boxed values to transfer metadata - dtype = None - if needs_i8_conversion(values): - if is_period_dtype(values): - values = PeriodIndex(values) - vals = values.asi8 - elif is_datetimetz(values): - values = DatetimeIndex(values) - vals = values.asi8 - else: - # numpy dtype - dtype = values.dtype - vals = values.view(np.int64) - elif is_bool_dtype(values): - dtype = bool - vals = np.asarray(values).view('uint8') - else: - vals = np.asarray(values) - (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) + original = values + values, dtype, _ = _ensure_data(values) + (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables) - table = hash_klass(size_hint or len(vals)) + table = hash_klass(size_hint or len(values)) uniques = vec_klass() - check_nulls = not is_integer_dtype(values) - labels = table.get_labels(vals, uniques, 0, na_sentinel, check_nulls) + check_nulls = not is_integer_dtype(original) + labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls) labels = _ensure_platform_int(labels) - uniques = uniques.to_array() if sort and len(uniques) > 0: uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) - if dtype is not None: - uniques = uniques.astype(dtype) + uniques = _reconstruct_data(uniques, dtype, original) - if isinstance(values, Index): - uniques = values._shallow_copy(uniques, name=None) - elif isinstance(values, Series): + # return original tenor + if isinstance(original, ABCIndexClass): + uniques = original._shallow_copy(uniques, name=None) + elif isinstance(original, ABCSeries): + from pandas import Index uniques = Index(uniques) + return labels, uniques @@ -471,7 +514,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, value_counts : Series """ - from pandas.core.series import Series + from pandas.core.series import Series, Index name = getattr(values, 'name', None) if bins is not None: @@ -483,17 +526,16 @@ def value_counts(values, sort=True, ascending=False, normalize=False, raise TypeError("bins argument only works with numeric data.") values = cat.codes - if is_extension_type(values) and not is_datetimetz(values): + if is_categorical_dtype(values) or is_sparse(values): + # handle Categorical and sparse, - # datetime tz can be handeled in ndarray path result = Series(values).values.value_counts(dropna=dropna) result.name = name counts = result.values + else: - # ndarray path. 
pass original to handle DatetimeTzBlock - keys, counts = _value_counts_arraylike(values, dropna=dropna) + keys, counts = _value_counts_arraylike(values, dropna) - from pandas import Index, Series if not isinstance(keys, Index): keys = Index(keys) result = Series(counts, index=keys, name=name) @@ -513,60 +555,45 @@ def value_counts(values, sort=True, ascending=False, normalize=False, return result -def _value_counts_arraylike(values, dropna=True): - is_datetimetz_type = is_datetimetz(values) - is_period_type = (is_period_dtype(values) or - is_period_arraylike(values)) - - orig = values - - from pandas.core.series import Series - values = Series(values).values - dtype = values.dtype +def _value_counts_arraylike(values, dropna): + """ + Parameters + ---------- + values : arraylike + dropna : boolean - if needs_i8_conversion(dtype) or is_period_type: + Returns + ------- + (uniques, counts) - from pandas.tseries.index import DatetimeIndex - from pandas.tseries.period import PeriodIndex + """ + values = _ensure_arraylike(values) + original = values + values, dtype, ndtype = _ensure_data(values) - if is_period_type: - # values may be an object - values = PeriodIndex(values) - freq = values.freq + if needs_i8_conversion(dtype): + # i8 - values = values.view(np.int64) keys, counts = htable.value_count_int64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] - # convert the keys back to the dtype we came in - keys = keys.astype(dtype) - - # dtype handling - if is_datetimetz_type: - keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz) - elif is_period_type: - keys = PeriodIndex._from_ordinals(keys, freq=freq) - - elif is_signed_integer_dtype(dtype): - values = _ensure_int64(values) - keys, counts = htable.value_count_int64(values, dropna) - elif is_unsigned_integer_dtype(dtype): - values = _ensure_uint64(values) - keys, counts = htable.value_count_uint64(values, dropna) - elif is_float_dtype(dtype): - values = _ensure_float64(values) - keys, counts = htable.value_count_float64(values, dropna) else: - values = _ensure_object(values) - keys, counts = htable.value_count_object(values, dropna) + # ndarray like + + # TODO: handle uint8 + f = getattr(htable, "value_count_{dtype}".format(dtype=ndtype)) + keys, counts = f(values, dropna) mask = isnull(values) if not dropna and mask.any(): - keys = np.insert(keys, 0, np.NaN) - counts = np.insert(counts, 0, mask.sum()) + if not isnull(keys).any(): + keys = np.insert(keys, 0, np.NaN) + counts = np.insert(counts, 0, mask.sum()) + + keys = _reconstruct_data(keys, original.dtype, original) return keys, counts @@ -593,33 +620,9 @@ def duplicated(values, keep='first'): duplicated : ndarray """ - dtype = values.dtype - - # no need to revert to original type - if needs_i8_conversion(dtype): - values = values.view(np.int64) - elif is_period_arraylike(values): - from pandas.tseries.period import PeriodIndex - values = PeriodIndex(values).asi8 - elif is_categorical_dtype(dtype): - values = values.values.codes - elif isinstance(values, (ABCSeries, ABCIndex)): - values = values.values - - if is_signed_integer_dtype(dtype): - values = _ensure_int64(values) - duplicated = htable.duplicated_int64(values, keep=keep) - elif is_unsigned_integer_dtype(dtype): - values = _ensure_uint64(values) - duplicated = htable.duplicated_uint64(values, keep=keep) - elif is_float_dtype(dtype): - values = _ensure_float64(values) - duplicated = htable.duplicated_float64(values, keep=keep) - else: - values = _ensure_object(values) - duplicated = 
htable.duplicated_object(values, keep=keep) - - return duplicated + values, dtype, ndtype = _ensure_data(values) + f = getattr(htable, "duplicated_{dtype}".format(dtype=ndtype)) + return f(values, keep=keep) def mode(values): @@ -635,40 +638,34 @@ def mode(values): ------- mode : Series """ + from pandas import Series - # must sort because hash order isn't necessarily defined. - from pandas.core.series import Series + values = _ensure_arraylike(values) + original = values - if isinstance(values, Series): - constructor = values._constructor - values = values.values - else: - values = np.asanyarray(values) - constructor = Series + # categorical is a fast-path + if is_categorical_dtype(values): - dtype = values.dtype - if is_signed_integer_dtype(values): - values = _ensure_int64(values) - result = constructor(np.sort(htable.mode_int64(values)), dtype=dtype) - elif is_unsigned_integer_dtype(values): - values = _ensure_uint64(values) - result = constructor(np.sort(htable.mode_uint64(values)), dtype=dtype) - elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): - dtype = values.dtype - values = values.view(np.int64) - result = constructor(np.sort(htable.mode_int64(values)), dtype=dtype) - elif is_categorical_dtype(values): - result = constructor(values.mode()) - else: + if isinstance(values, Series): + return Series(values.values.mode()) + return values.mode() + + values, dtype, ndtype = _ensure_data(values) + + # TODO: this should support float64 + if ndtype not in ['int64', 'uint64', 'object']: + ndtype = 'object' values = _ensure_object(values) - res = htable.mode_object(values) - try: - res = np.sort(res) - except TypeError as e: - warn("Unable to sort modes: %s" % e) - result = constructor(res, dtype=dtype) - return result + f = getattr(htable, "mode_{dtype}".format(dtype=ndtype)) + result = f(values) + try: + result = np.sort(result) + except TypeError as e: + warn("Unable to sort modes: %s" % e) + + result = _reconstruct_data(result, original.dtype, original) + return Series(result) def rank(values, axis=0, method='average', na_option='keep', @@ -859,6 +856,12 @@ def quantile(x, q, interpolation_method='fraction'): values = np.sort(x) + def _interpolate(a, b, fraction): + """Returns the point at the given fraction between a and b, where + 'fraction' must be between 0 and 1. + """ + return a + (b - a) * fraction + def _get_score(at): if len(values) == 0: return np.nan @@ -887,261 +890,186 @@ def _get_score(at): return algos.arrmap_float64(q, _get_score) -def _interpolate(a, b, fraction): - """Returns the point at the given fraction between a and b, where - 'fraction' must be between 0 and 1. - """ - return a + (b - a) * fraction - - -def nsmallest(arr, n, keep='first'): - """ - Find the indices of the n smallest values of a numpy array. - - Note: Fails silently with NaN. - """ - if keep == 'last': - arr = arr[::-1] - - narr = len(arr) - n = min(n, narr) - - arr = _ensure_data_view(arr) - kth_val = algos.kth_smallest(arr.copy(), n - 1) - return _finalize_nsmallest(arr, kth_val, n, keep, narr) - +# --------------- # +# select n # +# --------------- # -def nlargest(arr, n, keep='first'): - """ - Find the indices of the n largest values of a numpy array. +class SelectN(object): - Note: Fails silently with NaN. 
- """ - arr = _ensure_data_view(arr) - return nsmallest(-arr, n, keep=keep) + def __init__(self, obj, n, keep): + self.obj = obj + self.n = n + self.keep = keep + if self.keep not in ('first', 'last'): + raise ValueError('keep must be either "first", "last"') -def select_n_slow(dropped, n, keep, method): - reverse_it = (keep == 'last' or method == 'nlargest') - ascending = method == 'nsmallest' - slc = np.s_[::-1] if reverse_it else np.s_[:] - return dropped[slc].sort_values(ascending=ascending).head(n) + def nlargest(self): + return self.compute('nlargest') + def nsmallest(self): + return self.compute('nsmallest') -_select_methods = {'nsmallest': nsmallest, 'nlargest': nlargest} + @staticmethod + def is_valid_dtype_n_method(dtype): + """ + Helper function to determine if dtype is valid for + nsmallest/nlargest methods + """ + return ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or + needs_i8_conversion(dtype)) -def _is_valid_dtype_n_method(dtype): - """ - Helper function to determine if dtype is valid for - nsmallest/nlargest methods +class SelectNSeries(SelectN): """ - return ((is_numeric_dtype(dtype) and not is_complex_dtype(dtype)) or - needs_i8_conversion(dtype)) - - -def select_n_series(series, n, keep, method): - """Implement n largest/smallest for pandas Series + Implement n largest/smallest for Series Parameters ---------- - series : pandas.Series object + obj : Series n : int keep : {'first', 'last'}, default 'first' - method : str, {'nlargest', 'nsmallest'} Returns ------- nordered : Series """ - dtype = series.dtype - if not _is_valid_dtype_n_method(dtype): - raise TypeError("Cannot use method '{method}' with " - "dtype {dtype}".format(method=method, dtype=dtype)) - if keep not in ('first', 'last'): - raise ValueError('keep must be either "first", "last"') + def compute(self, method): + + n = self.n + dtype = self.obj.dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError("Cannot use method '{method}' with " + "dtype {dtype}".format(method=method, + dtype=dtype)) + + if n <= 0: + return self.obj[[]] + + dropped = self.obj.dropna() + + # slow method + if n >= len(self.obj): - if n <= 0: - return series[[]] + reverse_it = (self.keep == 'last' or method == 'nlargest') + ascending = method == 'nsmallest' + slc = np.s_[::-1] if reverse_it else np.s_[:] + return dropped[slc].sort_values(ascending=ascending).head(n) - dropped = series.dropna() + # fast method + arr, _, _ = _ensure_data(dropped.values) + if method == 'nlargest': + arr = -arr - if n >= len(series): - return select_n_slow(dropped, n, keep, method) + if self.keep == 'last': + arr = arr[::-1] - inds = _select_methods[method](dropped.values, n, keep) - return dropped.iloc[inds] + narr = len(arr) + n = min(n, narr) + kth_val = algos.kth_smallest(arr.copy(), n - 1) + ns, = np.nonzero(arr <= kth_val) + inds = ns[arr[ns].argsort(kind='mergesort')][:n] + if self.keep == 'last': + # reverse indices + inds = narr - 1 - inds -def select_n_frame(frame, columns, n, method, keep): - """Implement n largest/smallest for pandas DataFrame + return dropped.iloc[inds] + + +class SelectNFrame(SelectN): + """ + Implement n largest/smallest for DataFrame Parameters ---------- - frame : pandas.DataFrame object - columns : list or str + obj : DataFrame n : int keep : {'first', 'last'}, default 'first' - method : str, {'nlargest', 'nsmallest'} + columns : list or str Returns ------- nordered : DataFrame """ - from pandas import Int64Index - if not is_list_like(columns): - columns = [columns] - columns = list(columns) - 
for column in columns: - dtype = frame[column].dtype - if not _is_valid_dtype_n_method(dtype): - raise TypeError(( - "Column {column!r} has dtype {dtype}, cannot use method " - "{method!r} with this dtype" - ).format(column=column, dtype=dtype, method=method)) - - def get_indexer(current_indexer, other_indexer): - """Helper function to concat `current_indexer` and `other_indexer` - depending on `method` - """ - if method == 'nsmallest': - return current_indexer.append(other_indexer) - else: - return other_indexer.append(current_indexer) - - # Below we save and reset the index in case index contains duplicates - original_index = frame.index - cur_frame = frame = frame.reset_index(drop=True) - cur_n = n - indexer = Int64Index([]) - - for i, column in enumerate(columns): - - # For each column we apply method to cur_frame[column]. If it is the - # last column in columns, or if the values returned are unique in - # frame[column] we save this index and break - # Otherwise we must save the index of the non duplicated values - # and set the next cur_frame to cur_frame filtered on all duplcicated - # values (#GH15297) - series = cur_frame[column] - values = getattr(series, method)(cur_n, keep=keep) - is_last_column = len(columns) - 1 == i - if is_last_column or values.nunique() == series.isin(values).sum(): - - # Last column in columns or values are unique in series => values - # is all that matters - indexer = get_indexer(indexer, values.index) - break - - duplicated_filter = series.duplicated(keep=False) - duplicated = values[duplicated_filter] - non_duplicated = values[~duplicated_filter] - indexer = get_indexer(indexer, non_duplicated.index) - - # Must set cur frame to include all duplicated values to consider for - # the next column, we also can reduce cur_n by the current length of - # the indexer - cur_frame = cur_frame[series.isin(duplicated)] - cur_n = n - len(indexer) - - frame = frame.take(indexer) - - # Restore the index on frame - frame.index = original_index.take(indexer) - return frame - - -def _finalize_nsmallest(arr, kth_val, n, keep, narr): - ns, = np.nonzero(arr <= kth_val) - inds = ns[arr[ns].argsort(kind='mergesort')][:n] - if keep == 'last': - # reverse indices - return narr - 1 - inds - else: - return inds - - -# ------- # -# helpers # -# ------- # - -def _hashtable_algo(f, values, return_dtype=None): - """ - f(HashTable, type_caster) -> result - """ - - dtype = values.dtype - if is_float_dtype(dtype): - return f(htable.Float64HashTable, _ensure_float64) - elif is_signed_integer_dtype(dtype): - return f(htable.Int64HashTable, _ensure_int64) - elif is_unsigned_integer_dtype(dtype): - return f(htable.UInt64HashTable, _ensure_uint64) - elif is_datetime64_dtype(dtype): - return_dtype = return_dtype or 'M8[ns]' - return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) - elif is_timedelta64_dtype(dtype): - return_dtype = return_dtype or 'm8[ns]' - return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) - - # its cheaper to use a String Hash Table than Object - if lib.infer_dtype(values) in ['string']: - return f(htable.StringHashTable, _ensure_object) - - # use Object - return f(htable.PyObjectHashTable, _ensure_object) - - -_hashtables = { - 'float64': (htable.Float64HashTable, htable.Float64Vector), - 'uint64': (htable.UInt64HashTable, htable.UInt64Vector), - 'int64': (htable.Int64HashTable, htable.Int64Vector), - 'string': (htable.StringHashTable, htable.ObjectVector), - 'object': (htable.PyObjectHashTable, htable.ObjectVector) -} - - -def 
_get_data_algo(values, func_map): - - f = None - - if is_categorical_dtype(values): - values = values._values_for_rank() - - if is_float_dtype(values): - f = func_map['float64'] - values = _ensure_float64(values) - - elif needs_i8_conversion(values): - f = func_map['int64'] - values = values.view('i8') - - elif is_signed_integer_dtype(values): - f = func_map['int64'] - values = _ensure_int64(values) - - elif is_unsigned_integer_dtype(values): - f = func_map['uint64'] - values = _ensure_uint64(values) - - else: - values = _ensure_object(values) - - # its cheaper to use a String Hash Table than Object - if lib.infer_dtype(values) in ['string']: - try: - f = func_map['string'] - except KeyError: - pass - - if f is None: - f = func_map['object'] - - return f, values - -# ---- # + def __init__(self, obj, n, keep, columns): + super(SelectNFrame, self).__init__(obj, n, keep) + if not is_list_like(columns): + columns = [columns] + columns = list(columns) + self.columns = columns + + def compute(self, method): + + from pandas import Int64Index + n = self.n + frame = self.obj + columns = self.columns + + for column in columns: + dtype = frame[column].dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError(( + "Column {column!r} has dtype {dtype}, cannot use method " + "{method!r} with this dtype" + ).format(column=column, dtype=dtype, method=method)) + + def get_indexer(current_indexer, other_indexer): + """Helper function to concat `current_indexer` and `other_indexer` + depending on `method` + """ + if method == 'nsmallest': + return current_indexer.append(other_indexer) + else: + return other_indexer.append(current_indexer) + + # Below we save and reset the index in case index contains duplicates + original_index = frame.index + cur_frame = frame = frame.reset_index(drop=True) + cur_n = n + indexer = Int64Index([]) + + for i, column in enumerate(columns): + + # For each column we apply method to cur_frame[column]. 
+ # If it is the last column in columns, or if the values + # returned are unique in frame[column] we save this index + # and break + # Otherwise we must save the index of the non duplicated values + # and set the next cur_frame to cur_frame filtered on all + # duplcicated values (#GH15297) + series = cur_frame[column] + values = getattr(series, method)(cur_n, keep=self.keep) + is_last_column = len(columns) - 1 == i + if is_last_column or values.nunique() == series.isin(values).sum(): + + # Last column in columns or values are unique in + # series => values + # is all that matters + indexer = get_indexer(indexer, values.index) + break + + duplicated_filter = series.duplicated(keep=False) + duplicated = values[duplicated_filter] + non_duplicated = values[~duplicated_filter] + indexer = get_indexer(indexer, non_duplicated.index) + + # Must set cur frame to include all duplicated values + # to consider for the next column, we also can reduce + # cur_n by the current length of the indexer + cur_frame = cur_frame[series.isin(duplicated)] + cur_n = n - len(indexer) + + frame = frame.take(indexer) + + # Restore the index on frame + frame.index = original_index.take(indexer) + return frame + + +# ------- ## ---- # # take # # ---- # @@ -1534,23 +1462,41 @@ def func(arr, indexer, out, fill_value=np.nan): def diff(arr, n, axis=0): - """ difference of n between self, - analagoust to s-s.shift(n) """ + """ + difference of n between self, + analagoust to s-s.shift(n) + + Parameters + ---------- + arr : ndarray + n : int + number of periods + axis : int + axis to shift on + + Returns + ------- + shifted + + """ n = int(n) na = np.nan dtype = arr.dtype + is_timedelta = False if needs_i8_conversion(arr): dtype = np.float64 arr = arr.view('i8') na = iNaT is_timedelta = True - elif issubclass(dtype.type, np.integer): - dtype = np.float64 - elif issubclass(dtype.type, np.bool_): + + elif is_bool_dtype(dtype): dtype = np.object_ + elif is_integer_dtype(dtype): + dtype = np.float64 + dtype = np.dtype(dtype) out_arr = np.empty(arr.shape, dtype=dtype) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3980bf6cdbc09..f6199be2d1fc9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3441,7 +3441,10 @@ def nlargest(self, n, columns, keep='first'): 1 10 b 2 2 8 d NaN """ - return algorithms.select_n_frame(self, columns, n, 'nlargest', keep) + return algorithms.SelectNFrame(self, + n=n, + keep=keep, + columns=columns).nlargest() def nsmallest(self, n, columns, keep='first'): """Get the rows of a DataFrame sorted by the `n` smallest @@ -3475,7 +3478,10 @@ def nsmallest(self, n, columns, keep='first'): 0 1 a 1 2 8 d NaN """ - return algorithms.select_n_frame(self, columns, n, 'nsmallest', keep) + return algorithms.SelectNFrame(self, + n=n, + keep=keep, + columns=columns).nsmallest() def swaplevel(self, i=-2, j=-1, axis=0): """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 1aaa106d2c68f..d6a1a9d98faf4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1856,8 +1856,7 @@ def nlargest(self, n=5, keep='first'): 121637 4.240952 dtype: float64 """ - return algorithms.select_n_series(self, n=n, keep=keep, - method='nlargest') + return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() def nsmallest(self, n=5, keep='first'): """Return the smallest `n` elements. 
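Editorial note: the frame.py and series.py hunks above, and the nsmallest hunk below, only swap the internal helpers (select_n_frame / select_n_series) for the new SelectNFrame / SelectNSeries classes; the public Series.nlargest, Series.nsmallest and DataFrame.nlargest signatures are unchanged. A minimal sketch of the behaviour that should be preserved, assuming a pandas build with this patch series applied (the data below is illustrative only, not taken from the patches):

    import pandas as pd

    s = pd.Series([7, 3, 9, 3, 1], index=list('abcde'))

    # top-2 values; ties are broken by position because keep='first'
    print(s.nlargest(2))                  # c    9
                                          # a    7

    # bottom-2 values; the first of the tied 3s ('b') wins
    print(s.nsmallest(2, keep='first'))   # e    1
                                          # b    3

    df = pd.DataFrame({'a': [1, 10, 8, 11], 'b': list('wxyz')})
    print(df.nlargest(2, 'a'))            # rows with a == 11 and a == 10

Both code paths now funnel through the same SelectN.compute machinery, so the slow sort-based fallback and the kth_smallest fast path are expected to give identical results for these calls.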
@@ -1903,8 +1902,7 @@ def nsmallest(self, n=5, keep='first'): 359919 -4.331927 dtype: float64 """ - return algorithms.select_n_series(self, n=n, keep=keep, - method='nsmallest') + return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ac3a42c3cf122..d893183dae0ed 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -620,9 +620,9 @@ def test_dropna(self): # 32-bit linux has a different ordering if not compat.is_platform_32bit(): - tm.assert_series_equal( - pd.Series([10.3, 5., 5., None]).value_counts(dropna=False), - pd.Series([2, 1, 1], index=[5., 10.3, np.nan])) + result = pd.Series([10.3, 5., 5., None]).value_counts(dropna=False) + expected = pd.Series([2, 1, 1], index=[5., 10.3, np.nan]) + tm.assert_series_equal(result, expected) def test_value_counts_normalized(self): # GH12558 @@ -1356,16 +1356,19 @@ def test_uint64_overflow(self): def test_categorical(self): c = Categorical([1, 2]) - exp = Series([1, 2], dtype=np.int64) - tm.assert_series_equal(algos.mode(c), exp) + exp = c + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) c = Categorical([1, 'a', 'a']) - exp = Series(['a'], dtype=object) - tm.assert_series_equal(algos.mode(c), exp) + exp = Categorical(['a'], categories=[1, 'a']) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) c = Categorical([1, 1, 2, 3, 3]) - exp = Series([1, 3], dtype=np.int64) - tm.assert_series_equal(algos.mode(c), exp) + exp = Categorical([1, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) def test_index(self): idx = Index([1, 2, 3]) diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index 8ef2868ae324f..e7b2edeb57714 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -149,6 +149,7 @@ def test_construction_from_string(self): lambda: DatetimeTZDtype.construct_from_string('foo')) def test_is_dtype(self): + self.assertFalse(DatetimeTZDtype.is_dtype(None)) self.assertTrue(DatetimeTZDtype.is_dtype(self.dtype)) self.assertTrue(DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')) self.assertFalse(DatetimeTZDtype.is_dtype('foo')) diff --git a/pandas/types/common.py b/pandas/types/common.py index a1f03e59a5e6e..017805673defe 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -359,6 +359,8 @@ def _coerce_to_dtype(dtype): def _get_dtype(arr_or_dtype): + if arr_or_dtype is None: + raise TypeError if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): diff --git a/pandas/types/dtypes.py b/pandas/types/dtypes.py index 43135ba94ab46..c3494df93476b 100644 --- a/pandas/types/dtypes.py +++ b/pandas/types/dtypes.py @@ -82,6 +82,8 @@ def is_dtype(cls, dtype): return True elif isinstance(dtype, np.dtype): return False + elif dtype is None: + return False try: return cls.construct_from_string(dtype) is not None except: From 078dbdf2d320e434f4a442610e473a64e0f14f90 Mon Sep 17 00:00:00 2001 From: Paul-Liu Date: Sun, 13 Nov 2016 20:41:25 -0500 Subject: [PATCH 350/353] BUG: resampling with NaT in TimedeltaIndex (#13223) --- pandas/tests/tseries/test_resample.py | 9 +++++++++ pandas/tseries/resample.py | 13 +++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tseries/test_resample.py 
b/pandas/tests/tseries/test_resample.py index 57e5a1631f8e8..f8cac1fc0a0f2 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -1035,6 +1035,15 @@ def test_resample_timedelta_idempotency(self): expected = series assert_series_equal(result, expected) + def test_resample_timedelta_missing_values(self): + # GH 13223 + index = pd.to_timedelta(['0s', pd.NaT, '2s']) + series = pd.Series([2, 3, 5], index=index) + result = series.resample('1s').mean() + expected = pd.Series([2, np.nan, 5], index=pd.timedelta_range( + start='0s', end='2s', freq='1s')) + assert_series_equal(result, expected) + def test_resample_rounding(self): # GH 8371 # odd results when rounding is needed diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 2856b54ad9a8c..9f8e71ae4facf 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1231,8 +1231,10 @@ def _get_time_delta_bins(self, ax): data=[], freq=self.freq, name=ax.name) return binner, [], labels - start = ax[0] - end = ax[-1] + # Addresses GH #13223 + start = ax.min() + end = ax.max() + labels = binner = TimedeltaIndex(start=start, end=end, freq=self.freq, @@ -1241,6 +1243,13 @@ def _get_time_delta_bins(self, ax): end_stamps = labels + 1 bins = ax.searchsorted(end_stamps, side='left') + if ax.hasnans: + binner = binner.insert(0, tslib.NaT) + labels = labels.insert(0, tslib.NaT) + + n_NaT = sum([ax_i is tslib.NaT for ax_i in ax]) + bins = np.insert(bins, 0, n_NaT) + # Addresses GH #10530 if self.base > 0: labels += type(self.freq)(self.base) From 3c54c4bae177e8acc44072d5f8d238194ebe64f8 Mon Sep 17 00:00:00 2001 From: Paul-Liu Date: Thu, 17 Nov 2016 01:39:46 -0500 Subject: [PATCH 351/353] BUG: resampling with NaT in TimedeltaIndex (#13223) --- pandas/tests/tseries/test_resample.py | 5 +++++ pandas/tseries/resample.py | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py index f8cac1fc0a0f2..da2022b450b1c 100755 --- a/pandas/tests/tseries/test_resample.py +++ b/pandas/tests/tseries/test_resample.py @@ -1044,6 +1044,11 @@ def test_resample_timedelta_missing_values(self): start='0s', end='2s', freq='1s')) assert_series_equal(result, expected) + # all NaT + index = pd.to_timedelta([pd.NaT, pd.NaT, pd.NaT]) + series = pd.Series([2, 3, 5], index=index) + self.assertRaises(DataError, series.resample('1s').mean) + def test_resample_rounding(self): # GH 8371 # odd results when rounding is needed diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 9f8e71ae4facf..bddfec1dc23b0 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -7,7 +7,7 @@ from pandas.core.base import AbstractMethodError, GroupByMixin from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, - SeriesGroupBy, groupby, PanelGroupBy) + SeriesGroupBy, groupby, PanelGroupBy, DataError) from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.tseries.index import DatetimeIndex, date_range @@ -1226,6 +1226,9 @@ def _get_time_delta_bins(self, ax): raise TypeError('axis must be a TimedeltaIndex, but got ' 'an instance of %r' % type(ax).__name__) + if len(ax) > 0 and all(ax._isnan): + raise DataError('axis not valid') + if not len(ax): binner = labels = TimedeltaIndex( data=[], freq=self.freq, name=ax.name) @@ -1247,7 +1250,7 @@ def _get_time_delta_bins(self, ax): binner = binner.insert(0, tslib.NaT) labels = labels.insert(0, tslib.NaT) - n_NaT 
= sum([ax_i is tslib.NaT for ax_i in ax]) + n_NaT = ax._isnan.sum() bins = np.insert(bins, 0, n_NaT) # Addresses GH #10530 From ec144f3f4d04ab1ab86347639eb8e6a386e7b55d Mon Sep 17 00:00:00 2001 From: Paul-Liu Date: Thu, 17 Nov 2016 16:56:31 -0500 Subject: [PATCH 352/353] fix pep8 --- pandas/tseries/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index bddfec1dc23b0..a20b7e6236334 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -7,7 +7,8 @@ from pandas.core.base import AbstractMethodError, GroupByMixin from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, - SeriesGroupBy, groupby, PanelGroupBy, DataError) + SeriesGroupBy, groupby, PanelGroupBy, + DataError) from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.tseries.index import DatetimeIndex, date_range From 30c749c77ece16d3fc6fd6e1c4249b553032b2bc Mon Sep 17 00:00:00 2001 From: Paul Liu Date: Tue, 31 Jan 2017 23:03:15 -0500 Subject: [PATCH 353/353] better error message for all-nan groupings --- pandas/tseries/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index a20b7e6236334..854ccd381b99b 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1228,7 +1228,7 @@ def _get_time_delta_bins(self, ax): 'an instance of %r' % type(ax).__name__) if len(ax) > 0 and all(ax._isnan): - raise DataError('axis not valid') + raise DataError('all-nan groupings not valid') if not len(ax): binner = labels = TimedeltaIndex(