From 4a4292bc296431fdb937d4ae785d649130e57164 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 20 Oct 2015 14:10:59 -0400 Subject: [PATCH 1/5] TST: move some tests to slow --- pandas/tests/test_graphics.py | 1 + pandas/tests/test_groupby.py | 1 + pandas/tests/test_indexing.py | 116 +++++++++++++++---------------- pandas/tools/tests/test_merge.py | 2 + 4 files changed, 62 insertions(+), 58 deletions(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 83b76393f30e0..b85f4628ae013 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -3169,6 +3169,7 @@ def test_pie_df_nan(self): ax.get_legend().get_texts()], base_expected[:i] + base_expected[i+1:]) + @slow def test_errorbar_plot(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} df = DataFrame(d) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 8eb641ce8f494..46026a4c887a6 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1655,6 +1655,7 @@ def check_nunique(df, keys): check_nunique(frame, ['jim']) check_nunique(frame, ['jim', 'joe']) + @slow def test_series_groupby_value_counts(self): from itertools import product diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 90f85b3f4576d..0f7a5261cb447 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -25,6 +25,7 @@ import pandas.util.testing as tm from pandas import date_range +from numpy.testing.decorators import slow _verbose = False @@ -1689,74 +1690,71 @@ def test_multiindex_perf_warn(self): with tm.assert_produces_warning(PerformanceWarning): _ = df.loc[(0,)] + @slow def test_multiindex_get_loc(self): # GH7724, GH2646 - # ignore the warning here - warnings.simplefilter('ignore', PerformanceWarning) + with warnings.catch_warnings(PerformanceWarning): - # test indexing into a multi-index before & past the lexsort depth - from numpy.random import randint, choice, randn - cols = ['jim', 'joe', 'jolie', 'joline', 'jolia'] + # test indexing into a multi-index before & past the lexsort depth + from numpy.random import randint, choice, randn + cols = ['jim', 'joe', 'jolie', 'joline', 'jolia'] - def validate(mi, df, key): - mask = np.ones(len(df)).astype('bool') + def validate(mi, df, key): + mask = np.ones(len(df)).astype('bool') - # test for all partials of this key - for i, k in enumerate(key): - mask &= df.iloc[:, i] == k + # test for all partials of this key + for i, k in enumerate(key): + mask &= df.iloc[:, i] == k - if not mask.any(): - self.assertNotIn(key[:i+1], mi.index) - continue - - self.assertIn(key[:i+1], mi.index) - right = df[mask].copy() + if not mask.any(): + self.assertNotIn(key[:i+1], mi.index) + continue - if i + 1 != len(key): # partial key - right.drop(cols[:i+1], axis=1, inplace=True) - right.set_index(cols[i+1:-1], inplace=True) - assert_frame_equal(mi.loc[key[:i+1]], right) + self.assertIn(key[:i+1], mi.index) + right = df[mask].copy() - else: # full key - right.set_index(cols[:-1], inplace=True) - if len(right) == 1: # single hit - right = Series(right['jolia'].values, - name=right.index[0], index=['jolia']) - assert_series_equal(mi.loc[key[:i+1]], right) - else: # multi hit + if i + 1 != len(key): # partial key + right.drop(cols[:i+1], axis=1, inplace=True) + right.set_index(cols[i+1:-1], inplace=True) assert_frame_equal(mi.loc[key[:i+1]], right) - def loop(mi, df, keys): - for key in keys: - validate(mi, df, key) - - n, m = 1000, 50 - - vals = [randint(0, 10, n), choice(list('abcdefghij'), n), - choice(pd.date_range('20141009', periods=10).tolist(), n), - choice(list('ZYXWVUTSRQ'), n), randn(n)] - vals = list(map(tuple, zip(*vals))) - - # bunch of keys for testing - keys = [randint(0, 11, m), choice(list('abcdefghijk'), m), - choice(pd.date_range('20141009', periods=11).tolist(), m), - choice(list('ZYXWVUTSRQP'), m)] - keys = list(map(tuple, zip(*keys))) - keys += list(map(lambda t: t[:-1], vals[::n//m])) - - # covers both unique index and non-unique index - df = pd.DataFrame(vals, columns=cols) - a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1]) - - for frame in a, b: - for i in range(5): # lexsort depth - df = frame.copy() if i == 0 else frame.sort_values(by=cols[:i]) - mi = df.set_index(cols[:-1]) - assert not mi.index.lexsort_depth < i - loop(mi, df, keys) - - # restore - warnings.simplefilter('always', PerformanceWarning) + else: # full key + right.set_index(cols[:-1], inplace=True) + if len(right) == 1: # single hit + right = Series(right['jolia'].values, + name=right.index[0], index=['jolia']) + assert_series_equal(mi.loc[key[:i+1]], right) + else: # multi hit + assert_frame_equal(mi.loc[key[:i+1]], right) + + def loop(mi, df, keys): + for key in keys: + validate(mi, df, key) + + n, m = 1000, 50 + + vals = [randint(0, 10, n), choice(list('abcdefghij'), n), + choice(pd.date_range('20141009', periods=10).tolist(), n), + choice(list('ZYXWVUTSRQ'), n), randn(n)] + vals = list(map(tuple, zip(*vals))) + + # bunch of keys for testing + keys = [randint(0, 11, m), choice(list('abcdefghijk'), m), + choice(pd.date_range('20141009', periods=11).tolist(), m), + choice(list('ZYXWVUTSRQP'), m)] + keys = list(map(tuple, zip(*keys))) + keys += list(map(lambda t: t[:-1], vals[::n//m])) + + # covers both unique index and non-unique index + df = pd.DataFrame(vals, columns=cols) + a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1]) + + for frame in a, b: + for i in range(5): # lexsort depth + df = frame.copy() if i == 0 else frame.sort_values(by=cols[:i]) + mi = df.set_index(cols[:-1]) + assert not mi.index.lexsort_depth < i + loop(mi, df, keys) def test_series_getitem_multiindex(self): @@ -4653,6 +4651,7 @@ def test_indexing_dtypes_on_empty(self): assert_series_equal(df2.loc[:,'a'], df2.iloc[:,0]) assert_series_equal(df2.loc[:,'a'], df2.ix[:,0]) + @slow def test_large_dataframe_indexing(self): #GH10692 result = DataFrame({'x': range(10**6)},dtype='int64') @@ -4660,6 +4659,7 @@ def test_large_dataframe_indexing(self): expected = DataFrame({'x': range(10**6 + 1)},dtype='int64') assert_frame_equal(result, expected) + @slow def test_large_mi_dataframe_indexing(self): #GH10645 result = MultiIndex.from_arrays([range(10**6), range(10**6)]) diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 929a72cfd4adc..b555a7dc2b3a1 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -20,6 +20,7 @@ from pandas import isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range, read_table, read_csv import pandas.algos as algos import pandas.util.testing as tm +from numpy.testing.decorators import slow a_ = np.array @@ -1410,6 +1411,7 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) + @slow def test_int64_overflow_issues(self): from itertools import product from collections import defaultdict From 61cd97a6eb1ad2a94af4818a7e3c24755ef6300f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 20 Oct 2015 14:16:22 -0400 Subject: [PATCH 2/5] TST: fix some warnings filters --- pandas/io/ga.py | 13 ++++++------- pandas/io/tests/test_data.py | 1 + pandas/io/tests/test_excel.py | 2 +- pandas/io/tests/test_ga.py | 8 +++++++- pandas/io/tests/test_packers.py | 23 ++++++++++++----------- pandas/rpy/tests/test_common.py | 1 + pandas/stats/tests/test_moments.py | 11 ----------- pandas/util/testing.py | 1 + 8 files changed, 29 insertions(+), 31 deletions(-) diff --git a/pandas/io/ga.py b/pandas/io/ga.py index 5525b34951524..a6f9c9ed9467f 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -4,13 +4,6 @@ 3. Goto APIs and register for OAuth2.0 for installed applications 4. Download JSON secret file and move into same directory as this file """ - -# GH11038 -import warnings -warnings.warn("The pandas.io.ga module is deprecated and will be " - "removed in a future version.", - FutureWarning, stacklevel=2) - from datetime import datetime import re from pandas import compat @@ -27,6 +20,12 @@ from oauth2client.client import AccessTokenRefreshError from pandas.compat import zip, u +# GH11038 +import warnings +warnings.warn("The pandas.io.ga module is deprecated and will be " + "removed in a future version.", + FutureWarning, stacklevel=2) + TYPE_MAP = {u('INTEGER'): int, u('FLOAT'): float, u('TIME'): int} NO_CALLBACK = auth.OOB_CALLBACK_URN diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index 29970aef760f2..60dcb91bd3c5e 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -293,6 +293,7 @@ def test_get_date_ret_index(self): class TestYahooOptions(tm.TestCase): + @classmethod def setUpClass(cls): super(TestYahooOptions, cls).setUpClass() diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index e7ed83b5708f9..b06216719a016 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -6,6 +6,7 @@ import os from distutils.version import LooseVersion +import warnings import operator import functools import nose @@ -1829,7 +1830,6 @@ def test_column_format(self): # Applicable to xlsxwriter only. _skip_if_no_xlsxwriter() - import warnings with warnings.catch_warnings(): # Ignore the openpyxl lxml warning. warnings.simplefilter("ignore") diff --git a/pandas/io/tests/test_ga.py b/pandas/io/tests/test_ga.py index 13d31b43ac39a..965b3441d7405 100644 --- a/pandas/io/tests/test_ga.py +++ b/pandas/io/tests/test_ga.py @@ -1,6 +1,7 @@ import os from datetime import datetime +import warnings import nose import pandas as pd from pandas import compat @@ -13,7 +14,12 @@ try: import httplib2 - import pandas.io.ga as ga + import apiclient + + # deprecated + with warnings.catch_warnings(record=True): + import pandas.io.ga as ga + from pandas.io.ga import GAnalytics, read_ga from pandas.io.auth import AuthenticationConfigError, reset_default_token_store from pandas.io import auth diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 894b699281c80..3434afc4129c4 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -461,20 +461,21 @@ def test_sparse_frame(self): def test_sparse_panel(self): - items = ['x', 'y', 'z'] - p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) - sp = p.to_sparse() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + items = ['x', 'y', 'z'] + p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) + sp = p.to_sparse() - self._check_roundtrip(sp, tm.assert_panel_equal, - check_panel_type=True) + self._check_roundtrip(sp, tm.assert_panel_equal, + check_panel_type=True) - sp2 = p.to_sparse(kind='integer') - self._check_roundtrip(sp2, tm.assert_panel_equal, - check_panel_type=True) + sp2 = p.to_sparse(kind='integer') + self._check_roundtrip(sp2, tm.assert_panel_equal, + check_panel_type=True) - sp3 = p.to_sparse(fill_value=0) - self._check_roundtrip(sp3, tm.assert_panel_equal, - check_panel_type=True) + sp3 = p.to_sparse(fill_value=0) + self._check_roundtrip(sp3, tm.assert_panel_equal, + check_panel_type=True) class TestCompression(TestPackers): diff --git a/pandas/rpy/tests/test_common.py b/pandas/rpy/tests/test_common.py index a2e6d08d07b58..4b579e9263742 100644 --- a/pandas/rpy/tests/test_common.py +++ b/pandas/rpy/tests/test_common.py @@ -6,6 +6,7 @@ import numpy as np import unittest import nose +import warnings import pandas.util.testing as tm try: diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 86c8f5298e0ab..e2ed27156d2b5 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -43,10 +43,6 @@ class TestMoments(Base): def setUp(self): self._create_data() - warnings.simplefilter("ignore", category=FutureWarning) - - def tearDown(self): - warnings.simplefilter("default", category=FutureWarning) def test_centered_axis_validation(self): # ok @@ -890,7 +886,6 @@ def _create_data(self): def setUp(self): self._create_data() - warnings.simplefilter("ignore", category=FutureWarning) def _test_moments_consistency(self, min_periods, @@ -1516,9 +1511,6 @@ def test_rolling_functions_window_non_shrinkage(self): functions = [lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5), lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5), - # rolling_corr_pairwise is depracated, so the following line should be deleted - # when rolling_corr_pairwise is removed. - lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5), ] for f in functions: df_result_panel = f(df) @@ -1585,9 +1577,6 @@ def test_moment_functions_zero_length(self): lambda x: mom.expanding_corr(x, x, pairwise=True, min_periods=5), lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5), lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5), - # rolling_corr_pairwise is depracated, so the following line should be deleted - # when rolling_corr_pairwise is removed. - lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5), ] for f in functions: df1_result_panel = f(df1) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d142ffdbad983..a278c4d0f9045 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2020,6 +2020,7 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", warnings.simplefilter(filter_level) yield w extra_warnings = [] + for actual_warning in w: if (expected_warning and issubclass(actual_warning.category, expected_warning)): From 06cfdcaed74aa857574a5bf48012e0c57b6dfca6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 20 Oct 2015 14:26:52 -0400 Subject: [PATCH 3/5] TST: import pandas_datareader, use for tests TST: remove some deprecation warnings from imports --- ci/requirements-2.7_SLOW.pip | 0 pandas/io/tests/test_data.py | 7 +++++-- pandas/io/tests/test_wb.py | 4 +++- pandas/tests/test_rplot.py | 4 ++-- 4 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 ci/requirements-2.7_SLOW.pip diff --git a/ci/requirements-2.7_SLOW.pip b/ci/requirements-2.7_SLOW.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index 60dcb91bd3c5e..afc61dc42f569 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -9,12 +9,15 @@ import numpy as np import pandas as pd from pandas import DataFrame, Timestamp -from pandas.io import data as web -from pandas.io.data import DataReader, SymbolWarning, RemoteDataError, _yahoo_codes from pandas.util.testing import (assert_series_equal, assert_produces_warning, network, assert_frame_equal) import pandas.util.testing as tm +with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + from pandas.io import data as web + +from pandas.io.data import DataReader, SymbolWarning, RemoteDataError, _yahoo_codes + if compat.PY3: from urllib.error import HTTPError else: diff --git a/pandas/io/tests/test_wb.py b/pandas/io/tests/test_wb.py index 51d6ac02f0f20..ef72ad4964ff2 100644 --- a/pandas/io/tests/test_wb.py +++ b/pandas/io/tests/test_wb.py @@ -5,9 +5,11 @@ from pandas.util.testing import network from pandas.util.testing import assert_frame_equal from numpy.testing.decorators import slow -from pandas.io.wb import search, download, get_countries import pandas.util.testing as tm +# deprecated +with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + from pandas.io.wb import search, download, get_countries class TestWB(tm.TestCase): diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index e79acfcbc58d8..4342417db193b 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- from pandas.compat import range -import pandas.tools.rplot as rplot import pandas.util.testing as tm from pandas import read_csv import os - import nose +with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + import pandas.tools.rplot as rplot def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) From d635fab34b10164fcb010e70ea8b2515428b3272 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 20 Oct 2015 16:56:05 -0400 Subject: [PATCH 4/5] DEPR: fix VisibleDeprecationWarnings in sparse --- pandas/sparse/array.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index b765fdb8d67be..f275a34ca90db 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -283,7 +283,15 @@ def __getitem__(self, key): if com.is_integer(key): return self._get_val_at(key) else: - data_slice = self.values[key] + if isinstance(key, SparseArray): + key = np.asarray(key) + if hasattr(key,'__len__') and len(self) != len(key): + indices = self.sp_index + if hasattr(indices,'to_int_index'): + indices = indices.to_int_index() + data_slice = self.values.take(indices.indices)[key] + else: + data_slice = self.values[key] return self._constructor(data_slice) def __getslice__(self, i, j): @@ -513,7 +521,12 @@ def make_sparse(arr, kind='block', fill_value=nan): else: mask = arr != fill_value - indices = np.arange(length, dtype=np.int32)[mask] + length = len(arr) + if length != mask.size: + # the arr is a SparseArray + indices = mask.sp_index.indices + else: + indices = np.arange(length, dtype=np.int32)[mask] if kind == 'block': locs, lens = splib.get_blocks(indices) From 0634ddfe1e41252347b04b4a2a2d80e79ed04449 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 20 Oct 2015 18:54:23 -0400 Subject: [PATCH 5/5] TST: remove some warnings in test_nanops --- pandas/tests/test_indexing.py | 2 +- pandas/tests/test_nanops.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 0f7a5261cb447..a2d789aaf8b70 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1693,7 +1693,7 @@ def test_multiindex_perf_warn(self): @slow def test_multiindex_get_loc(self): # GH7724, GH2646 - with warnings.catch_warnings(PerformanceWarning): + with warnings.catch_warnings(record=True): # test indexing into a multi-index before & past the lexsort depth from numpy.random import randint, choice, randn diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 6d6c289a6dfa6..b9db95fe06a43 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -3,6 +3,7 @@ from functools import partial +import warnings import numpy as np from pandas import Series from pandas.core.common import isnull, is_integer_dtype @@ -135,7 +136,7 @@ def _coerce_tds(targ, res): return targ, res try: - if axis != 0 and hasattr(targ, 'shape') and targ.ndim: + if axis != 0 and hasattr(targ, 'shape') and targ.ndim and targ.shape != res.shape: res = np.split(res, [targ.shape[0]], axis=0)[0] except: targ, res = _coerce_tds(targ, res) @@ -364,10 +365,11 @@ def test_returned_dtype(self): "return dtype expected from %s is %s, got %s instead" % (method, dtype, result.dtype)) def test_nanmedian(self): - self.check_funs(nanops.nanmedian, np.median, - allow_complex=False, allow_str=False, allow_date=False, - allow_tdelta=True, - allow_obj='convert') + with warnings.catch_warnings(record=True): + self.check_funs(nanops.nanmedian, np.median, + allow_complex=False, allow_str=False, allow_date=False, + allow_tdelta=True, + allow_obj='convert') def test_nanvar(self): self.check_funs_ddof(nanops.nanvar, np.var,