From 3c2e71d84fde3aff3d670fd61a81171e6ba46d76 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 1 Sep 2013 02:24:43 -0400 Subject: [PATCH 1/2] ENH:Add raise_with_traceback & use in assertRaises * Adds `raise_with_traceback` to `pandas.compat`, which handles the Python 2/3 syntax differences for raising with traceback. * Uses `raise_with_traceback` in `assertRaises` and `assertRaisesRegexp` so they provide better feedback. ENH: raise_with_traceback method. --- pandas/compat/__init__.py | 19 +++++++++++++++++++ pandas/core/frame.py | 31 +++++++++++++++++++------------ pandas/tests/test_frame.py | 7 +++++-- pandas/tests/test_tests.py | 29 +++++++++++++++++++---------- pandas/util/testing.py | 8 +++++--- 5 files changed, 67 insertions(+), 27 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 7a444ab01e10a..494cc5fe9ad29 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -665,6 +665,25 @@ def __and__(self, other): else: from collections import OrderedDict, Counter +if PY3: + def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc.with_traceback(traceback) +else: + # this version of raise is a syntax error in Python 3 + exec(""" +def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc, None, traceback +""") + +raise_with_traceback.__doc__ = ( +"""Raise exception with existing traceback. +If traceback is not passed, uses sys.exc_info() to get traceback.""" +) + # http://stackoverflow.com/questions/4126348 # Thanks to @martineau at SO diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d184120185955..177569ce00c3f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12,8 +12,6 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0212,W0231,W0703,W0622 -from pandas.compat import range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict -from pandas import compat import operator import sys import collections @@ -38,6 +36,8 @@ import pandas.core.expressions as expressions from pandas.sparse.array import SparseArray from pandas.compat.scipy import scoreatpercentile as _quantile +from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u, + OrderedDict, raise_with_traceback) from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.util.decorators import deprecate, Appender, Substitution @@ -351,7 +351,7 @@ class DataFrame(NDFrame): Index to use for resulting frame. Will default to np.arange(n) if no indexing information part of input data and no index provided columns : Index or array-like - Column labels to use for resulting frame. Will default to + Column labels to use for resulting frame. Will default to np.arange(n) if no column labels are provided dtype : dtype, default None Data type to force, otherwise infer @@ -438,9 +438,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, else: try: arr = np.array(data, dtype=dtype, copy=copy) - except (ValueError, TypeError): - raise PandasError('DataFrame constructor called with ' - 'incompatible data and dtype') + except (ValueError, TypeError) as e: + exc = TypeError('DataFrame constructor called with ' + 'incompatible data and dtype: %s' % e) + raise_with_traceback(exc) if arr.ndim == 0 and index is not None and columns is not None: if isinstance(data, compat.string_types) and dtype is None: @@ -528,7 +529,8 @@ def _init_ndarray(self, values, index, columns, dtype=None, try: values = values.astype(dtype) except Exception: - raise ValueError('failed to cast to %s' % dtype) + e = ValueError('failed to cast to %s' % dtype) + raise_with_traceback(e) N, K = values.shape @@ -4282,13 +4284,16 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, try: values = self.values result = f(values) - except Exception: + except Exception as e: if filter_type is None or filter_type == 'numeric': data = self._get_numeric_data() elif filter_type == 'bool': data = self._get_bool_data() - else: - raise NotImplementedError + else: # pragma: no cover + e = NotImplementedError("Handling exception with filter_" + "type %s not implemented." + % filter_type) + raise_with_traceback(e) result = f(data.values) labels = data._get_agg_axis(axis) else: @@ -4297,8 +4302,10 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, data = self._get_numeric_data() elif filter_type == 'bool': data = self._get_bool_data() - else: - raise NotImplementedError + else: # pragma: no cover + msg = ("Generating numeric_only data with filter_type %s" + "not supported." % filter_type) + raise NotImplementedError(msg) values = data.values labels = data._get_agg_axis(axis) else: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 118672a85d3fb..ce93f930a0ec3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4329,6 +4329,8 @@ def test_arith_flex_frame(self): result = self.frame[:0].add(self.frame) assert_frame_equal(result, self.frame * np.nan) + with assertRaisesRegexp(NotImplementedError, 'fill_value'): + self.frame.add(self.frame.irow(0), fill_value=3) def test_arith_mixed(self): @@ -8157,7 +8159,8 @@ def test_shift(self): assert_frame_equal(shifted2, shifted3) assert_frame_equal(ps, shifted2.shift(-1, 'B')) - self.assertRaises(ValueError, ps.shift, freq='D') + assertRaisesRegexp(ValueError, 'does not match PeriodIndex freq', + ps.shift, freq='D') def test_shift_bool(self): df = DataFrame({'high': [True, False], @@ -10588,7 +10591,7 @@ def test_dot(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - self.assertRaises(ValueError, df.dot, df2) + assertRaisesRegexp(ValueError, 'aligned', df.dot, df2) def test_idxmin(self): frame = self.frame diff --git a/pandas/tests/test_tests.py b/pandas/tests/test_tests.py index b52ab61f7be6b..1890c2607fc89 100644 --- a/pandas/tests/test_tests.py +++ b/pandas/tests/test_tests.py @@ -4,23 +4,32 @@ import unittest import warnings import nose +import sys -from pandas.util.testing import assert_almost_equal +from pandas.util.testing import ( + assert_almost_equal, assertRaisesRegexp, raise_with_traceback +) # let's get meta. class TestUtilTesting(unittest.TestCase): _multiprocess_can_split_ = True - def __init__(self, *args): - super(TestUtilTesting, self).__init__(*args) - - def setUp(self): - pass - - def tearDown(self): - pass - def test_assert_almost_equal(self): # don't die because values are not ndarrays assert_almost_equal(1.1,1.1,check_less_precise=True) + + def test_raise_with_traceback(self): + with assertRaisesRegexp(LookupError, "error_text"): + try: + raise ValueError("THIS IS AN ERROR") + except ValueError as e: + e = LookupError("error_text") + raise_with_traceback(e) + with assertRaisesRegexp(LookupError, "error_text"): + try: + raise ValueError("This is another error") + except ValueError: + e = LookupError("error_text") + _, _, traceback = sys.exc_info() + raise_with_traceback(e, traceback) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 8af88895a8b73..1142181a6c6aa 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -26,7 +26,8 @@ import pandas.core.panel4d as panel4d import pandas.compat as compat from pandas.compat import( - map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter + map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, + raise_with_traceback ) from pandas import bdate_range @@ -1111,8 +1112,9 @@ def handle_success(self, exc_type, exc_value, traceback): if self.regexp is not None: val = str(exc_value) if not self.regexp.search(val): - raise AssertionError('"%s" does not match "%s"' % - (self.regexp.pattern, str(val))) + e = AssertionError('"%s" does not match "%s"' % + (self.regexp.pattern, str(val))) + raise_with_traceback(e, traceback) return True From db74b2a86524c8dda75e103fe3e2dd611d46e620 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sat, 31 Aug 2013 23:26:20 -0400 Subject: [PATCH 2/2] CLN: Better exceptions and tests for core/frame CLN: Big simplification of test_frame TST: Add test cases for bad input, etc. CLN: Exceptions that shouldn't happen should be assertions (mostly _apply*) CLN/TST: Right Exceptions + remove duplicate tests A few tests were duplicating the same thing or actually just the same test, those have been refactored or removed. Many tests were changed to specify the Exception they were looking for (and also to use the new `with_statement` format for assertRaises, etc.) CLN: Harmonize SparseDF and DF Exceptions ENH: SparseDF apply now accepts axis='columns' --- doc/source/release.rst | 8 +- pandas/core/format.py | 5 +- pandas/core/frame.py | 152 ++++---- pandas/core/generic.py | 4 +- pandas/core/indexing.py | 2 +- pandas/core/internals.py | 10 +- pandas/core/reshape.py | 8 +- pandas/core/series.py | 4 +- pandas/sparse/frame.py | 18 +- pandas/tests/test_frame.py | 546 +++++++++++++--------------- pandas/tests/test_multilevel.py | 39 +- pandas/tests/test_panel.py | 2 +- pandas/tseries/period.py | 1 + pandas/tseries/tests/test_period.py | 17 +- pandas/util/testing.py | 2 +- 15 files changed, 391 insertions(+), 427 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 557c4b293a84e..544e414132acd 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -71,8 +71,9 @@ pandas 0.13 when the key is a column - Support for using a ``DatetimeIndex/PeriodsIndex`` directly in a datelike calculation e.g. s-s.index (:issue:`4629`) - - Better/cleaned up exceptions in core/common, io/excel and core/format. - (:issue:`4721`, :issue:`3954`) + - Better/cleaned up exceptions in core/common, io/excel and core/format + (:issue:`4721`, :issue:`3954`), as well as cleaned up test cases in + tests/test_frame, tests/test_multilevel (:issue:`4732`). **API Changes** @@ -143,9 +144,10 @@ pandas 0.13 now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`) - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) - - Factored out excel_value_to_python_value from ExcelFile::_parse_excel (:issue:`4589`) - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) behavior. + - ``DataFrame.update()`` no longer raises a ``DataConflictError``, it now + will raise a ``ValueError`` instead (if necessary) (:issue:`4732`) **Internal Refactoring** diff --git a/pandas/core/format.py b/pandas/core/format.py index 978b82aed22d9..6b4dc979d5279 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -825,9 +825,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None, # validate mi options if self.has_mi_columns: - # guarded against in to_csv itself - if cols is not None: # pragma: no cover - raise AssertionError("cannot specify cols with a multi_index on the columns") + if cols is not None: + raise TypeError("cannot specify cols with a MultiIndex on the columns") if cols is not None: if isinstance(cols,Index): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 177569ce00c3f..0cd9f7f3f5330 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -180,12 +180,6 @@ merged : DataFrame """ -# Custom error class for update - - -class DataConflictError(Exception): - pass - #---------------------------------------------------------------------- # Factory helper methods @@ -237,8 +231,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): casted = DataFrame(other, index=self.index, columns=self.columns) return self._combine_frame(casted, na_op, fill_value, level) - else: # pragma: no cover - raise ValueError("Bad argument shape") + else: + raise ValueError("Incompatible argument shape %s" % (other.shape,)) else: return self._combine_const(other, na_op) @@ -303,8 +297,9 @@ def f(self, other, axis=default_axis, level=None): return self._flex_compare_frame(casted, na_op, str_rep, level) - else: # pragma: no cover - raise ValueError("Bad argument shape") + else: + raise ValueError("Incompatible argument shape: %s" % + (other.shape,)) else: return self._combine_const(other, na_op) @@ -407,7 +402,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=copy) elif isinstance(data, (np.ndarray, Series)): if data.dtype.names: - data_columns, data = _rec_to_dict(data) + data_columns = list(data.dtype.names) + data = dict((k, data[k]) for k in data_columns) if columns is None: columns = data_columns mgr = self._init_dict(data, index, columns, dtype=dtype) @@ -528,8 +524,9 @@ def _init_ndarray(self, values, index, columns, dtype=None, if values.dtype != dtype: try: values = values.astype(dtype) - except Exception: - e = ValueError('failed to cast to %s' % dtype) + except Exception as orig: + e = ValueError("failed to cast to '%s' (Exception was: %s)" + % (dtype, orig)) raise_with_traceback(e) N, K = values.shape @@ -652,11 +649,7 @@ def __unicode__(self): self.shape[0] <= max_info_rows) self.info(buf=buf, verbose=verbose) - value = buf.getvalue() - if not isinstance(value, compat.text_type): - raise AssertionError() - - return value + return buf.getvalue() def _repr_html_(self): """ @@ -855,8 +848,8 @@ def dot(self, other): lvals = self.values rvals = np.asarray(other) if lvals.shape[1] != rvals.shape[0]: - raise Exception('Dot product shape mismatch, %s vs %s' % - (lvals.shape, rvals.shape)) + raise ValueError('Dot product shape mismatch, %s vs %s' % + (lvals.shape, rvals.shape)) if isinstance(other, DataFrame): return self._constructor(np.dot(lvals, rvals), @@ -1164,7 +1157,7 @@ def from_items(cls, items, columns=None, orient='columns'): return cls._from_arrays(arrays, columns, None) elif orient == 'index': if columns is None: - raise ValueError("Must pass columns with orient='index'") + raise TypeError("Must pass columns with orient='index'") keys = _ensure_index(keys) @@ -1250,12 +1243,12 @@ def to_panel(self): from pandas.core.reshape import block2d_to_blocknd # only support this kind for now - if (not isinstance(self.index, MultiIndex) or + if (not isinstance(self.index, MultiIndex) or # pragma: no cover len(self.index.levels) != 2): - raise AssertionError('Must have 2-level MultiIndex') + raise NotImplementedError('Only 2-level MultiIndex are supported.') if not self.index.is_unique: - raise Exception("Can't convert non-uniquely indexed " + raise ValueError("Can't convert non-uniquely indexed " "DataFrame to Panel") self._consolidate_inplace() @@ -1628,8 +1621,9 @@ def info(self, verbose=True, buf=None, max_cols=None): len(self.columns)) space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4 counts = self.count() - if len(cols) != len(counts): - raise AssertionError('Columns must equal counts') + if len(cols) != len(counts): # pragma: no cover + raise AssertionError('Columns must equal counts (%d != %d)' % + (len(cols), len(counts))) for col, count in compat.iteritems(counts): col = com.pprint_thing(col) lines.append(_put_str(col, space) + @@ -1854,7 +1848,7 @@ def _getitem_array(self, key): warnings.warn("Boolean Series key will be reindexed to match " "DataFrame index.", UserWarning) elif len(key) != len(self.index): - raise ValueError('Item wrong length %d instead of %d!' % + raise ValueError('Item wrong length %d instead of %d.' % (len(key), len(self.index))) # _check_bool_indexer will throw exception if Series key cannot # be reindexed to match DataFrame rows @@ -1940,7 +1934,7 @@ def _setitem_array(self, key, value): else: if isinstance(value, DataFrame): if len(value.columns) != len(key): - raise AssertionError('Columns must be same length as key') + raise ValueError('Columns must be same length as key') for k1, k2 in zip(key, value.columns): self[k1] = value[k2] else: @@ -1951,11 +1945,11 @@ def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. # df[df > df2] = 0 if key.values.dtype != np.bool_: - raise ValueError('Must pass DataFrame with boolean values only') + raise TypeError('Must pass DataFrame with boolean values only') if self._is_mixed_type: if not self._is_numeric_mixed_type: - raise ValueError( + raise TypeError( 'Cannot do boolean setting on mixed-type frame') self.where(-key, value, inplace=True) @@ -2011,8 +2005,8 @@ def _sanitize_column(self, key, value): value = value.T else: if len(value) != len(self.index): - raise AssertionError('Length of values does not match ' - 'length of index') + raise ValueError('Length of values does not match ' + 'length of index') if not isinstance(value, np.ndarray): if isinstance(value, list) and len(value) > 0: @@ -2225,7 +2219,7 @@ def lookup(self, row_labels, col_labels): """ n = len(row_labels) if n != len(col_labels): - raise AssertionError('Row labels must have same size as ' + raise ValueError('Row labels must have same size as ' 'column labels') thresh = 1000 @@ -2234,9 +2228,9 @@ def lookup(self, row_labels, col_labels): ridx = self.index.get_indexer(row_labels) cidx = self.columns.get_indexer(col_labels) if (ridx == -1).any(): - raise ValueError('One or more row labels was not found') + raise KeyError('One or more row labels was not found') if (cidx == -1).any(): - raise ValueError('One or more column labels was not found') + raise KeyError('One or more column labels was not found') flat_index = ridx * len(self.columns) + cidx result = values.flat[flat_index] else: @@ -2394,7 +2388,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if verify_integrity and not index.is_unique: duplicates = index.get_duplicates() - raise Exception('Index has duplicate keys: %s' % duplicates) + raise ValueError('Index has duplicate keys: %s' % duplicates) for c in to_remove: del frame[c] @@ -2595,12 +2589,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): return result axis = self._get_axis_number(axis) - if axis == 0: - agg_axis = 1 - elif axis == 1: - agg_axis = 0 - else: # pragma: no cover - raise ValueError('axis must be 0 or 1') + if axis not in (0, 1): # pragma: no cover + raise AssertionError('axis must be 0 or 1') + agg_axis = 1 - axis agg_obj = self if subset is not None: @@ -2617,9 +2608,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): mask = count > 0 else: if how is not None: - raise ValueError('do not recognize %s' % how) + raise ValueError('invalid how option: %s' % how) else: - raise ValueError('must specify how or thresh') + raise TypeError('must specify how or thresh') return self.take(mask.nonzero()[0], axis=axis, convert=False) @@ -2759,14 +2750,14 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False, from pandas.core.groupby import _lexsort_indexer axis = self._get_axis_number(axis) - if axis not in [0, 1]: - raise ValueError('Axis must be 0 or 1, got %s' % str(axis)) + if axis not in [0, 1]: # pragma: no cover + raise AssertionError('Axis must be 0 or 1, got %s' % str(axis)) labels = self._get_axis(axis) if by is not None: if axis != 0: - raise AssertionError('Axis must be 0') + raise ValueError('When sorting by column, axis must be 0 (rows)') if not isinstance(by, (tuple, list)): by = [by] @@ -2837,7 +2828,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): axis = self._get_axis_number(axis) the_axis = self._get_axis(axis) if not isinstance(the_axis, MultiIndex): - raise Exception('can only sort by level with a hierarchical index') + raise TypeError('can only sort by level with a hierarchical index') new_axis, indexer = the_axis.sortlevel(level, ascending=ascending) @@ -2902,7 +2893,7 @@ def reorder_levels(self, order, axis=0): axis = self._get_axis_number(axis) if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover - raise Exception('Can only reorder levels on a hierarchical axis.') + raise TypeError('Can only reorder levels on a hierarchical axis.') result = self.copy() @@ -2978,7 +2969,8 @@ def _combine_series_infer(self, other, func, fill_value=None): def _combine_match_index(self, other, func, fill_value=None): left, right = self.align(other, join='outer', axis=0, copy=False) if fill_value is not None: - raise NotImplementedError + raise NotImplementedError("fill_value %r not supported." % + fill_value) return self._constructor(func(left.values.T, right.values).T, index=left.index, columns=self.columns, copy=False) @@ -2986,7 +2978,8 @@ def _combine_match_index(self, other, func, fill_value=None): def _combine_match_columns(self, other, func, fill_value=None): left, right = self.align(other, join='outer', axis=1, copy=False) if fill_value is not None: - raise NotImplementedError + raise NotImplementedError("fill_value %r not supported" % + fill_value) new_data = left._data.eval( func, right, axes=[left.columns, self.index]) @@ -3001,7 +2994,7 @@ def _combine_const(self, other, func, raise_on_error=True): def _compare_frame(self, other, func, str_rep): if not self._indexed_same(other): - raise Exception('Can only compare identically-labeled ' + raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') def _compare(a, b): @@ -3166,8 +3159,9 @@ def update(self, other, join='left', overwrite=True, filter_func=None, If True, will raise an error if the DataFrame and other both contain data in the same place. """ - if join != 'left': - raise NotImplementedError + # TODO: Support other joins + if join != 'left': # pragma: no cover + raise NotImplementedError("Only left join is supported") if not isinstance(other, DataFrame): other = DataFrame(other) @@ -3184,7 +3178,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, mask_this = notnull(that) mask_that = notnull(this) if any(mask_this & mask_that): - raise DataConflictError("Data overlaps.") + raise ValueError("Data overlaps.") if overwrite: mask = isnull(that) @@ -3558,8 +3552,8 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): series_gen = (Series.from_array(arr, index=res_columns, name=name) for i, (arr, name) in enumerate(zip(values, res_index))) - else: - raise ValueError('Axis must be 0 or 1, got %s' % str(axis)) + else: # pragma : no cover + raise AssertionError('Axis must be 0 or 1, got %s' % str(axis)) keys = [] results = {} @@ -3617,7 +3611,7 @@ def _apply_broadcast(self, func, axis): elif axis == 1: target = self.T else: # pragma: no cover - raise ValueError('Axis must be 0 or 1, got %s' % axis) + raise AssertionError('Axis must be 0 or 1, got %s' % axis) result_values = np.empty_like(target.values) columns = target.columns @@ -3671,7 +3665,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): If True do not use the index labels. Useful for gluing together record arrays verify_integrity : boolean, default False - If True, raise Exception on creating index with duplicates + If True, raise ValueError on creating index with duplicates Notes ----- @@ -3687,7 +3681,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): if isinstance(other, dict): other = Series(other) if other.name is None and not ignore_index: - raise Exception('Can only append a Series if ' + raise TypeError('Can only append a Series if ' 'ignore_index=True') index = None if other.name is None else [other.name] @@ -3759,7 +3753,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', if isinstance(other, Series): if other.name is None: - raise AssertionError('Other Series must have a name') + raise ValueError('Other Series must have a name') other = DataFrame({other.name: other}) if isinstance(other, DataFrame): @@ -4044,6 +4038,10 @@ def _count_level(self, level, axis=0, numeric_only=False): if axis == 1: frame = frame.T + if not isinstance(frame.index, MultiIndex): + raise TypeError("Can only count levels on hierarchical %s." % + self._get_axis_name(axis)) + # python 2.5 mask = notnull(frame.values).view(np.uint8) @@ -4393,7 +4391,7 @@ def _get_agg_axis(self, axis_num): elif axis_num == 1: return self.index else: - raise Exception('Must have 0<= axis <= 1') + raise ValueError('Axis must be 0 or 1 (got %r)' % axis_num) def quantile(self, q=0.5, axis=0, numeric_only=True): """ @@ -4541,8 +4539,8 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): new_data.set_axis(1, self.index.to_timestamp(freq=freq, how=how)) elif axis == 1: new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how)) - else: - raise ValueError('Axis must be 0 or 1. Got %s' % str(axis)) + else: # pragma: no cover + raise AssertionError('Axis must be 0 or 1. Got %s' % str(axis)) return self._constructor(new_data) @@ -4576,8 +4574,8 @@ def to_period(self, freq=None, axis=0, copy=True): if freq is None: freq = self.columns.freqstr or self.columns.inferred_freq new_data.set_axis(0, self.columns.to_period(freq=freq)) - else: - raise ValueError('Axis must be 0 or 1. Got %s' % str(axis)) + else: # pragma: no cover + raise AssertionError('Axis must be 0 or 1. Got %s' % str(axis)) return self._constructor(new_data) @@ -4830,22 +4828,6 @@ def convert(v): return values -def _rec_to_dict(arr): - if isinstance(arr, (np.ndarray, Series)): - columns = list(arr.dtype.names) - sdict = dict((k, arr[k]) for k in columns) - elif isinstance(arr, DataFrame): - columns = list(arr.columns) - sdict = dict((k, v.values) for k, v in compat.iteritems(arr)) - elif isinstance(arr, dict): - columns = sorted(arr) - sdict = arr.copy() - else: # pragma: no cover - raise TypeError('%s' % type(arr)) - - return columns, sdict - - def _to_arrays(data, columns, coerce_float=False, dtype=None): """ Return list of arrays, columns @@ -4940,8 +4922,7 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): # assure that they are of the base dict class and not of derived # classes - data = [(type(d) is dict) and d or dict(d) - for d in data] + data = [(type(d) is dict) and d or dict(d) for d in data] content = list(lib.dicts_to_array(data, list(columns)).T) return _convert_object_array(content, columns, dtype=dtype, @@ -4952,7 +4933,8 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): if columns is None: columns = _default_index(len(content)) else: - if len(columns) != len(content): + if len(columns) != len(content): # pragma: no cover + # caller's responsibility to check for this... raise AssertionError('%d columns passed, passed data had %s ' 'columns' % (len(columns), len(content))) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b0ab62b7158c9..796c3948a2681 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -417,7 +417,7 @@ def rename(self, *args, **kwargs): inplace = kwargs.get('inplace', False) if (com._count_not_none(*axes.values()) == 0): - raise Exception('must pass an index to rename') + raise TypeError('must pass an index to rename') # renamer function if passed a dict def _get_rename_function(mapper): @@ -1242,7 +1242,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): matcher = re.compile(regex) return self.select(lambda x: matcher.search(x) is not None, axis=axis_name) else: - raise ValueError('items was None!') + raise TypeError('Must pass either `items`, `like`, or `regex`') #---------------------------------------------------------------------- # Attribute access diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 856e97ad163f2..636a5e88817ee 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -84,7 +84,7 @@ def __setitem__(self, key, value): if isinstance(key, tuple): if len(key) > self.ndim: - raise IndexingError('only tuples of length <= %d supported', + raise IndexingError('only tuples of length <= %d supported' % self.ndim) indexer = self._convert_tuple(key, is_setter=True) else: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d025c7a7fcf6d..294b90bd4092f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1625,7 +1625,7 @@ def set_axis(self, axis, value, maybe_rename=True, check_axis=True): value = _ensure_index(value) if check_axis and len(value) != len(cur_axis): - raise Exception('Length mismatch (%d vs %d)' + raise ValueError('Length mismatch (%d vs %d)' % (len(value), len(cur_axis))) self.axes[axis] = value @@ -2297,7 +2297,7 @@ def fast_2d_xs(self, loc, copy=False): return result if not copy: - raise Exception('cannot get view of mixed-type or ' + raise TypeError('cannot get view of mixed-type or ' 'non-consolidated DataFrame') items = self.items @@ -2483,7 +2483,8 @@ def _set_item(item, arr): def insert(self, loc, item, value, allow_duplicates=False): if not allow_duplicates and item in self.items: - raise Exception('cannot insert %s, already exists' % item) + # Should this be a different kind of error?? + raise ValueError('cannot insert %s, already exists' % item) try: new_items = self.items.insert(loc, item) @@ -2826,7 +2827,8 @@ def _maybe_rename_join(self, other, lsuffix, rsuffix, copydata=True): to_rename = self.items.intersection(other.items) if len(to_rename) > 0: if not lsuffix and not rsuffix: - raise Exception('columns overlap: %s' % to_rename) + raise ValueError('columns overlap but no suffix specified: %s' + % to_rename) def lrenamer(x): if x in to_rename: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index a8a36ef8ca0be..312667930b54d 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -21,10 +21,6 @@ from pandas.core.index import Index, MultiIndex -class ReshapeError(Exception): - pass - - class _Unstacker(object): """ Helper class to unstack data / pivot with multi-level index @@ -129,8 +125,8 @@ def _make_selectors(self): mask.put(selector, True) if mask.sum() < len(self.index): - raise ReshapeError('Index contains duplicate entries, ' - 'cannot reshape') + raise ValueError('Index contains duplicate entries, ' + 'cannot reshape') self.group_index = comp_index self.mask = mask diff --git a/pandas/core/series.py b/pandas/core/series.py index 8396de9c5997b..43bf4db7b29db 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2377,7 +2377,7 @@ def sort(self, axis=0, kind='quicksort', order=None, ascending=True): if (true_base is not None and (true_base.ndim != 1 or true_base.shape != self.shape)): - raise Exception('This Series is a view of some other array, to ' + raise TypeError('This Series is a view of some other array, to ' 'sort in-place you must create a copy') self[:] = sortedSeries @@ -2542,7 +2542,7 @@ def sortlevel(self, level=0, ascending=True): sorted : Series """ if not isinstance(self.index, MultiIndex): - raise Exception('can only sort by level with a hierarchical index') + raise TypeError('can only sort by level with a hierarchical index') new_index, indexer = self.index.sortlevel(level, ascending=ascending) new_values = self.values.take(indexer) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index dd7bd52076e06..d8f6d531a6983 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -600,7 +600,8 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, lim def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): if isinstance(other, Series): - assert(other.name is not None) + if other.name is None: + raise ValueError('Other Series must have a name') other = SparseDataFrame({other.name: other}, default_fill_value=self._default_fill_value) if on is not None: @@ -627,19 +628,19 @@ def _join_index(self, other, how, lsuffix, rsuffix): return concat([this, other], axis=1, verify_integrity=True) def _maybe_rename_join(self, other, lsuffix, rsuffix): - intersection = self.columns.intersection(other.columns) - - if len(intersection) > 0: + to_rename = self.columns.intersection(other.columns) + if len(to_rename) > 0: if not lsuffix and not rsuffix: - raise Exception('columns overlap: %s' % intersection) + raise ValueError('columns overlap but no suffix specified: %s' + % to_rename) def lrenamer(x): - if x in intersection: + if x in to_rename: return '%s%s' % (x, lsuffix) return x def rrenamer(x): - if x in intersection: + if x in to_rename: return '%s%s' % (x, rsuffix) return x @@ -687,7 +688,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): ---------- func : function Function to apply to each column - axis : {0, 1} + axis : {0, 1, 'index', 'columns'} broadcast : bool, default False For aggregation functions, return object of same size with values propagated @@ -698,6 +699,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): """ if not len(self.columns): return self + axis = self._get_axis_number(axis) if isinstance(func, np.ufunc): new_series = {} diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ce93f930a0ec3..c39634281ebb7 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -40,6 +40,8 @@ assertRaisesRegexp, makeCustomDataframe as mkdf, ensure_clean) +from pandas.core.indexing import IndexingError +from pandas.core.common import PandasError import pandas.util.testing as tm import pandas.lib as lib @@ -115,7 +117,8 @@ def test_getitem(self): self.assert_(self.frame[key] is not None) self.assert_('random' not in self.frame) - self.assertRaises(Exception, self.frame.__getitem__, 'random') + with assertRaisesRegexp(KeyError, 'no item named random'): + self.frame['random'] def test_getitem_dupe_cols(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) @@ -154,10 +157,10 @@ def test_getitem_list(self): self.assertEqual(result.columns.name, 'foo') - self.assertRaises(Exception, self.frame.__getitem__, - ['B', 'A', 'foo']) - self.assertRaises(Exception, self.frame.__getitem__, - Index(['B', 'A', 'foo'])) + with assertRaisesRegexp(KeyError, 'not in index'): + self.frame[['B', 'A', 'food']] + with assertRaisesRegexp(KeyError, 'not in index'): + self.frame[Index(['B', 'A', 'foo'])] # tuples df = DataFrame(randn(8, 3), @@ -178,6 +181,12 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A']) assert_series_equal(self.frame['A'], data['B']) + with assertRaisesRegexp(ValueError, 'Columns must be same length as key'): + data[['A']] = self.frame[['A', 'B']] + with assertRaisesRegexp(ValueError, 'Length of values does not match ' + 'length of index'): + data['A'] = range(len(data.index) - 1) + df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_) df.ix[1, ['tt1', 'tt2']] = [1, 2] @@ -214,12 +223,14 @@ def test_getitem_boolean(self): subframe = self.tsframe[indexer] self.assert_(np.array_equal(subindex, subframe.index)) - self.assertRaises(Exception, self.tsframe.__getitem__, indexer[:-1]) + with assertRaisesRegexp(ValueError, 'Item wrong length'): + self.tsframe[indexer[:-1]] subframe_obj = self.tsframe[indexer_obj] assert_frame_equal(subframe_obj, subframe) - self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) + with tm.assertRaisesRegexp(ValueError, 'boolean values only'): + self.tsframe[self.tsframe] # test that Series work indexer_obj = Series(indexer_obj, self.tsframe.index) @@ -381,8 +392,8 @@ def test_setitem(self): tm.assert_dict_equal(series, self.frame['col6'], compare_keys=False) - self.assertRaises(Exception, self.frame.__setitem__, - randn(len(self.frame) + 1)) + with tm.assertRaises(KeyError): + self.frame[randn(len(self.frame) + 1)] = 1 # set ndarray arr = randn(len(self.frame)) @@ -462,7 +473,9 @@ def test_setitem_boolean(self): values[values == 2] = 3 assert_almost_equal(df.values, values) - self.assertRaises(Exception, df.__setitem__, df * 0, 2) + with assertRaisesRegexp(TypeError, 'Must pass DataFrame with boolean ' + 'values only'): + df[df * 0] = 2 # index with DataFrame mask = df > np.abs(df) @@ -1176,14 +1189,11 @@ def test_getitem_fancy_ints(self): def test_getitem_setitem_fancy_exceptions(self): ix = self.frame.ix - self.assertRaises(Exception, ix.__getitem__, - (slice(None, None, None), - slice(None, None, None), - slice(None, None, None))) - self.assertRaises(Exception, ix.__setitem__, - (slice(None, None, None), - slice(None, None, None), - slice(None, None, None)), 1) + with assertRaisesRegexp(IndexingError, 'Too many indexers'): + ix[:, :, :] + with assertRaisesRegexp(IndexingError, 'only tuples of length <= 2 ' + 'supported'): + ix[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self): # boolean index misaligned labels @@ -1230,8 +1240,10 @@ def test_getitem_setitem_float_labels(self): self.assertEqual(len(result), 2) # this should raise an exception - self.assertRaises(Exception, df.ix.__getitem__, slice(1, 2)) - self.assertRaises(Exception, df.ix.__setitem__, slice(1, 2), 0) + with tm.assertRaises(KeyError): + df.ix[1:2] + with tm.assertRaises(KeyError): + df.ix[1:2] = 0 # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) @@ -1470,11 +1482,14 @@ def testit(df): assert_almost_equal(df['mask'], exp_mask) self.assert_(df['mask'].dtype == np.bool_) - self.assertRaises(ValueError, self.frame.lookup, - ['xyz'], ['A']) + with tm.assertRaises(KeyError): + self.frame.lookup(['xyz'], ['A']) + + with tm.assertRaises(KeyError): + self.frame.lookup([self.frame.index[0]], ['xyz']) - self.assertRaises(ValueError, self.frame.lookup, - [self.frame.index[0]], ['xyz']) + with tm.assertRaisesRegexp(ValueError, 'same size'): + self.frame.lookup(['a', 'b', 'c'], ['a']) def test_set_value(self): for idx in self.frame.index: @@ -1721,10 +1736,6 @@ def test_join_index(self): self.assert_(joined.index.equals(f2.index)) self.assertEqual(len(joined.columns), 4) - # corner case - self.assertRaises(Exception, self.frame.join, self.frame, - how='left') - # inner f = self.frame.reindex(columns=['A', 'B'])[:10] @@ -1734,10 +1745,6 @@ def test_join_index(self): self.assert_(joined.index.equals(f.index.intersection(f2.index))) self.assertEqual(len(joined.columns), 4) - # corner case - self.assertRaises(Exception, self.frame.join, self.frame, - how='inner') - # outer f = self.frame.reindex(columns=['A', 'B'])[:10] @@ -1747,11 +1754,12 @@ def test_join_index(self): self.assert_(tm.equalContents(self.frame.index, joined.index)) self.assertEqual(len(joined.columns), 4) - # corner case - self.assertRaises(Exception, self.frame.join, self.frame, - how='outer') + assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') - self.assertRaises(Exception, f.join, f2, how='foo') + # corner case - overlapping columns + for how in ('outer', 'left', 'inner'): + with assertRaisesRegexp(ValueError, 'columns overlap but no suffix'): + self.frame.join(self.frame, how=how) def test_join_index_more(self): af = self.frame.ix[:, ['A', 'B']] @@ -1778,7 +1786,7 @@ def test_join_index_series(self): assert_frame_equal(joined, self.frame, check_names=False) # TODO should this check_names ? s.name = None - self.assertRaises(Exception, df.join, s) + assertRaisesRegexp(ValueError, 'must have a name', df.join, s) def test_join_overlap(self): df1 = self.frame.ix[:, ['A', 'B', 'C']] @@ -1858,22 +1866,23 @@ def setUp(self): def test_get_axis(self): f = self.frame - self.assert_(f._get_axis_name(0) == 'index') - self.assert_(f._get_axis_name(1) == 'columns') - self.assert_(f._get_axis_name('index') == 'index') - self.assert_(f._get_axis_name('columns') == 'columns') - self.assertRaises(Exception, f._get_axis_name, 'foo') - self.assertRaises(Exception, f._get_axis_name, None) - - self.assert_(f._get_axis_number(0) == 0) - self.assert_(f._get_axis_number(1) == 1) - self.assert_(f._get_axis_number('index') == 0) - self.assert_(f._get_axis_number('columns') == 1) - self.assertRaises(Exception, f._get_axis_number, 2) - self.assertRaises(Exception, f._get_axis_number, None) - - self.assert_(self.frame._get_axis(0) is self.frame.index) - self.assert_(self.frame._get_axis(1) is self.frame.columns) + self.assertEquals(f._get_axis_number(0), 0) + self.assertEquals(f._get_axis_number(1), 1) + self.assertEquals(f._get_axis_number('index'), 0) + self.assertEquals(f._get_axis_number('columns'), 1) + + self.assertEquals(f._get_axis_name(0), 'index') + self.assertEquals(f._get_axis_name(1), 'columns') + self.assertEquals(f._get_axis_name('index'), 'index') + self.assertEquals(f._get_axis_name('columns'), 'columns') + + self.assert_(f._get_axis(0) is f.index) + self.assert_(f._get_axis(1) is f.columns) + + assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, 2) + assertRaisesRegexp(ValueError, 'No axis.*foo', f._get_axis_name, 'foo') + assertRaisesRegexp(ValueError, 'No axis.*None', f._get_axis_name, None) + assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, None) def test_set_index(self): idx = Index(np.arange(len(self.mixed_frame))) @@ -1882,8 +1891,8 @@ def test_set_index(self): _ = self.mixed_frame['foo'] self.mixed_frame.index = idx self.assert_(self.mixed_frame['foo'].index is idx) - self.assertRaises(Exception, setattr, self.mixed_frame, 'index', - idx[::2]) + with assertRaisesRegexp(ValueError, 'Length mismatch'): + self.mixed_frame.index = idx[::2] def test_set_index_cast(self): @@ -1957,7 +1966,8 @@ def test_set_index2(self): assert_frame_equal(df3, expected_nodrop) # corner case - self.assertRaises(Exception, df.set_index, 'A', verify_integrity=True) + with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + df.set_index('A', verify_integrity=True) # append result = df.set_index(['A', 'B'], append=True) @@ -1981,8 +1991,8 @@ def test_set_index_nonuniq(self): 'C': ['a', 'b', 'c', 'd', 'e'], 'D': np.random.randn(5), 'E': np.random.randn(5)}) - self.assertRaises(Exception, df.set_index, 'A', verify_integrity=True, - inplace=True) + with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + df.set_index('A', verify_integrity=True, inplace=True) self.assert_('A' in df) def test_set_index_bug(self): @@ -2040,8 +2050,8 @@ def test_set_index_empty_column(self): def test_set_columns(self): cols = Index(np.arange(len(self.mixed_frame.columns))) self.mixed_frame.columns = cols - self.assertRaises(Exception, setattr, self.mixed_frame, 'columns', - cols[::2]) + with assertRaisesRegexp(ValueError, 'Length mismatch'): + self.mixed_frame.columns = cols[::2] def test_keys(self): getkeys = self.frame.keys @@ -2220,12 +2230,11 @@ def test_constructor_dict(self): # Corner cases self.assertEqual(len(DataFrame({})), 0) - self.assertRaises(Exception, lambda x: DataFrame([self.ts1, self.ts2])) - # mix dict and array, wrong size - self.assertRaises(Exception, DataFrame, - {'A': {'a': 'a', 'b': 'b'}, - 'B': ['a', 'b', 'c']}) + # mix dict and array, wrong size - no spec for which error should raise + # first + with tm.assertRaises(ValueError): + DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # Length-one dict micro-optimization frame = DataFrame({'A': {'1': 1, '2': 2}}) @@ -2248,36 +2257,33 @@ def test_constructor_dict(self): self.assert_(frame.index.equals(Index([]))) def test_constructor_error_msgs(self): - + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." # mix dict and array, wrong size - def testit(): + with assertRaisesRegexp(ValueError, msg): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) - assertRaisesRegexp(ValueError, "Mixing dicts with non-Series may lead to ambiguous ordering.", testit) # wrong size ndarray, GH 3105 - def testit(): - DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], + msg = "Shape of passed values is \(3, 4\), indices imply \(3, 3\)" + with assertRaisesRegexp(ValueError, msg): + DataFrame(np.arange(12).reshape((4, 3)), + columns=['foo', 'bar', 'baz'], index=date_range('2000-01-01', periods=3)) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4\), indices imply \(3, 3\)", testit) + # higher dim raise exception - def testit(): + with assertRaisesRegexp(ValueError, 'Must pass 2-d input'): DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) - assertRaisesRegexp(ValueError, "Must pass 2-d input", testit) # wrong size axis labels - def testit(): + with assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(3, 1\)"): DataFrame(np.random.rand(2,3), columns=['A', 'B', 'C'], index=[1]) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(3, 1\)", testit) - def testit(): + with assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(2, 2\)"): DataFrame(np.random.rand(2,3), columns=['A', 'B'], index=[1, 2]) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(2, 2\)", testit) - def testit(): + with assertRaisesRegexp(ValueError, 'If using all scalar values, you must must pass an index'): DataFrame({'a': False, 'b': True}) - assertRaisesRegexp(ValueError, 'If using all scalar values, you must must pass an index', testit) def test_insert_error_msmgs(self): @@ -2285,10 +2291,10 @@ def test_insert_error_msmgs(self): df = DataFrame(np.random.randint(0,2,(4,4)), columns=['a', 'b', 'c', 'd']) - def testit(): + msg = 'incompatible index of inserted column with frame index' + with assertRaisesRegexp(TypeError, msg): df['gr'] = df.groupby(['b', 'c']).count() - assertRaisesRegexp(TypeError, 'incompatible index of inserted column with frame index', testit) def test_constructor_subclass_dict(self): # Test for passing dict subclass to constructor @@ -2362,38 +2368,38 @@ def test_constructor_dict_of_tuples(self): expected = DataFrame(dict((k, list(v)) for k, v in compat.iteritems(data))) assert_frame_equal(result, expected, check_dtype=False) - def test_constructor_ndarray(self): - mat = np.zeros((2, 3), dtype=float) - + def _check_basic_constructor(self, empty): + "mat: 2d matrix with shpae (3, 2) to input. empty - makes sized objects" + mat = empty((2, 3), dtype=float) # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) self.assertEqual(len(frame.index), 2) self.assertEqual(len(frame.columns), 3) - # cast type - frame = DataFrame(mat, columns=['A', 'B', 'C'], - index=[1, 2], dtype=np.int64) - self.assert_(frame.values.dtype == np.int64) - # 1-D input - frame = DataFrame(np.zeros(3), columns=['A'], index=[1, 2, 3]) + frame = DataFrame(empty((3,)), columns=['A'], index=[1, 2, 3]) self.assertEqual(len(frame.index), 3) self.assertEqual(len(frame.columns), 1) - frame = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A']) - self.assertEqual(len(frame), 2) - # higher dim raise exception - self.assertRaises(Exception, DataFrame, np.zeros((3, 3, 3)), - columns=['A', 'B', 'C'], index=[1]) + # cast type + frame = DataFrame(mat, columns=['A', 'B', 'C'], + index=[1, 2], dtype=np.int64) + self.assert_(frame.values.dtype == np.int64) # wrong size axis labels - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B', 'C'], index=[1]) + msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' + with assertRaisesRegexp(ValueError, msg): + DataFrame(mat, columns=['A', 'B', 'C'], index=[1]) + msg = r'Shape of passed values is \(3, 2\), indices imply \(2, 2\)' + with assertRaisesRegexp(ValueError, msg): + DataFrame(mat, columns=['A', 'B'], index=[1, 2]) - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B'], index=[1, 2]) + # higher dim raise exception + with assertRaisesRegexp(ValueError, 'Must pass 2-d input'): + DataFrame(empty((3, 3, 3)), columns=['A', 'B', 'C'], + index=[1]) # automatic labeling frame = DataFrame(mat) @@ -2407,70 +2413,36 @@ def test_constructor_ndarray(self): self.assert_(np.array_equal(frame.index, lrange(2))) # 0-length axis - frame = DataFrame(np.empty((0, 3))) + frame = DataFrame(empty((0, 3))) self.assert_(len(frame.index) == 0) - frame = DataFrame(np.empty((3, 0))) + frame = DataFrame(empty((3, 0))) self.assert_(len(frame.columns) == 0) - def test_constructor_maskedarray(self): - mat = ma.masked_all((2, 3), dtype=float) + def test_constructor_ndarray(self): + mat = np.zeros((2, 3), dtype=float) + self._check_basic_constructor(np.ones) - # 2-D input - frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) + frame = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A']) + self.assertEqual(len(frame), 2) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) - self.assertTrue(np.all(~np.asarray(frame == frame))) - # cast type - frame = DataFrame(mat, columns=['A', 'B', 'C'], - index=[1, 2], dtype=np.int64) - self.assert_(frame.values.dtype == np.int64) + def test_constructor_maskedarray(self): + self._check_basic_constructor(ma.masked_all) # Check non-masked values - mat2 = ma.copy(mat) - mat2[0, 0] = 1.0 - mat2[1, 2] = 2.0 - frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) + mat = ma.masked_all((2, 3), dtype=float) + mat[0, 0] = 1.0 + mat[1, 2] = 2.0 + frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) self.assertEqual(1.0, frame['A'][1]) self.assertEqual(2.0, frame['C'][2]) - # 1-D input - frame = DataFrame(ma.masked_all((3,)), columns=['A'], index=[1, 2, 3]) - self.assertEqual(len(frame.index), 3) - self.assertEqual(len(frame.columns), 1) + # what is this even checking?? + mat = ma.masked_all((2, 3), dtype=float) + frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) self.assertTrue(np.all(~np.asarray(frame == frame))) - # higher dim raise exception - self.assertRaises(Exception, DataFrame, ma.masked_all((3, 3, 3)), - columns=['A', 'B', 'C'], index=[1]) - - # wrong size axis labels - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B', 'C'], index=[1]) - - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B'], index=[1, 2]) - - # automatic labeling - frame = DataFrame(mat) - self.assert_(np.array_equal(frame.index, lrange(2))) - self.assert_(np.array_equal(frame.columns, lrange(3))) - - frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, lrange(3))) - - frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, lrange(2))) - - # 0-length axis - frame = DataFrame(ma.masked_all((0, 3))) - self.assert_(len(frame.index) == 0) - - frame = DataFrame(ma.masked_all((3, 0))) - self.assert_(len(frame.columns) == 0) - def test_constructor_maskedarray_nonfloat(self): # masked int promoted to float mat = ma.masked_all((2, 3), dtype=int) @@ -2570,8 +2542,8 @@ def test_constructor_arrays_and_scalars(self): exp = DataFrame({'a': df['a'].values, 'b': [True] * 10}) assert_frame_equal(df, exp) - - self.assertRaises(ValueError, DataFrame, {'a': False, 'b': True}) + with tm.assertRaisesRegexp(ValueError, 'must pass an index'): + DataFrame({'a': False, 'b': True}) def test_constructor_DataFrame(self): df = DataFrame(self.frame) @@ -2602,12 +2574,14 @@ def test_constructor_more(self): self.assertEqual(dm.values.shape, (10, 0)) # corner, silly - self.assertRaises(Exception, DataFrame, (1, 2, 3)) + # TODO: Fix this Exception to be better... + with assertRaisesRegexp(PandasError, 'constructor not properly called'): + DataFrame((1, 2, 3)) # can't cast mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1) - self.assertRaises(ValueError, DataFrame, mat, index=[0, 1], - columns=[0], dtype=float) + with assertRaisesRegexp(ValueError, 'cast'): + DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(self.frame._series)) tm.assert_frame_equal(dm, self.frame) @@ -2718,7 +2692,8 @@ class CustomDict(dict): def test_constructor_ragged(self): data = {'A': randn(10), 'B': randn(8)} - self.assertRaises(Exception, DataFrame, data) + assertRaisesRegexp(ValueError, 'arrays must all be same length', + DataFrame, data) def test_constructor_scalar(self): idx = Index(lrange(3)) @@ -2739,8 +2714,8 @@ def test_constructor_mixed_dict_and_Series(self): self.assert_(result.index.is_monotonic) # ordering ambiguous, raise exception - self.assertRaises(Exception, DataFrame, - {'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) + with assertRaisesRegexp(ValueError, 'ambiguous ordering'): + DataFrame({'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) # this is OK though result = DataFrame({'A': ['a', 'b'], @@ -2850,10 +2825,11 @@ def test_constructor_from_items(self): columns=self.mixed_frame.columns, orient='index') assert_frame_equal(recons, self.mixed_frame) - self.assert_(recons['A'].dtype == np.float64) + self.assertEqual(recons['A'].dtype, np.float64) - self.assertRaises(ValueError, DataFrame.from_items, row_items, - orient='index') + with tm.assertRaisesRegexp(TypeError, + "Must pass columns with orient='index'"): + DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples arr = lib.list_to_object_array( @@ -2878,9 +2854,8 @@ def test_constructor_mix_series_nonseries(self): 'B': list(self.frame['B'])}, columns=['A', 'B']) assert_frame_equal(df, self.frame.ix[:, ['A', 'B']]) - self.assertRaises(ValueError, DataFrame, - {'A': self.frame['A'], - 'B': list(self.frame['B'])[:-2]}) + with tm.assertRaisesRegexp(ValueError, 'does not match index length'): + DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])[:-2]}) def test_constructor_miscast_na_int_dtype(self): df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) @@ -2932,6 +2907,8 @@ def check(result, expected=None): df['string'] = 'bah' expected = DataFrame([[1,1,1,5,'bah'],[1,1,2,5,'bah'],[2,1,3,5,'bah']],columns=['foo','bar','foo','hello','string']) check(df,expected) + with assertRaisesRegexp(ValueError, 'Length of value'): + df.insert(0, 'AnotherColumn', range(len(df.index) - 1)) # insert same dtype df['foo2'] = 3 @@ -2965,7 +2942,7 @@ def check(result, expected=None): check(df,expected) # insert a dup - self.assertRaises(Exception, df.insert, 2, 'new_col', 4.) + assertRaisesRegexp(ValueError, 'cannot insert', df.insert, 2, 'new_col', 4.) df.insert(2,'new_col',4.,allow_duplicates=True) expected = DataFrame([[1,1,4.,5.,'bah',3],[1,2,4.,5.,'bah',3],[2,3,4.,5.,'bah',3]],columns=['foo','foo','new_col','new_col','string','foo2']) check(df,expected) @@ -3071,8 +3048,8 @@ def test_constructor_single_value(self): self.assertRaises(com.PandasError, DataFrame, 'a', [1, 2]) self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a', 'c']) - self.assertRaises( - com.PandasError, DataFrame, 'a', [1, 2], ['a', 'c'], float) + with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): + DataFrame('a', [1, 2], ['a', 'c'], float) def test_constructor_with_datetimes(self): @@ -3497,8 +3474,9 @@ def test_from_records_to_records(self): tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2)) # wrong length - self.assertRaises(Exception, DataFrame.from_records, arr, - index=index[:-1]) + msg = r'Shape of passed values is \(3,\), indices imply \(3, 1\)' + with assertRaisesRegexp(ValueError, msg): + DataFrame.from_records(arr, index=index[:-1]) indexed_frame = DataFrame.from_records(arr, index='f1') @@ -3816,7 +3794,7 @@ def test_get_agg_axis(self): idx = self.frame._get_agg_axis(1) self.assert_(idx is self.frame.index) - self.assertRaises(Exception, self.frame._get_agg_axis, 2) + self.assertRaises(ValueError, self.frame._get_agg_axis, 2) def test_nonzero(self): self.assertTrue(self.empty.empty) @@ -3991,19 +3969,20 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') result = Series(dict(float64 = 5, float32 = 1)) - self.assert_((df.get_dtype_counts() == result).all() == True) + self.assert_((df.get_dtype_counts() == result).all()) # replacing current (in different block) df['a'] = df['a'].astype('float32') result = Series(dict(float64 = 4, float32 = 2)) - self.assert_((df.get_dtype_counts() == result).all() == True) + self.assert_((df.get_dtype_counts() == result).all()) df['y'] = df['a'].astype('int32') result = Series(dict(float64 = 4, float32 = 2, int32 = 1)) - self.assert_((df.get_dtype_counts() == result).all() == True) + self.assert_((df.get_dtype_counts() == result).all()) - self.assertRaises(Exception, df.insert, 1, 'a', df['b']) - self.assertRaises(Exception, df.insert, 1, 'c', df['b']) + with assertRaisesRegexp(ValueError, 'already exists'): + df.insert(1, 'a', df['b']) + self.assertRaises(ValueError, df.insert, 1, 'c', df['b']) df.columns.name = 'some_name' # preserve columns name field @@ -4310,6 +4289,15 @@ def test_arith_flex_frame(self): assert_frame_equal(result, exp) _check_mixed_int(result, dtype = dtype) + # ndim >= 3 + ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) + with assertRaisesRegexp(ValueError, 'shape'): + f(self.frame, ndim_5) + + with assertRaisesRegexp(ValueError, 'shape'): + getattr(self.frame, op)(ndim_5) + + # res_add = self.frame.add(self.frame) # res_sub = self.frame.sub(self.frame) # res_mul = self.frame.mul(self.frame) @@ -4331,6 +4319,8 @@ def test_arith_flex_frame(self): assert_frame_equal(result, self.frame * np.nan) with assertRaisesRegexp(NotImplementedError, 'fill_value'): self.frame.add(self.frame.irow(0), fill_value=3) + with assertRaisesRegexp(NotImplementedError, 'fill_value'): + self.frame.add(self.frame.irow(0), axis='index', fill_value=3) def test_arith_mixed(self): @@ -4379,34 +4369,32 @@ def test_bool_flex_frame(self): other_data = np.random.randn(5, 3) df = DataFrame(data) other = DataFrame(other_data) - - # No NAs - - # DataFrame - self.assert_(df.eq(df).values.all()) - self.assert_(not df.ne(df).values.any()) - - assert_frame_equal((df == other), df.eq(other)) - assert_frame_equal((df != other), df.ne(other)) - assert_frame_equal((df > other), df.gt(other)) - assert_frame_equal((df < other), df.lt(other)) - assert_frame_equal((df >= other), df.ge(other)) - assert_frame_equal((df <= other), df.le(other)) + ndim_5 = np.ones(df.shape + (1, 3)) # Unaligned - def _check_unaligned_frame(meth, op, df, other, default=False): + def _check_unaligned_frame(meth, op, df, other): part_o = other.ix[3:, 1:].copy() - rs = meth(df, part_o) + rs = meth(part_o) xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) assert_frame_equal(rs, xp) - _check_unaligned_frame(DataFrame.eq, operator.eq, df, other) - _check_unaligned_frame(DataFrame.ne, operator.ne, df, other, - default=True) - _check_unaligned_frame(DataFrame.gt, operator.gt, df, other) - _check_unaligned_frame(DataFrame.lt, operator.lt, df, other) - _check_unaligned_frame(DataFrame.ge, operator.ge, df, other) - _check_unaligned_frame(DataFrame.le, operator.le, df, other) + # DataFrame + self.assert_(df.eq(df).values.all()) + self.assert_(not df.ne(df).values.any()) + for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: + f = getattr(df, op) + o = getattr(operator, op) + # No NAs + assert_frame_equal(f(other), o(df, other)) + _check_unaligned_frame(f, o, df, other) + # ndarray + assert_frame_equal(f(other.values), o(df, other.values)) + # scalar + assert_frame_equal(f(0), o(df, 0)) + # NAs + assert_frame_equal(f(np.nan), o(df, np.nan)) + with assertRaisesRegexp(ValueError, 'shape'): + f(ndim_5) # Series def _test_seq(df, idx_ser, col_ser): @@ -4445,14 +4433,6 @@ def _test_seq(df, idx_ser, col_ser): col_ser = Series(np.random.randn(3)) _test_seq(df, idx_ser, col_ser) - # ndarray - - assert_frame_equal((df == other.values), df.eq(other.values)) - assert_frame_equal((df != other.values), df.ne(other.values)) - assert_frame_equal((df > other.values), df.gt(other.values)) - assert_frame_equal((df < other.values), df.lt(other.values)) - assert_frame_equal((df >= other.values), df.ge(other.values)) - assert_frame_equal((df <= other.values), df.le(other.values)) # list/tuple _test_seq(df, idx_ser.values, col_ser.values) @@ -4472,20 +4452,7 @@ def _test_seq(df, idx_ser, col_ser): rs = df.le(df) self.assert_(not rs.ix[0, 0]) - # scalar - assert_frame_equal(df.eq(0), df == 0) - assert_frame_equal(df.ne(0), df != 0) - assert_frame_equal(df.gt(0), df > 0) - assert_frame_equal(df.lt(0), df < 0) - assert_frame_equal(df.ge(0), df >= 0) - assert_frame_equal(df.le(0), df <= 0) - - assert_frame_equal(df.eq(np.nan), df == np.nan) - assert_frame_equal(df.ne(np.nan), df != np.nan) - assert_frame_equal(df.gt(np.nan), df > np.nan) - assert_frame_equal(df.lt(np.nan), df < np.nan) - assert_frame_equal(df.ge(np.nan), df >= np.nan) - assert_frame_equal(df.le(np.nan), df <= np.nan) + # complex arr = np.array([np.nan, 1, 6, np.nan]) @@ -4514,19 +4481,20 @@ def test_arith_flex_series(self): row = df.xs('a') col = df['two'] + # after arithmetic refactor, add truediv here + ops = ['add', 'sub', 'mul', 'mod'] + for op in ops: + f = getattr(df, op) + op = getattr(operator, op) + assert_frame_equal(f(row), op(df, row)) + assert_frame_equal(f(col, axis=0), op(df.T, col).T) - assert_frame_equal(df.add(row), df + row) + # special case for some reason assert_frame_equal(df.add(row, axis=None), df + row) - assert_frame_equal(df.sub(row), df - row) - assert_frame_equal(df.div(row), df / row) - assert_frame_equal(df.mul(row), df * row) - assert_frame_equal(df.mod(row), df % row) - assert_frame_equal(df.add(col, axis=0), (df.T + col).T) - assert_frame_equal(df.sub(col, axis=0), (df.T - col).T) + # cases which will be refactored after big arithmetic refactor + assert_frame_equal(df.div(row), df / row) assert_frame_equal(df.div(col, axis=0), (df.T / col).T) - assert_frame_equal(df.mul(col, axis=0), (df.T * col).T) - assert_frame_equal(df.mod(col, axis=0), (df.T % col).T) def test_arith_non_pandas_object(self): df = self.simple @@ -4699,11 +4667,14 @@ def test_comparisons(self): df2 = tm.makeTimeDataFrame() row = self.simple.xs('a') + ndim_5 = np.ones(df1.shape + (1, 1, 1)) def test_comp(func): result = func(df1, df2) self.assert_(np.array_equal(result.values, func(df1.values, df2.values))) + with assertRaisesRegexp(ValueError, 'Wrong number of dimensions'): + func(df1, ndim_5) result2 = func(self.simple, row) self.assert_(np.array_equal(result2.values, @@ -4713,7 +4684,10 @@ def test_comp(func): self.assert_(np.array_equal(result3.values, func(self.frame.values, 0))) - self.assertRaises(Exception, func, self.simple, self.simple[:2]) + + with assertRaisesRegexp(ValueError, 'Can only compare ' + 'identically-labeled DataFrame'): + func(self.simple, self.simple[:2]) test_comp(operator.eq) test_comp(operator.ne) @@ -5291,20 +5265,19 @@ def _make_frame(names=None): df.to_csv(path,tupleize_cols=False) # catch invalid headers - def testit(): + with assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2\] are too many rows for this multi_index of columns'): read_csv(path,tupleize_cols=False,header=lrange(3),index_col=0) - assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2\] are too many rows for this multi_index of columns', testit) - def testit(): + with assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2,3,4,5,6\], len of 7, but only 6 lines in file'): read_csv(path,tupleize_cols=False,header=lrange(7),index_col=0) - assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2,3,4,5,6\], len of 7, but only 6 lines in file', testit) - for i in [3,4,5,6,7]: - self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=lrange(i), index_col=0) - self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0) + for i in [4,5,6]: + with tm.assertRaises(CParserError): + read_csv(path, tupleize_cols=False, header=lrange(i), index_col=0) # write with cols - self.assertRaises(Exception, df.to_csv, path,tupleize_cols=False,cols=['foo','bar']) + with assertRaisesRegexp(TypeError, 'cannot specify cols with a MultiIndex'): + df.to_csv(path, tupleize_cols=False, cols=['foo', 'bar']) with ensure_clean(pname) as path: # empty @@ -5652,7 +5625,8 @@ def test_convert_objects(self): # via astype, but errors converted = self.mixed_frame.copy() - self.assertRaises(Exception, converted['H'].astype, 'int32') + with assertRaisesRegexp(ValueError, 'invalid literal'): + converted['H'].astype('int32') # mixed in a single column df = DataFrame(dict(s = Series([1, 'na', 3 ,4]))) @@ -5671,9 +5645,12 @@ def test_append_series_dict(self): columns=['foo', 'bar', 'baz', 'qux']) series = df.ix[4] - self.assertRaises(ValueError, df.append, series, verify_integrity=True) + with assertRaisesRegexp(ValueError, 'Indexes have overlapping values'): + df.append(series, verify_integrity=True) series.name = None - self.assertRaises(Exception, df.append, series, verify_integrity=True) + with assertRaisesRegexp(TypeError, 'Can only append a Series if ' + 'ignore_index=True'): + df.append(series, verify_integrity=True) result = df.append(series[::-1], ignore_index=True) expected = df.append(DataFrame({0: series[::-1]}, index=df.columns).T, @@ -6140,10 +6117,13 @@ def test_dropna(self): expected = df.ix[:, [0, 1, 3]] assert_frame_equal(dropped, expected) + # bad input + self.assertRaises(ValueError, df.dropna, axis=3) + def test_dropna_corner(self): # bad input self.assertRaises(ValueError, self.frame.dropna, how='foo') - self.assertRaises(ValueError, self.frame.dropna, how=None) + self.assertRaises(TypeError, self.frame.dropna, how=None) def test_dropna_multiple_axes(self): df = DataFrame([[1, np.nan, 2, 3], @@ -6442,7 +6422,8 @@ def test_fillna_dict_series(self): assert_frame_equal(result, expected) # disable this for now - self.assertRaises(Exception, df.fillna, df.max(1), axis=1) + with assertRaisesRegexp(NotImplementedError, 'column by column'): + df.fillna(df.max(1), axis=1) def test_fillna_columns(self): df = DataFrame(np.random.randn(10, 10)) @@ -6458,10 +6439,8 @@ def test_fillna_columns(self): assert_frame_equal(result, expected) def test_fillna_invalid_method(self): - try: + with assertRaisesRegexp(ValueError, 'ffil'): self.frame.fillna(method='ffil') - except ValueError as inst: - self.assert_('ffil' in str(inst)) def test_fillna_invalid_value(self): # list @@ -7324,8 +7303,8 @@ def test_xs(self): self.assertEqual(xs['A'], 1) self.assertEqual(xs['B'], '1') - self.assertRaises(Exception, self.tsframe.xs, - self.tsframe.index[0] - datetools.bday) + with tm.assertRaises(KeyError): + self.tsframe.xs(self.tsframe.index[0] - datetools.bday) # xs get column series = self.frame.xs('A', axis=1) @@ -7416,7 +7395,8 @@ def test_pivot_duplicates(self): data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], 'b': ['one', 'two', 'one', 'one', 'two'], 'c': [1., 2., 3., 3., 4.]}) - self.assertRaises(Exception, data.pivot, 'a', 'b', 'c') + with assertRaisesRegexp(ValueError, 'duplicate entries'): + data.pivot('a', 'b', 'c') def test_pivot_empty(self): df = DataFrame({}, columns=['a', 'b', 'c']) @@ -7996,7 +7976,7 @@ def test_rename(self): self.assert_(np.array_equal(renamed.index, ['BAR', 'FOO'])) # have to pass something - self.assertRaises(Exception, self.frame.rename) + self.assertRaises(TypeError, self.frame.rename) # partial columns renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) @@ -8185,7 +8165,7 @@ def test_tshift(self): shifted3 = ps.tshift(freq=datetools.bday) assert_frame_equal(shifted, shifted3) - self.assertRaises(ValueError, ps.tshift, freq='M') + assertRaisesRegexp(ValueError, 'does not match', ps.tshift, freq='M') # DatetimeIndex shifted = self.tsframe.tshift(1) @@ -8297,7 +8277,6 @@ def test_apply_ignore_failures(self): expected = self.mixed_frame._get_numeric_data().apply(np.mean) assert_series_equal(result, expected) - # test with hierarchical index def test_apply_mixed_dtype_corner(self): df = DataFrame({'A': ['foo'], @@ -8414,9 +8393,10 @@ def transform2(row): try: transformed = data.apply(transform, axis=1) - except Exception as e: + except AttributeError as e: self.assertEqual(len(e.args), 2) self.assertEqual(e.args[1], 'occurred at index 4') + self.assertEqual(e.args[0], "'float' object has no attribute 'startswith'") def test_swapaxes(self): df = DataFrame(np.random.randn(10, 5)) @@ -8533,7 +8513,8 @@ def test_filter(self): self.assertEqual(len(filtered.columns), 2) # pass in None - self.assertRaises(Exception, self.frame.filter, items=None) + with assertRaisesRegexp(TypeError, 'Must pass'): + self.frame.filter(items=None) # objects filtered = self.mixed_frame.filter(like='foo') @@ -8633,6 +8614,9 @@ def test_sort_index(self): assert_frame_equal(sorted_df, expected) self.assertRaises(ValueError, frame.sort_index, axis=2, inplace=True) + msg = 'When sorting by column, axis must be 0' + with assertRaisesRegexp(ValueError, msg): + frame.sort_index(by='A', axis=1) def test_sort_index_multicolumn(self): import random @@ -8738,17 +8722,16 @@ def test_sort_inplace(self): assert_frame_equal(sorted_df, expected) def test_sort_index_duplicates(self): - df = DataFrame([[1, 2], [3, 4]], columns=['a', 'a']) + df = DataFrame([lrange(5,9), lrange(4)], + columns=['a', 'a', 'b', 'b']) - try: + with assertRaisesRegexp(ValueError, 'duplicate'): df.sort_index(by='a') - except Exception as e: - self.assertTrue('duplicate' in str(e)) - - try: + with assertRaisesRegexp(ValueError, 'duplicate'): df.sort_index(by=['a']) - except Exception as e: - self.assertTrue('duplicate' in str(e)) + with assertRaisesRegexp(ValueError, 'duplicate'): + # multi-column 'by' is separate codepath + df.sort_index(by=['a', 'b']) def test_sort_datetimes(self): @@ -8774,7 +8757,8 @@ def test_sort_datetimes(self): def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] - self.assertRaises(Exception, s.sort) + with assertRaisesRegexp(TypeError, "This Series is a view"): + s.sort() cp = s.copy() cp.sort() # it works! @@ -8985,9 +8969,8 @@ def test_update_raise(self): other = DataFrame([[2., nan], [nan, 7]], index=[1, 3], columns=[1, 2]) - - np.testing.assert_raises(Exception, df.update, *(other,), - **{'raise_conflict': True}) + with assertRaisesRegexp(ValueError, "Data overlaps"): + df.update(other, raise_conflict=True) def test_update_from_non_df(self): d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])} @@ -9419,8 +9402,8 @@ def wrapper(x): # comp = frame.apply(alternative, axis=1).reindex(result.index) # assert_series_equal(result, comp) - self.assertRaises(Exception, f, axis=2) - + # bad axis + assertRaisesRegexp(ValueError, 'No axis named 2', f, axis=2) # make sure works on mixed-type frame getattr(self.mixed_frame, name)(axis=0) getattr(self.mixed_frame, name)(axis=1) @@ -9734,18 +9717,6 @@ def test_describe_empty_int_columns(self): Series([0, 0], dtype=float, name='count')) self.assert_(isnull(desc.ix[1:]).all().all()) - def test_get_axis_etc(self): - f = self.frame - - self.assertEquals(f._get_axis_number(0), 0) - self.assertEquals(f._get_axis_number(1), 1) - self.assertEquals(f._get_axis_name(0), 'index') - self.assertEquals(f._get_axis_name(1), 'columns') - - self.assert_(f._get_axis(0) is f.index) - self.assert_(f._get_axis(1) is f.columns) - self.assertRaises(Exception, f._get_axis_number, 2) - def test_axis_aliases(self): f = self.frame @@ -10394,8 +10365,8 @@ def test_xs_view(self): self.assert_((dm.xs(2) == 5).all()) # TODO (?): deal with mixed-type fiasco? - self.assertRaises(Exception, self.mixed_frame.xs, - self.mixed_frame.index[2], copy=False) + with assertRaisesRegexp(TypeError, 'cannot get view of mixed-type'): + self.mixed_frame.xs(self.mixed_frame.index[2], copy=False) # unconsolidated dm['foo'] = 6. @@ -10420,6 +10391,8 @@ def test_boolean_indexing(self): df1[df1 > 2.0 * df2] = -1 assert_frame_equal(df1, expected) + with assertRaisesRegexp(ValueError, 'Item wrong length'): + df1[df1.index[:-1] > 2] = -1 def test_boolean_indexing_mixed(self): df = DataFrame( @@ -10446,7 +10419,8 @@ def test_boolean_indexing_mixed(self): # add object, should this raise? df['foo'] = 'test' - self.assertRaises(ValueError, df.__setitem__, df>0.3, 1) + with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'): + df[df > 0.3] = 1 def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) @@ -10577,7 +10551,8 @@ def test_dot(self): exp = a.dot(a.ix[0]) assert_series_equal(result, exp) - self.assertRaises(Exception, a.dot, row[:-1]) + with assertRaisesRegexp(ValueError, 'Dot product shape mismatch'): + a.dot(row[:-1]) a = np.random.rand(1, 5) b = np.random.rand(5, 1) @@ -10605,7 +10580,7 @@ def test_idxmin(self): Series.idxmin, axis=axis, skipna=skipna) assert_series_equal(result, expected) - self.assertRaises(Exception, frame.idxmin, axis=2) + self.assertRaises(ValueError, frame.idxmin, axis=2) def test_idxmax(self): frame = self.frame @@ -10619,7 +10594,7 @@ def test_idxmax(self): Series.idxmax, axis=axis, skipna=skipna) assert_series_equal(result, expected) - self.assertRaises(Exception, frame.idxmax, axis=2) + self.assertRaises(ValueError, frame.idxmax, axis=2) def test_stale_cached_series_bug_473(self): Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'), @@ -10768,7 +10743,8 @@ def wrapper(x): # comp = frame.apply(alternative, axis=1).reindex(result.index) # assert_series_equal(result, comp) - self.assertRaises(Exception, f, axis=2) + # bad axis + self.assertRaises(ValueError, f, axis=2) # make sure works on mixed-type frame mixed = self.mixed_frame diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 50d94ada7b9df..21462780e2ffd 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -10,7 +10,8 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal) + assert_frame_equal, + assertRaisesRegexp) import pandas.core.common as com import pandas.util.testing as tm from pandas.compat import (range, lrange, StringIO, lzip, u, cPickle, @@ -270,7 +271,8 @@ def test_frame_getitem_setitem_boolean(self): np.putmask(values[:-1], values[:-1] < 0, 2) assert_almost_equal(df.values, values) - self.assertRaises(Exception, df.__setitem__, df * 0, 2) + with assertRaisesRegexp(TypeError, 'boolean values only'): + df[df * 0] = 2 def test_frame_getitem_setitem_slice(self): # getitem @@ -427,6 +429,9 @@ def test_xs_level(self): expected = df[1:2] expected.index = expected.index.droplevel(2) assert_frame_equal(result, expected) + # can't produce a view of a multiindex with a level without copying + with assertRaisesRegexp(ValueError, 'Cannot retrieve view'): + self.frame.xs('two', level='second', copy=False) def test_xs_level_multiple(self): from pandas import read_table @@ -441,6 +446,8 @@ def test_xs_level_multiple(self): result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') assert_frame_equal(result, expected) + with assertRaisesRegexp(ValueError, 'Cannot retrieve view'): + df.xs(('a', 4), level=['one', 'four'], copy=False) # GH2107 dates = lrange(20111201, 20111205) @@ -620,14 +627,14 @@ def test_getitem_partial_column_select(self): def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) - self.assertRaises(Exception, df.sortlevel, 0) + assertRaisesRegexp(TypeError, 'hierarchical index', df.sortlevel, 0) # axis=1 # series a_sorted = self.frame['A'].sortlevel(0) - self.assertRaises(Exception, - self.frame.reset_index()['A'].sortlevel) + with assertRaisesRegexp(TypeError, 'hierarchical index'): + self.frame.reset_index()['A'].sortlevel() # preserve names self.assertEquals(a_sorted.index.names, self.frame.index.names) @@ -722,7 +729,7 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - self.assertRaises(Exception, df.count, level=0) + assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) @@ -1085,8 +1092,11 @@ def test_reorder_levels(self): expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) assert_frame_equal(result, expected) - self.assertRaises(Exception, self.ymd.index.reorder_levels, - [1, 2, 3]) + with assertRaisesRegexp(TypeError, 'hierarchical axis'): + self.ymd.reorder_levels([1, 2], axis=1) + + with assertRaisesRegexp(IndexError, 'Too many levels'): + self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): df = self.ymd[:5].T @@ -1202,8 +1212,8 @@ def test_count(self): expect = self.series.count(level=0) assert_series_equal(result, expect) - self.assertRaises(Exception, series.count, 'x') - self.assertRaises(Exception, frame.count, level='x') + self.assertRaises(KeyError, series.count, 'x') + self.assertRaises(KeyError, frame.count, level='x') AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var'] @@ -1420,6 +1430,7 @@ def test_partial_ix_missing(self): # self.ymd.ix[2000, 0] = 0 # self.assert_((self.ymd.ix[2000]['A'] == 0).all()) + # Pretty sure the second (and maybe even the first) is already wrong. self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) @@ -1774,14 +1785,6 @@ def test_indexing_over_hashtable_size_cutoff(self): _index._SIZE_CUTOFF = old_cutoff - def test_xs_mixed_no_copy(self): - index = MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]], - names=['first', 'second']) - data = DataFrame(np.random.rand(len(index)), index=index, - columns=['A']) - - self.assertRaises(Exception, data.xs, 2, level=1, copy=False) - def test_multiindex_na_repr(self): # only an issue with long columns diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 8ad88374f40f6..938025c450258 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1276,7 +1276,7 @@ def test_to_panel_duplicates(self): # #2441 df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) idf = df.set_index(['a', 'b']) - self.assertRaises(Exception, idf.to_panel) + assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel) def test_filter(self): pass diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index b28da7c9d7e0b..45894eb419489 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -121,6 +121,7 @@ def __init__(self, value=None, freq=None, ordinal=None, base, mult = _gfc(freq) if mult != 1: + # TODO: Better error message - this is slightly confusing raise ValueError('Only mult == 1 supported') if self.ordinal is None: diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index b7916bd98d70f..b95ea2cacda55 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -26,7 +26,8 @@ randn = np.random.randn from pandas import Series, TimeSeries, DataFrame -from pandas.util.testing import assert_series_equal, assert_almost_equal +from pandas.util.testing import(assert_series_equal, assert_almost_equal, + assertRaisesRegexp) import pandas.util.testing as tm from pandas import compat from numpy.testing import assert_array_equal @@ -272,7 +273,7 @@ def _ex(p): result = p.to_timestamp('S', how='start') self.assertEquals(result, expected) - self.assertRaises(ValueError, p.to_timestamp, '5t') + assertRaisesRegexp(ValueError, 'Only mult == 1', p.to_timestamp, '5t') def test_start_time(self): freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] @@ -1427,7 +1428,8 @@ def _get_with_delta(delta, freq='A-DEC'): self.assert_(result.columns.equals(exp_index)) # invalid axis - self.assertRaises(ValueError, df.to_timestamp, axis=2) + assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) + assertRaisesRegexp(ValueError, 'Only mult == 1', df.to_timestamp, '5t', axis=1) def test_index_duplicate_periods(self): # monotonic @@ -1886,9 +1888,8 @@ def test_align_series(self): # it works! for kind in ['inner', 'outer', 'left', 'right']: ts.align(ts[::2], join=kind) - - self.assertRaises(Exception, ts.__add__, - ts.asfreq('D', how='end')) + with assertRaisesRegexp(ValueError, 'Only like-indexed'): + ts + ts.asfreq('D', how="end") def test_align_frame(self): rng = period_range('1/1/2000', '1/1/2010', freq='A') @@ -1915,7 +1916,7 @@ def test_union(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - self.assertRaises(Exception, index.union, index2) + self.assertRaises(ValueError, index.union, index2) self.assertRaises(ValueError, index.join, index.to_timestamp()) @@ -1934,7 +1935,7 @@ def test_intersection(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - self.assertRaises(Exception, index.intersection, index2) + self.assertRaises(ValueError, index.intersection, index2) def test_fields(self): # year, month, day, hour, minute diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1142181a6c6aa..c652c2da3214c 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1032,7 +1032,7 @@ def assertRaises(_exception, _callable=None, *args, **kwargs): >>> assertRaises(TypeError, ",".join, [1, 3, 5]); """ manager = _AssertRaisesContextmanager(exception=_exception) - # don't return anything if usedin function form + # don't return anything if used in function form if _callable is not None: with manager: _callable(*args, **kwargs)