diff --git a/doc/source/release.rst b/doc/source/release.rst index 557c4b293a84e..544e414132acd 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -71,8 +71,9 @@ pandas 0.13 when the key is a column - Support for using a ``DatetimeIndex/PeriodsIndex`` directly in a datelike calculation e.g. s-s.index (:issue:`4629`) - - Better/cleaned up exceptions in core/common, io/excel and core/format. - (:issue:`4721`, :issue:`3954`) + - Better/cleaned up exceptions in core/common, io/excel and core/format + (:issue:`4721`, :issue:`3954`), as well as cleaned up test cases in + tests/test_frame, tests/test_multilevel (:issue:`4732`). **API Changes** @@ -143,9 +144,10 @@ pandas 0.13 now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`) - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) - - Factored out excel_value_to_python_value from ExcelFile::_parse_excel (:issue:`4589`) - ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) behavior. 
+ - ``DataFrame.update()`` no longer raises a ``DataConflictError``; it now + raises a ``ValueError`` instead (if necessary) (:issue:`4732`) **Internal Refactoring** diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 7a444ab01e10a..494cc5fe9ad29 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -665,6 +665,25 @@ def __and__(self, other): else: from collections import OrderedDict, Counter +if PY3: + def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc.with_traceback(traceback) +else: + # this version of raise is a syntax error in Python 3 + exec(""" +def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc, None, traceback +""") + +raise_with_traceback.__doc__ = ( +"""Raise exception with existing traceback. +If traceback is not passed, uses sys.exc_info() to get traceback.""" +) + # http://stackoverflow.com/questions/4126348 # Thanks to @martineau at SO diff --git a/pandas/core/format.py b/pandas/core/format.py index 978b82aed22d9..6b4dc979d5279 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -825,9 +825,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None, # validate mi options if self.has_mi_columns: - # guarded against in to_csv itself - if cols is not None: # pragma: no cover - raise AssertionError("cannot specify cols with a multi_index on the columns") + if cols is not None: + raise TypeError("cannot specify cols with a MultiIndex on the columns") if cols is not None: if isinstance(cols,Index): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d184120185955..0cd9f7f3f5330 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12,8 +12,6 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0212,W0231,W0703,W0622 -from pandas.compat import range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict -from 
pandas import compat import operator import sys import collections @@ -38,6 +36,8 @@ import pandas.core.expressions as expressions from pandas.sparse.array import SparseArray from pandas.compat.scipy import scoreatpercentile as _quantile +from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u, + OrderedDict, raise_with_traceback) from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.util.decorators import deprecate, Appender, Substitution @@ -180,12 +180,6 @@ merged : DataFrame """ -# Custom error class for update - - -class DataConflictError(Exception): - pass - #---------------------------------------------------------------------- # Factory helper methods @@ -237,8 +231,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): casted = DataFrame(other, index=self.index, columns=self.columns) return self._combine_frame(casted, na_op, fill_value, level) - else: # pragma: no cover - raise ValueError("Bad argument shape") + else: + raise ValueError("Incompatible argument shape %s" % (other.shape,)) else: return self._combine_const(other, na_op) @@ -303,8 +297,9 @@ def f(self, other, axis=default_axis, level=None): return self._flex_compare_frame(casted, na_op, str_rep, level) - else: # pragma: no cover - raise ValueError("Bad argument shape") + else: + raise ValueError("Incompatible argument shape: %s" % + (other.shape,)) else: return self._combine_const(other, na_op) @@ -351,7 +346,7 @@ class DataFrame(NDFrame): Index to use for resulting frame. Will default to np.arange(n) if no indexing information part of input data and no index provided columns : Index or array-like - Column labels to use for resulting frame. Will default to + Column labels to use for resulting frame. 
Will default to np.arange(n) if no column labels are provided dtype : dtype, default None Data type to force, otherwise infer @@ -407,7 +402,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=copy) elif isinstance(data, (np.ndarray, Series)): if data.dtype.names: - data_columns, data = _rec_to_dict(data) + data_columns = list(data.dtype.names) + data = dict((k, data[k]) for k in data_columns) if columns is None: columns = data_columns mgr = self._init_dict(data, index, columns, dtype=dtype) @@ -438,9 +434,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, else: try: arr = np.array(data, dtype=dtype, copy=copy) - except (ValueError, TypeError): - raise PandasError('DataFrame constructor called with ' - 'incompatible data and dtype') + except (ValueError, TypeError) as e: + exc = TypeError('DataFrame constructor called with ' + 'incompatible data and dtype: %s' % e) + raise_with_traceback(exc) if arr.ndim == 0 and index is not None and columns is not None: if isinstance(data, compat.string_types) and dtype is None: @@ -527,8 +524,10 @@ def _init_ndarray(self, values, index, columns, dtype=None, if values.dtype != dtype: try: values = values.astype(dtype) - except Exception: - raise ValueError('failed to cast to %s' % dtype) + except Exception as orig: + e = ValueError("failed to cast to '%s' (Exception was: %s)" + % (dtype, orig)) + raise_with_traceback(e) N, K = values.shape @@ -650,11 +649,7 @@ def __unicode__(self): self.shape[0] <= max_info_rows) self.info(buf=buf, verbose=verbose) - value = buf.getvalue() - if not isinstance(value, compat.text_type): - raise AssertionError() - - return value + return buf.getvalue() def _repr_html_(self): """ @@ -853,8 +848,8 @@ def dot(self, other): lvals = self.values rvals = np.asarray(other) if lvals.shape[1] != rvals.shape[0]: - raise Exception('Dot product shape mismatch, %s vs %s' % - (lvals.shape, rvals.shape)) + raise ValueError('Dot product shape mismatch, %s vs %s' % + 
(lvals.shape, rvals.shape)) if isinstance(other, DataFrame): return self._constructor(np.dot(lvals, rvals), @@ -1162,7 +1157,7 @@ def from_items(cls, items, columns=None, orient='columns'): return cls._from_arrays(arrays, columns, None) elif orient == 'index': if columns is None: - raise ValueError("Must pass columns with orient='index'") + raise TypeError("Must pass columns with orient='index'") keys = _ensure_index(keys) @@ -1248,12 +1243,12 @@ def to_panel(self): from pandas.core.reshape import block2d_to_blocknd # only support this kind for now - if (not isinstance(self.index, MultiIndex) or + if (not isinstance(self.index, MultiIndex) or # pragma: no cover len(self.index.levels) != 2): - raise AssertionError('Must have 2-level MultiIndex') + raise NotImplementedError('Only 2-level MultiIndex are supported.') if not self.index.is_unique: - raise Exception("Can't convert non-uniquely indexed " + raise ValueError("Can't convert non-uniquely indexed " "DataFrame to Panel") self._consolidate_inplace() @@ -1626,8 +1621,9 @@ def info(self, verbose=True, buf=None, max_cols=None): len(self.columns)) space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4 counts = self.count() - if len(cols) != len(counts): - raise AssertionError('Columns must equal counts') + if len(cols) != len(counts): # pragma: no cover + raise AssertionError('Columns must equal counts (%d != %d)' % + (len(cols), len(counts))) for col, count in compat.iteritems(counts): col = com.pprint_thing(col) lines.append(_put_str(col, space) + @@ -1852,7 +1848,7 @@ def _getitem_array(self, key): warnings.warn("Boolean Series key will be reindexed to match " "DataFrame index.", UserWarning) elif len(key) != len(self.index): - raise ValueError('Item wrong length %d instead of %d!' % + raise ValueError('Item wrong length %d instead of %d.' 
% (len(key), len(self.index))) # _check_bool_indexer will throw exception if Series key cannot # be reindexed to match DataFrame rows @@ -1938,7 +1934,7 @@ def _setitem_array(self, key, value): else: if isinstance(value, DataFrame): if len(value.columns) != len(key): - raise AssertionError('Columns must be same length as key') + raise ValueError('Columns must be same length as key') for k1, k2 in zip(key, value.columns): self[k1] = value[k2] else: @@ -1949,11 +1945,11 @@ def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. # df[df > df2] = 0 if key.values.dtype != np.bool_: - raise ValueError('Must pass DataFrame with boolean values only') + raise TypeError('Must pass DataFrame with boolean values only') if self._is_mixed_type: if not self._is_numeric_mixed_type: - raise ValueError( + raise TypeError( 'Cannot do boolean setting on mixed-type frame') self.where(-key, value, inplace=True) @@ -2009,8 +2005,8 @@ def _sanitize_column(self, key, value): value = value.T else: if len(value) != len(self.index): - raise AssertionError('Length of values does not match ' - 'length of index') + raise ValueError('Length of values does not match ' + 'length of index') if not isinstance(value, np.ndarray): if isinstance(value, list) and len(value) > 0: @@ -2223,7 +2219,7 @@ def lookup(self, row_labels, col_labels): """ n = len(row_labels) if n != len(col_labels): - raise AssertionError('Row labels must have same size as ' + raise ValueError('Row labels must have same size as ' 'column labels') thresh = 1000 @@ -2232,9 +2228,9 @@ def lookup(self, row_labels, col_labels): ridx = self.index.get_indexer(row_labels) cidx = self.columns.get_indexer(col_labels) if (ridx == -1).any(): - raise ValueError('One or more row labels was not found') + raise KeyError('One or more row labels was not found') if (cidx == -1).any(): - raise ValueError('One or more column labels was not found') + raise KeyError('One or more column labels was not found') flat_index 
= ridx * len(self.columns) + cidx result = values.flat[flat_index] else: @@ -2392,7 +2388,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if verify_integrity and not index.is_unique: duplicates = index.get_duplicates() - raise Exception('Index has duplicate keys: %s' % duplicates) + raise ValueError('Index has duplicate keys: %s' % duplicates) for c in to_remove: del frame[c] @@ -2593,12 +2589,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): return result axis = self._get_axis_number(axis) - if axis == 0: - agg_axis = 1 - elif axis == 1: - agg_axis = 0 - else: # pragma: no cover - raise ValueError('axis must be 0 or 1') + if axis not in (0, 1): # pragma: no cover + raise AssertionError('axis must be 0 or 1') + agg_axis = 1 - axis agg_obj = self if subset is not None: @@ -2615,9 +2608,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): mask = count > 0 else: if how is not None: - raise ValueError('do not recognize %s' % how) + raise ValueError('invalid how option: %s' % how) else: - raise ValueError('must specify how or thresh') + raise TypeError('must specify how or thresh') return self.take(mask.nonzero()[0], axis=axis, convert=False) @@ -2757,14 +2750,14 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False, from pandas.core.groupby import _lexsort_indexer axis = self._get_axis_number(axis) - if axis not in [0, 1]: - raise ValueError('Axis must be 0 or 1, got %s' % str(axis)) + if axis not in [0, 1]: # pragma: no cover + raise AssertionError('Axis must be 0 or 1, got %s' % str(axis)) labels = self._get_axis(axis) if by is not None: if axis != 0: - raise AssertionError('Axis must be 0') + raise ValueError('When sorting by column, axis must be 0 (rows)') if not isinstance(by, (tuple, list)): by = [by] @@ -2835,7 +2828,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): axis = self._get_axis_number(axis) the_axis = self._get_axis(axis) if not isinstance(the_axis, 
MultiIndex): - raise Exception('can only sort by level with a hierarchical index') + raise TypeError('can only sort by level with a hierarchical index') new_axis, indexer = the_axis.sortlevel(level, ascending=ascending) @@ -2900,7 +2893,7 @@ def reorder_levels(self, order, axis=0): axis = self._get_axis_number(axis) if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover - raise Exception('Can only reorder levels on a hierarchical axis.') + raise TypeError('Can only reorder levels on a hierarchical axis.') result = self.copy() @@ -2976,7 +2969,8 @@ def _combine_series_infer(self, other, func, fill_value=None): def _combine_match_index(self, other, func, fill_value=None): left, right = self.align(other, join='outer', axis=0, copy=False) if fill_value is not None: - raise NotImplementedError + raise NotImplementedError("fill_value %r not supported." % + fill_value) return self._constructor(func(left.values.T, right.values).T, index=left.index, columns=self.columns, copy=False) @@ -2984,7 +2978,8 @@ def _combine_match_index(self, other, func, fill_value=None): def _combine_match_columns(self, other, func, fill_value=None): left, right = self.align(other, join='outer', axis=1, copy=False) if fill_value is not None: - raise NotImplementedError + raise NotImplementedError("fill_value %r not supported" % + fill_value) new_data = left._data.eval( func, right, axes=[left.columns, self.index]) @@ -2999,7 +2994,7 @@ def _combine_const(self, other, func, raise_on_error=True): def _compare_frame(self, other, func, str_rep): if not self._indexed_same(other): - raise Exception('Can only compare identically-labeled ' + raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') def _compare(a, b): @@ -3164,8 +3159,9 @@ def update(self, other, join='left', overwrite=True, filter_func=None, If True, will raise an error if the DataFrame and other both contain data in the same place. 
""" - if join != 'left': - raise NotImplementedError + # TODO: Support other joins + if join != 'left': # pragma: no cover + raise NotImplementedError("Only left join is supported") if not isinstance(other, DataFrame): other = DataFrame(other) @@ -3182,7 +3178,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, mask_this = notnull(that) mask_that = notnull(this) if any(mask_this & mask_that): - raise DataConflictError("Data overlaps.") + raise ValueError("Data overlaps.") if overwrite: mask = isnull(that) @@ -3556,8 +3552,8 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): series_gen = (Series.from_array(arr, index=res_columns, name=name) for i, (arr, name) in enumerate(zip(values, res_index))) - else: - raise ValueError('Axis must be 0 or 1, got %s' % str(axis)) + else: # pragma: no cover + raise AssertionError('Axis must be 0 or 1, got %s' % str(axis)) keys = [] results = {} @@ -3615,7 +3611,7 @@ def _apply_broadcast(self, func, axis): elif axis == 1: target = self.T else: # pragma: no cover - raise ValueError('Axis must be 0 or 1, got %s' % axis) + raise AssertionError('Axis must be 0 or 1, got %s' % axis) result_values = np.empty_like(target.values) columns = target.columns @@ -3669,7 +3665,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): If True do not use the index labels. 
Useful for gluing together record arrays verify_integrity : boolean, default False - If True, raise Exception on creating index with duplicates + If True, raise ValueError on creating index with duplicates Notes ----- @@ -3685,7 +3681,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): if isinstance(other, dict): other = Series(other) if other.name is None and not ignore_index: - raise Exception('Can only append a Series if ' + raise TypeError('Can only append a Series if ' 'ignore_index=True') index = None if other.name is None else [other.name] @@ -3757,7 +3753,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', if isinstance(other, Series): if other.name is None: - raise AssertionError('Other Series must have a name') + raise ValueError('Other Series must have a name') other = DataFrame({other.name: other}) if isinstance(other, DataFrame): @@ -4042,6 +4038,10 @@ def _count_level(self, level, axis=0, numeric_only=False): if axis == 1: frame = frame.T + if not isinstance(frame.index, MultiIndex): + raise TypeError("Can only count levels on hierarchical %s." % + self._get_axis_name(axis)) + # python 2.5 mask = notnull(frame.values).view(np.uint8) @@ -4282,13 +4282,16 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, try: values = self.values result = f(values) - except Exception: + except Exception as e: if filter_type is None or filter_type == 'numeric': data = self._get_numeric_data() elif filter_type == 'bool': data = self._get_bool_data() - else: - raise NotImplementedError + else: # pragma: no cover + e = NotImplementedError("Handling exception with filter_" + "type %s not implemented." 
+ % filter_type) + raise_with_traceback(e) result = f(data.values) labels = data._get_agg_axis(axis) else: @@ -4297,8 +4300,10 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, data = self._get_numeric_data() elif filter_type == 'bool': data = self._get_bool_data() - else: - raise NotImplementedError + else: # pragma: no cover + msg = ("Generating numeric_only data with filter_type %s " + "not supported." % filter_type) + raise NotImplementedError(msg) values = data.values labels = data._get_agg_axis(axis) else: @@ -4386,7 +4391,7 @@ def _get_agg_axis(self, axis_num): elif axis_num == 1: return self.index else: - raise Exception('Must have 0<= axis <= 1') + raise ValueError('Axis must be 0 or 1 (got %r)' % axis_num) def quantile(self, q=0.5, axis=0, numeric_only=True): """ @@ -4534,8 +4539,8 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): new_data.set_axis(1, self.index.to_timestamp(freq=freq, how=how)) elif axis == 1: new_data.set_axis(0, self.columns.to_timestamp(freq=freq, how=how)) - else: - raise ValueError('Axis must be 0 or 1. Got %s' % str(axis)) + else: # pragma: no cover + raise AssertionError('Axis must be 0 or 1. Got %s' % str(axis)) return self._constructor(new_data) @@ -4569,8 +4574,8 @@ def to_period(self, freq=None, axis=0, copy=True): if freq is None: freq = self.columns.freqstr or self.columns.inferred_freq new_data.set_axis(0, self.columns.to_period(freq=freq)) - else: - raise ValueError('Axis must be 0 or 1. Got %s' % str(axis)) + else: # pragma: no cover + raise AssertionError('Axis must be 0 or 1. 
Got %s' % str(axis)) return self._constructor(new_data) @@ -4823,22 +4828,6 @@ def convert(v): return values -def _rec_to_dict(arr): - if isinstance(arr, (np.ndarray, Series)): - columns = list(arr.dtype.names) - sdict = dict((k, arr[k]) for k in columns) - elif isinstance(arr, DataFrame): - columns = list(arr.columns) - sdict = dict((k, v.values) for k, v in compat.iteritems(arr)) - elif isinstance(arr, dict): - columns = sorted(arr) - sdict = arr.copy() - else: # pragma: no cover - raise TypeError('%s' % type(arr)) - - return columns, sdict - - def _to_arrays(data, columns, coerce_float=False, dtype=None): """ Return list of arrays, columns @@ -4933,8 +4922,7 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): # assure that they are of the base dict class and not of derived # classes - data = [(type(d) is dict) and d or dict(d) - for d in data] + data = [(type(d) is dict) and d or dict(d) for d in data] content = list(lib.dicts_to_array(data, list(columns)).T) return _convert_object_array(content, columns, dtype=dtype, @@ -4945,7 +4933,8 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): if columns is None: columns = _default_index(len(content)) else: - if len(columns) != len(content): + if len(columns) != len(content): # pragma: no cover + # caller's responsibility to check for this... 
raise AssertionError('%d columns passed, passed data had %s ' 'columns' % (len(columns), len(content))) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b0ab62b7158c9..796c3948a2681 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -417,7 +417,7 @@ def rename(self, *args, **kwargs): inplace = kwargs.get('inplace', False) if (com._count_not_none(*axes.values()) == 0): - raise Exception('must pass an index to rename') + raise TypeError('must pass an index to rename') # renamer function if passed a dict def _get_rename_function(mapper): @@ -1242,7 +1242,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): matcher = re.compile(regex) return self.select(lambda x: matcher.search(x) is not None, axis=axis_name) else: - raise ValueError('items was None!') + raise TypeError('Must pass either `items`, `like`, or `regex`') #---------------------------------------------------------------------- # Attribute access diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 856e97ad163f2..636a5e88817ee 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -84,7 +84,7 @@ def __setitem__(self, key, value): if isinstance(key, tuple): if len(key) > self.ndim: - raise IndexingError('only tuples of length <= %d supported', + raise IndexingError('only tuples of length <= %d supported' % self.ndim) indexer = self._convert_tuple(key, is_setter=True) else: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d025c7a7fcf6d..294b90bd4092f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1625,7 +1625,7 @@ def set_axis(self, axis, value, maybe_rename=True, check_axis=True): value = _ensure_index(value) if check_axis and len(value) != len(cur_axis): - raise Exception('Length mismatch (%d vs %d)' + raise ValueError('Length mismatch (%d vs %d)' % (len(value), len(cur_axis))) self.axes[axis] = value @@ -2297,7 +2297,7 @@ def fast_2d_xs(self, loc, copy=False): return result if not 
copy: - raise Exception('cannot get view of mixed-type or ' + raise TypeError('cannot get view of mixed-type or ' 'non-consolidated DataFrame') items = self.items @@ -2483,7 +2483,8 @@ def _set_item(item, arr): def insert(self, loc, item, value, allow_duplicates=False): if not allow_duplicates and item in self.items: - raise Exception('cannot insert %s, already exists' % item) + # Should this be a different kind of error?? + raise ValueError('cannot insert %s, already exists' % item) try: new_items = self.items.insert(loc, item) @@ -2826,7 +2827,8 @@ def _maybe_rename_join(self, other, lsuffix, rsuffix, copydata=True): to_rename = self.items.intersection(other.items) if len(to_rename) > 0: if not lsuffix and not rsuffix: - raise Exception('columns overlap: %s' % to_rename) + raise ValueError('columns overlap but no suffix specified: %s' + % to_rename) def lrenamer(x): if x in to_rename: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index a8a36ef8ca0be..312667930b54d 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -21,10 +21,6 @@ from pandas.core.index import Index, MultiIndex -class ReshapeError(Exception): - pass - - class _Unstacker(object): """ Helper class to unstack data / pivot with multi-level index @@ -129,8 +125,8 @@ def _make_selectors(self): mask.put(selector, True) if mask.sum() < len(self.index): - raise ReshapeError('Index contains duplicate entries, ' - 'cannot reshape') + raise ValueError('Index contains duplicate entries, ' + 'cannot reshape') self.group_index = comp_index self.mask = mask diff --git a/pandas/core/series.py b/pandas/core/series.py index 8396de9c5997b..43bf4db7b29db 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2377,7 +2377,7 @@ def sort(self, axis=0, kind='quicksort', order=None, ascending=True): if (true_base is not None and (true_base.ndim != 1 or true_base.shape != self.shape)): - raise Exception('This Series is a view of some other array, to ' + raise TypeError('This 
Series is a view of some other array, to ' 'sort in-place you must create a copy') self[:] = sortedSeries @@ -2542,7 +2542,7 @@ def sortlevel(self, level=0, ascending=True): sorted : Series """ if not isinstance(self.index, MultiIndex): - raise Exception('can only sort by level with a hierarchical index') + raise TypeError('can only sort by level with a hierarchical index') new_index, indexer = self.index.sortlevel(level, ascending=ascending) new_values = self.values.take(indexer) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index dd7bd52076e06..d8f6d531a6983 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -600,7 +600,8 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, lim def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): if isinstance(other, Series): - assert(other.name is not None) + if other.name is None: + raise ValueError('Other Series must have a name') other = SparseDataFrame({other.name: other}, default_fill_value=self._default_fill_value) if on is not None: @@ -627,19 +628,19 @@ def _join_index(self, other, how, lsuffix, rsuffix): return concat([this, other], axis=1, verify_integrity=True) def _maybe_rename_join(self, other, lsuffix, rsuffix): - intersection = self.columns.intersection(other.columns) - - if len(intersection) > 0: + to_rename = self.columns.intersection(other.columns) + if len(to_rename) > 0: if not lsuffix and not rsuffix: - raise Exception('columns overlap: %s' % intersection) + raise ValueError('columns overlap but no suffix specified: %s' + % to_rename) def lrenamer(x): - if x in intersection: + if x in to_rename: return '%s%s' % (x, lsuffix) return x def rrenamer(x): - if x in intersection: + if x in to_rename: return '%s%s' % (x, rsuffix) return x @@ -687,7 +688,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): ---------- func : function Function to apply to each column - axis : {0, 1} + axis : {0, 1, 'index', 
'columns'} broadcast : bool, default False For aggregation functions, return object of same size with values propagated @@ -698,6 +699,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): """ if not len(self.columns): return self + axis = self._get_axis_number(axis) if isinstance(func, np.ufunc): new_series = {} diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 118672a85d3fb..c39634281ebb7 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -40,6 +40,8 @@ assertRaisesRegexp, makeCustomDataframe as mkdf, ensure_clean) +from pandas.core.indexing import IndexingError +from pandas.core.common import PandasError import pandas.util.testing as tm import pandas.lib as lib @@ -115,7 +117,8 @@ def test_getitem(self): self.assert_(self.frame[key] is not None) self.assert_('random' not in self.frame) - self.assertRaises(Exception, self.frame.__getitem__, 'random') + with assertRaisesRegexp(KeyError, 'no item named random'): + self.frame['random'] def test_getitem_dupe_cols(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) @@ -154,10 +157,10 @@ def test_getitem_list(self): self.assertEqual(result.columns.name, 'foo') - self.assertRaises(Exception, self.frame.__getitem__, - ['B', 'A', 'foo']) - self.assertRaises(Exception, self.frame.__getitem__, - Index(['B', 'A', 'foo'])) + with assertRaisesRegexp(KeyError, 'not in index'): + self.frame[['B', 'A', 'food']] + with assertRaisesRegexp(KeyError, 'not in index'): + self.frame[Index(['B', 'A', 'foo'])] # tuples df = DataFrame(randn(8, 3), @@ -178,6 +181,12 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A']) assert_series_equal(self.frame['A'], data['B']) + with assertRaisesRegexp(ValueError, 'Columns must be same length as key'): + data[['A']] = self.frame[['A', 'B']] + with assertRaisesRegexp(ValueError, 'Length of values does not match ' + 'length of index'): + data['A'] = range(len(data.index) - 1) + df = DataFrame(0, 
lrange(3), ['tt1', 'tt2'], dtype=np.int_) df.ix[1, ['tt1', 'tt2']] = [1, 2] @@ -214,12 +223,14 @@ def test_getitem_boolean(self): subframe = self.tsframe[indexer] self.assert_(np.array_equal(subindex, subframe.index)) - self.assertRaises(Exception, self.tsframe.__getitem__, indexer[:-1]) + with assertRaisesRegexp(ValueError, 'Item wrong length'): + self.tsframe[indexer[:-1]] subframe_obj = self.tsframe[indexer_obj] assert_frame_equal(subframe_obj, subframe) - self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe) + with tm.assertRaisesRegexp(ValueError, 'boolean values only'): + self.tsframe[self.tsframe] # test that Series work indexer_obj = Series(indexer_obj, self.tsframe.index) @@ -381,8 +392,8 @@ def test_setitem(self): tm.assert_dict_equal(series, self.frame['col6'], compare_keys=False) - self.assertRaises(Exception, self.frame.__setitem__, - randn(len(self.frame) + 1)) + with tm.assertRaises(KeyError): + self.frame[randn(len(self.frame) + 1)] = 1 # set ndarray arr = randn(len(self.frame)) @@ -462,7 +473,9 @@ def test_setitem_boolean(self): values[values == 2] = 3 assert_almost_equal(df.values, values) - self.assertRaises(Exception, df.__setitem__, df * 0, 2) + with assertRaisesRegexp(TypeError, 'Must pass DataFrame with boolean ' + 'values only'): + df[df * 0] = 2 # index with DataFrame mask = df > np.abs(df) @@ -1176,14 +1189,11 @@ def test_getitem_fancy_ints(self): def test_getitem_setitem_fancy_exceptions(self): ix = self.frame.ix - self.assertRaises(Exception, ix.__getitem__, - (slice(None, None, None), - slice(None, None, None), - slice(None, None, None))) - self.assertRaises(Exception, ix.__setitem__, - (slice(None, None, None), - slice(None, None, None), - slice(None, None, None)), 1) + with assertRaisesRegexp(IndexingError, 'Too many indexers'): + ix[:, :, :] + with assertRaisesRegexp(IndexingError, 'only tuples of length <= 2 ' + 'supported'): + ix[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self): # boolean index 
misaligned labels @@ -1230,8 +1240,10 @@ def test_getitem_setitem_float_labels(self): self.assertEqual(len(result), 2) # this should raise an exception - self.assertRaises(Exception, df.ix.__getitem__, slice(1, 2)) - self.assertRaises(Exception, df.ix.__setitem__, slice(1, 2), 0) + with tm.assertRaises(KeyError): + df.ix[1:2] + with tm.assertRaises(KeyError): + df.ix[1:2] = 0 # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) @@ -1470,11 +1482,14 @@ def testit(df): assert_almost_equal(df['mask'], exp_mask) self.assert_(df['mask'].dtype == np.bool_) - self.assertRaises(ValueError, self.frame.lookup, - ['xyz'], ['A']) + with tm.assertRaises(KeyError): + self.frame.lookup(['xyz'], ['A']) + + with tm.assertRaises(KeyError): + self.frame.lookup([self.frame.index[0]], ['xyz']) - self.assertRaises(ValueError, self.frame.lookup, - [self.frame.index[0]], ['xyz']) + with tm.assertRaisesRegexp(ValueError, 'same size'): + self.frame.lookup(['a', 'b', 'c'], ['a']) def test_set_value(self): for idx in self.frame.index: @@ -1721,10 +1736,6 @@ def test_join_index(self): self.assert_(joined.index.equals(f2.index)) self.assertEqual(len(joined.columns), 4) - # corner case - self.assertRaises(Exception, self.frame.join, self.frame, - how='left') - # inner f = self.frame.reindex(columns=['A', 'B'])[:10] @@ -1734,10 +1745,6 @@ def test_join_index(self): self.assert_(joined.index.equals(f.index.intersection(f2.index))) self.assertEqual(len(joined.columns), 4) - # corner case - self.assertRaises(Exception, self.frame.join, self.frame, - how='inner') - # outer f = self.frame.reindex(columns=['A', 'B'])[:10] @@ -1747,11 +1754,12 @@ def test_join_index(self): self.assert_(tm.equalContents(self.frame.index, joined.index)) self.assertEqual(len(joined.columns), 4) - # corner case - self.assertRaises(Exception, self.frame.join, self.frame, - how='outer') + assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo') - self.assertRaises(Exception, f.join, f2, how='foo') + # corner case - 
overlapping columns + for how in ('outer', 'left', 'inner'): + with assertRaisesRegexp(ValueError, 'columns overlap but no suffix'): + self.frame.join(self.frame, how=how) def test_join_index_more(self): af = self.frame.ix[:, ['A', 'B']] @@ -1778,7 +1786,7 @@ def test_join_index_series(self): assert_frame_equal(joined, self.frame, check_names=False) # TODO should this check_names ? s.name = None - self.assertRaises(Exception, df.join, s) + assertRaisesRegexp(ValueError, 'must have a name', df.join, s) def test_join_overlap(self): df1 = self.frame.ix[:, ['A', 'B', 'C']] @@ -1858,22 +1866,23 @@ def setUp(self): def test_get_axis(self): f = self.frame - self.assert_(f._get_axis_name(0) == 'index') - self.assert_(f._get_axis_name(1) == 'columns') - self.assert_(f._get_axis_name('index') == 'index') - self.assert_(f._get_axis_name('columns') == 'columns') - self.assertRaises(Exception, f._get_axis_name, 'foo') - self.assertRaises(Exception, f._get_axis_name, None) - - self.assert_(f._get_axis_number(0) == 0) - self.assert_(f._get_axis_number(1) == 1) - self.assert_(f._get_axis_number('index') == 0) - self.assert_(f._get_axis_number('columns') == 1) - self.assertRaises(Exception, f._get_axis_number, 2) - self.assertRaises(Exception, f._get_axis_number, None) - - self.assert_(self.frame._get_axis(0) is self.frame.index) - self.assert_(self.frame._get_axis(1) is self.frame.columns) + self.assertEquals(f._get_axis_number(0), 0) + self.assertEquals(f._get_axis_number(1), 1) + self.assertEquals(f._get_axis_number('index'), 0) + self.assertEquals(f._get_axis_number('columns'), 1) + + self.assertEquals(f._get_axis_name(0), 'index') + self.assertEquals(f._get_axis_name(1), 'columns') + self.assertEquals(f._get_axis_name('index'), 'index') + self.assertEquals(f._get_axis_name('columns'), 'columns') + + self.assert_(f._get_axis(0) is f.index) + self.assert_(f._get_axis(1) is f.columns) + + assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, 2) + 
assertRaisesRegexp(ValueError, 'No axis.*foo', f._get_axis_name, 'foo') + assertRaisesRegexp(ValueError, 'No axis.*None', f._get_axis_name, None) + assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, None) def test_set_index(self): idx = Index(np.arange(len(self.mixed_frame))) @@ -1882,8 +1891,8 @@ def test_set_index(self): _ = self.mixed_frame['foo'] self.mixed_frame.index = idx self.assert_(self.mixed_frame['foo'].index is idx) - self.assertRaises(Exception, setattr, self.mixed_frame, 'index', - idx[::2]) + with assertRaisesRegexp(ValueError, 'Length mismatch'): + self.mixed_frame.index = idx[::2] def test_set_index_cast(self): @@ -1957,7 +1966,8 @@ def test_set_index2(self): assert_frame_equal(df3, expected_nodrop) # corner case - self.assertRaises(Exception, df.set_index, 'A', verify_integrity=True) + with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + df.set_index('A', verify_integrity=True) # append result = df.set_index(['A', 'B'], append=True) @@ -1981,8 +1991,8 @@ def test_set_index_nonuniq(self): 'C': ['a', 'b', 'c', 'd', 'e'], 'D': np.random.randn(5), 'E': np.random.randn(5)}) - self.assertRaises(Exception, df.set_index, 'A', verify_integrity=True, - inplace=True) + with assertRaisesRegexp(ValueError, 'Index has duplicate keys'): + df.set_index('A', verify_integrity=True, inplace=True) self.assert_('A' in df) def test_set_index_bug(self): @@ -2040,8 +2050,8 @@ def test_set_index_empty_column(self): def test_set_columns(self): cols = Index(np.arange(len(self.mixed_frame.columns))) self.mixed_frame.columns = cols - self.assertRaises(Exception, setattr, self.mixed_frame, 'columns', - cols[::2]) + with assertRaisesRegexp(ValueError, 'Length mismatch'): + self.mixed_frame.columns = cols[::2] def test_keys(self): getkeys = self.frame.keys @@ -2220,12 +2230,11 @@ def test_constructor_dict(self): # Corner cases self.assertEqual(len(DataFrame({})), 0) - self.assertRaises(Exception, lambda x: DataFrame([self.ts1, self.ts2])) - # mix 
dict and array, wrong size - self.assertRaises(Exception, DataFrame, - {'A': {'a': 'a', 'b': 'b'}, - 'B': ['a', 'b', 'c']}) + # mix dict and array, wrong size - no spec for which error should raise + # first + with tm.assertRaises(ValueError): + DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # Length-one dict micro-optimization frame = DataFrame({'A': {'1': 1, '2': 2}}) @@ -2248,36 +2257,33 @@ def test_constructor_dict(self): self.assert_(frame.index.equals(Index([]))) def test_constructor_error_msgs(self): - + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." # mix dict and array, wrong size - def testit(): + with assertRaisesRegexp(ValueError, msg): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) - assertRaisesRegexp(ValueError, "Mixing dicts with non-Series may lead to ambiguous ordering.", testit) # wrong size ndarray, GH 3105 - def testit(): - DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], + msg = "Shape of passed values is \(3, 4\), indices imply \(3, 3\)" + with assertRaisesRegexp(ValueError, msg): + DataFrame(np.arange(12).reshape((4, 3)), + columns=['foo', 'bar', 'baz'], index=date_range('2000-01-01', periods=3)) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4\), indices imply \(3, 3\)", testit) + # higher dim raise exception - def testit(): + with assertRaisesRegexp(ValueError, 'Must pass 2-d input'): DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) - assertRaisesRegexp(ValueError, "Must pass 2-d input", testit) # wrong size axis labels - def testit(): + with assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(3, 1\)"): DataFrame(np.random.rand(2,3), columns=['A', 'B', 'C'], index=[1]) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(3, 1\)", testit) - def testit(): + with assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(2, 2\)"): 
DataFrame(np.random.rand(2,3), columns=['A', 'B'], index=[1, 2]) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 2\), indices imply \(2, 2\)", testit) - def testit(): + with assertRaisesRegexp(ValueError, 'If using all scalar values, you must must pass an index'): DataFrame({'a': False, 'b': True}) - assertRaisesRegexp(ValueError, 'If using all scalar values, you must must pass an index', testit) def test_insert_error_msmgs(self): @@ -2285,10 +2291,10 @@ def test_insert_error_msmgs(self): df = DataFrame(np.random.randint(0,2,(4,4)), columns=['a', 'b', 'c', 'd']) - def testit(): + msg = 'incompatible index of inserted column with frame index' + with assertRaisesRegexp(TypeError, msg): df['gr'] = df.groupby(['b', 'c']).count() - assertRaisesRegexp(TypeError, 'incompatible index of inserted column with frame index', testit) def test_constructor_subclass_dict(self): # Test for passing dict subclass to constructor @@ -2362,38 +2368,38 @@ def test_constructor_dict_of_tuples(self): expected = DataFrame(dict((k, list(v)) for k, v in compat.iteritems(data))) assert_frame_equal(result, expected, check_dtype=False) - def test_constructor_ndarray(self): - mat = np.zeros((2, 3), dtype=float) - + def _check_basic_constructor(self, empty): + "mat: 2d matrix with shpae (3, 2) to input. 
empty - makes sized objects" + mat = empty((2, 3), dtype=float) # 2-D input frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) self.assertEqual(len(frame.index), 2) self.assertEqual(len(frame.columns), 3) - # cast type - frame = DataFrame(mat, columns=['A', 'B', 'C'], - index=[1, 2], dtype=np.int64) - self.assert_(frame.values.dtype == np.int64) - # 1-D input - frame = DataFrame(np.zeros(3), columns=['A'], index=[1, 2, 3]) + frame = DataFrame(empty((3,)), columns=['A'], index=[1, 2, 3]) self.assertEqual(len(frame.index), 3) self.assertEqual(len(frame.columns), 1) - frame = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A']) - self.assertEqual(len(frame), 2) - # higher dim raise exception - self.assertRaises(Exception, DataFrame, np.zeros((3, 3, 3)), - columns=['A', 'B', 'C'], index=[1]) + # cast type + frame = DataFrame(mat, columns=['A', 'B', 'C'], + index=[1, 2], dtype=np.int64) + self.assert_(frame.values.dtype == np.int64) # wrong size axis labels - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B', 'C'], index=[1]) + msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' + with assertRaisesRegexp(ValueError, msg): + DataFrame(mat, columns=['A', 'B', 'C'], index=[1]) + msg = r'Shape of passed values is \(3, 2\), indices imply \(2, 2\)' + with assertRaisesRegexp(ValueError, msg): + DataFrame(mat, columns=['A', 'B'], index=[1, 2]) - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B'], index=[1, 2]) + # higher dim raise exception + with assertRaisesRegexp(ValueError, 'Must pass 2-d input'): + DataFrame(empty((3, 3, 3)), columns=['A', 'B', 'C'], + index=[1]) # automatic labeling frame = DataFrame(mat) @@ -2407,70 +2413,36 @@ def test_constructor_ndarray(self): self.assert_(np.array_equal(frame.index, lrange(2))) # 0-length axis - frame = DataFrame(np.empty((0, 3))) + frame = DataFrame(empty((0, 3))) self.assert_(len(frame.index) == 0) - frame = DataFrame(np.empty((3, 0))) + frame = DataFrame(empty((3, 0))) 
self.assert_(len(frame.columns) == 0) - def test_constructor_maskedarray(self): - mat = ma.masked_all((2, 3), dtype=float) + def test_constructor_ndarray(self): + mat = np.zeros((2, 3), dtype=float) + self._check_basic_constructor(np.ones) - # 2-D input - frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) + frame = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A']) + self.assertEqual(len(frame), 2) - self.assertEqual(len(frame.index), 2) - self.assertEqual(len(frame.columns), 3) - self.assertTrue(np.all(~np.asarray(frame == frame))) - # cast type - frame = DataFrame(mat, columns=['A', 'B', 'C'], - index=[1, 2], dtype=np.int64) - self.assert_(frame.values.dtype == np.int64) + def test_constructor_maskedarray(self): + self._check_basic_constructor(ma.masked_all) # Check non-masked values - mat2 = ma.copy(mat) - mat2[0, 0] = 1.0 - mat2[1, 2] = 2.0 - frame = DataFrame(mat2, columns=['A', 'B', 'C'], index=[1, 2]) + mat = ma.masked_all((2, 3), dtype=float) + mat[0, 0] = 1.0 + mat[1, 2] = 2.0 + frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) self.assertEqual(1.0, frame['A'][1]) self.assertEqual(2.0, frame['C'][2]) - # 1-D input - frame = DataFrame(ma.masked_all((3,)), columns=['A'], index=[1, 2, 3]) - self.assertEqual(len(frame.index), 3) - self.assertEqual(len(frame.columns), 1) + # what is this even checking?? 
+ mat = ma.masked_all((2, 3), dtype=float) + frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) self.assertTrue(np.all(~np.asarray(frame == frame))) - # higher dim raise exception - self.assertRaises(Exception, DataFrame, ma.masked_all((3, 3, 3)), - columns=['A', 'B', 'C'], index=[1]) - - # wrong size axis labels - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B', 'C'], index=[1]) - - self.assertRaises(Exception, DataFrame, mat, - columns=['A', 'B'], index=[1, 2]) - - # automatic labeling - frame = DataFrame(mat) - self.assert_(np.array_equal(frame.index, lrange(2))) - self.assert_(np.array_equal(frame.columns, lrange(3))) - - frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, lrange(3))) - - frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, lrange(2))) - - # 0-length axis - frame = DataFrame(ma.masked_all((0, 3))) - self.assert_(len(frame.index) == 0) - - frame = DataFrame(ma.masked_all((3, 0))) - self.assert_(len(frame.columns) == 0) - def test_constructor_maskedarray_nonfloat(self): # masked int promoted to float mat = ma.masked_all((2, 3), dtype=int) @@ -2570,8 +2542,8 @@ def test_constructor_arrays_and_scalars(self): exp = DataFrame({'a': df['a'].values, 'b': [True] * 10}) assert_frame_equal(df, exp) - - self.assertRaises(ValueError, DataFrame, {'a': False, 'b': True}) + with tm.assertRaisesRegexp(ValueError, 'must pass an index'): + DataFrame({'a': False, 'b': True}) def test_constructor_DataFrame(self): df = DataFrame(self.frame) @@ -2602,12 +2574,14 @@ def test_constructor_more(self): self.assertEqual(dm.values.shape, (10, 0)) # corner, silly - self.assertRaises(Exception, DataFrame, (1, 2, 3)) + # TODO: Fix this Exception to be better... 
+ with assertRaisesRegexp(PandasError, 'constructor not properly called'): + DataFrame((1, 2, 3)) # can't cast mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1) - self.assertRaises(ValueError, DataFrame, mat, index=[0, 1], - columns=[0], dtype=float) + with assertRaisesRegexp(ValueError, 'cast'): + DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(self.frame._series)) tm.assert_frame_equal(dm, self.frame) @@ -2718,7 +2692,8 @@ class CustomDict(dict): def test_constructor_ragged(self): data = {'A': randn(10), 'B': randn(8)} - self.assertRaises(Exception, DataFrame, data) + assertRaisesRegexp(ValueError, 'arrays must all be same length', + DataFrame, data) def test_constructor_scalar(self): idx = Index(lrange(3)) @@ -2739,8 +2714,8 @@ def test_constructor_mixed_dict_and_Series(self): self.assert_(result.index.is_monotonic) # ordering ambiguous, raise exception - self.assertRaises(Exception, DataFrame, - {'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) + with assertRaisesRegexp(ValueError, 'ambiguous ordering'): + DataFrame({'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) # this is OK though result = DataFrame({'A': ['a', 'b'], @@ -2850,10 +2825,11 @@ def test_constructor_from_items(self): columns=self.mixed_frame.columns, orient='index') assert_frame_equal(recons, self.mixed_frame) - self.assert_(recons['A'].dtype == np.float64) + self.assertEqual(recons['A'].dtype, np.float64) - self.assertRaises(ValueError, DataFrame.from_items, row_items, - orient='index') + with tm.assertRaisesRegexp(TypeError, + "Must pass columns with orient='index'"): + DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples arr = lib.list_to_object_array( @@ -2878,9 +2854,8 @@ def test_constructor_mix_series_nonseries(self): 'B': list(self.frame['B'])}, columns=['A', 'B']) assert_frame_equal(df, self.frame.ix[:, ['A', 'B']]) - self.assertRaises(ValueError, DataFrame, - {'A': self.frame['A'], - 'B': list(self.frame['B'])[:-2]}) + 
with tm.assertRaisesRegexp(ValueError, 'does not match index length'): + DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])[:-2]}) def test_constructor_miscast_na_int_dtype(self): df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) @@ -2932,6 +2907,8 @@ def check(result, expected=None): df['string'] = 'bah' expected = DataFrame([[1,1,1,5,'bah'],[1,1,2,5,'bah'],[2,1,3,5,'bah']],columns=['foo','bar','foo','hello','string']) check(df,expected) + with assertRaisesRegexp(ValueError, 'Length of value'): + df.insert(0, 'AnotherColumn', range(len(df.index) - 1)) # insert same dtype df['foo2'] = 3 @@ -2965,7 +2942,7 @@ def check(result, expected=None): check(df,expected) # insert a dup - self.assertRaises(Exception, df.insert, 2, 'new_col', 4.) + assertRaisesRegexp(ValueError, 'cannot insert', df.insert, 2, 'new_col', 4.) df.insert(2,'new_col',4.,allow_duplicates=True) expected = DataFrame([[1,1,4.,5.,'bah',3],[1,2,4.,5.,'bah',3],[2,3,4.,5.,'bah',3]],columns=['foo','foo','new_col','new_col','string','foo2']) check(df,expected) @@ -3071,8 +3048,8 @@ def test_constructor_single_value(self): self.assertRaises(com.PandasError, DataFrame, 'a', [1, 2]) self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a', 'c']) - self.assertRaises( - com.PandasError, DataFrame, 'a', [1, 2], ['a', 'c'], float) + with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): + DataFrame('a', [1, 2], ['a', 'c'], float) def test_constructor_with_datetimes(self): @@ -3497,8 +3474,9 @@ def test_from_records_to_records(self): tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2)) # wrong length - self.assertRaises(Exception, DataFrame.from_records, arr, - index=index[:-1]) + msg = r'Shape of passed values is \(3,\), indices imply \(3, 1\)' + with assertRaisesRegexp(ValueError, msg): + DataFrame.from_records(arr, index=index[:-1]) indexed_frame = DataFrame.from_records(arr, index='f1') @@ -3816,7 +3794,7 @@ def test_get_agg_axis(self): idx = 
self.frame._get_agg_axis(1) self.assert_(idx is self.frame.index) - self.assertRaises(Exception, self.frame._get_agg_axis, 2) + self.assertRaises(ValueError, self.frame._get_agg_axis, 2) def test_nonzero(self): self.assertTrue(self.empty.empty) @@ -3991,19 +3969,20 @@ def test_insert(self): # new item df['x'] = df['a'].astype('float32') result = Series(dict(float64 = 5, float32 = 1)) - self.assert_((df.get_dtype_counts() == result).all() == True) + self.assert_((df.get_dtype_counts() == result).all()) # replacing current (in different block) df['a'] = df['a'].astype('float32') result = Series(dict(float64 = 4, float32 = 2)) - self.assert_((df.get_dtype_counts() == result).all() == True) + self.assert_((df.get_dtype_counts() == result).all()) df['y'] = df['a'].astype('int32') result = Series(dict(float64 = 4, float32 = 2, int32 = 1)) - self.assert_((df.get_dtype_counts() == result).all() == True) + self.assert_((df.get_dtype_counts() == result).all()) - self.assertRaises(Exception, df.insert, 1, 'a', df['b']) - self.assertRaises(Exception, df.insert, 1, 'c', df['b']) + with assertRaisesRegexp(ValueError, 'already exists'): + df.insert(1, 'a', df['b']) + self.assertRaises(ValueError, df.insert, 1, 'c', df['b']) df.columns.name = 'some_name' # preserve columns name field @@ -4310,6 +4289,15 @@ def test_arith_flex_frame(self): assert_frame_equal(result, exp) _check_mixed_int(result, dtype = dtype) + # ndim >= 3 + ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) + with assertRaisesRegexp(ValueError, 'shape'): + f(self.frame, ndim_5) + + with assertRaisesRegexp(ValueError, 'shape'): + getattr(self.frame, op)(ndim_5) + + # res_add = self.frame.add(self.frame) # res_sub = self.frame.sub(self.frame) # res_mul = self.frame.mul(self.frame) @@ -4329,6 +4317,10 @@ def test_arith_flex_frame(self): result = self.frame[:0].add(self.frame) assert_frame_equal(result, self.frame * np.nan) + with assertRaisesRegexp(NotImplementedError, 'fill_value'): + self.frame.add(self.frame.irow(0), 
fill_value=3) + with assertRaisesRegexp(NotImplementedError, 'fill_value'): + self.frame.add(self.frame.irow(0), axis='index', fill_value=3) def test_arith_mixed(self): @@ -4377,34 +4369,32 @@ def test_bool_flex_frame(self): other_data = np.random.randn(5, 3) df = DataFrame(data) other = DataFrame(other_data) - - # No NAs - - # DataFrame - self.assert_(df.eq(df).values.all()) - self.assert_(not df.ne(df).values.any()) - - assert_frame_equal((df == other), df.eq(other)) - assert_frame_equal((df != other), df.ne(other)) - assert_frame_equal((df > other), df.gt(other)) - assert_frame_equal((df < other), df.lt(other)) - assert_frame_equal((df >= other), df.ge(other)) - assert_frame_equal((df <= other), df.le(other)) + ndim_5 = np.ones(df.shape + (1, 3)) # Unaligned - def _check_unaligned_frame(meth, op, df, other, default=False): + def _check_unaligned_frame(meth, op, df, other): part_o = other.ix[3:, 1:].copy() - rs = meth(df, part_o) + rs = meth(part_o) xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) assert_frame_equal(rs, xp) - _check_unaligned_frame(DataFrame.eq, operator.eq, df, other) - _check_unaligned_frame(DataFrame.ne, operator.ne, df, other, - default=True) - _check_unaligned_frame(DataFrame.gt, operator.gt, df, other) - _check_unaligned_frame(DataFrame.lt, operator.lt, df, other) - _check_unaligned_frame(DataFrame.ge, operator.ge, df, other) - _check_unaligned_frame(DataFrame.le, operator.le, df, other) + # DataFrame + self.assert_(df.eq(df).values.all()) + self.assert_(not df.ne(df).values.any()) + for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']: + f = getattr(df, op) + o = getattr(operator, op) + # No NAs + assert_frame_equal(f(other), o(df, other)) + _check_unaligned_frame(f, o, df, other) + # ndarray + assert_frame_equal(f(other.values), o(df, other.values)) + # scalar + assert_frame_equal(f(0), o(df, 0)) + # NAs + assert_frame_equal(f(np.nan), o(df, np.nan)) + with assertRaisesRegexp(ValueError, 'shape'): + f(ndim_5) # Series def 
_test_seq(df, idx_ser, col_ser): @@ -4443,14 +4433,6 @@ def _test_seq(df, idx_ser, col_ser): col_ser = Series(np.random.randn(3)) _test_seq(df, idx_ser, col_ser) - # ndarray - - assert_frame_equal((df == other.values), df.eq(other.values)) - assert_frame_equal((df != other.values), df.ne(other.values)) - assert_frame_equal((df > other.values), df.gt(other.values)) - assert_frame_equal((df < other.values), df.lt(other.values)) - assert_frame_equal((df >= other.values), df.ge(other.values)) - assert_frame_equal((df <= other.values), df.le(other.values)) # list/tuple _test_seq(df, idx_ser.values, col_ser.values) @@ -4470,20 +4452,7 @@ def _test_seq(df, idx_ser, col_ser): rs = df.le(df) self.assert_(not rs.ix[0, 0]) - # scalar - assert_frame_equal(df.eq(0), df == 0) - assert_frame_equal(df.ne(0), df != 0) - assert_frame_equal(df.gt(0), df > 0) - assert_frame_equal(df.lt(0), df < 0) - assert_frame_equal(df.ge(0), df >= 0) - assert_frame_equal(df.le(0), df <= 0) - - assert_frame_equal(df.eq(np.nan), df == np.nan) - assert_frame_equal(df.ne(np.nan), df != np.nan) - assert_frame_equal(df.gt(np.nan), df > np.nan) - assert_frame_equal(df.lt(np.nan), df < np.nan) - assert_frame_equal(df.ge(np.nan), df >= np.nan) - assert_frame_equal(df.le(np.nan), df <= np.nan) + # complex arr = np.array([np.nan, 1, 6, np.nan]) @@ -4512,19 +4481,20 @@ def test_arith_flex_series(self): row = df.xs('a') col = df['two'] + # after arithmetic refactor, add truediv here + ops = ['add', 'sub', 'mul', 'mod'] + for op in ops: + f = getattr(df, op) + op = getattr(operator, op) + assert_frame_equal(f(row), op(df, row)) + assert_frame_equal(f(col, axis=0), op(df.T, col).T) - assert_frame_equal(df.add(row), df + row) + # special case for some reason assert_frame_equal(df.add(row, axis=None), df + row) - assert_frame_equal(df.sub(row), df - row) - assert_frame_equal(df.div(row), df / row) - assert_frame_equal(df.mul(row), df * row) - assert_frame_equal(df.mod(row), df % row) - 
assert_frame_equal(df.add(col, axis=0), (df.T + col).T) - assert_frame_equal(df.sub(col, axis=0), (df.T - col).T) + # cases which will be refactored after big arithmetic refactor + assert_frame_equal(df.div(row), df / row) assert_frame_equal(df.div(col, axis=0), (df.T / col).T) - assert_frame_equal(df.mul(col, axis=0), (df.T * col).T) - assert_frame_equal(df.mod(col, axis=0), (df.T % col).T) def test_arith_non_pandas_object(self): df = self.simple @@ -4697,11 +4667,14 @@ def test_comparisons(self): df2 = tm.makeTimeDataFrame() row = self.simple.xs('a') + ndim_5 = np.ones(df1.shape + (1, 1, 1)) def test_comp(func): result = func(df1, df2) self.assert_(np.array_equal(result.values, func(df1.values, df2.values))) + with assertRaisesRegexp(ValueError, 'Wrong number of dimensions'): + func(df1, ndim_5) result2 = func(self.simple, row) self.assert_(np.array_equal(result2.values, @@ -4711,7 +4684,10 @@ def test_comp(func): self.assert_(np.array_equal(result3.values, func(self.frame.values, 0))) - self.assertRaises(Exception, func, self.simple, self.simple[:2]) + + with assertRaisesRegexp(ValueError, 'Can only compare ' + 'identically-labeled DataFrame'): + func(self.simple, self.simple[:2]) test_comp(operator.eq) test_comp(operator.ne) @@ -5289,20 +5265,19 @@ def _make_frame(names=None): df.to_csv(path,tupleize_cols=False) # catch invalid headers - def testit(): + with assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2\] are too many rows for this multi_index of columns'): read_csv(path,tupleize_cols=False,header=lrange(3),index_col=0) - assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2\] are too many rows for this multi_index of columns', testit) - def testit(): + with assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2,3,4,5,6\], len of 7, but only 6 lines in file'): read_csv(path,tupleize_cols=False,header=lrange(7),index_col=0) - assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2,3,4,5,6\], len of 7, but only 6 lines in file', testit) - for i in 
[3,4,5,6,7]: - self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=lrange(i), index_col=0) - self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0) + for i in [4,5,6]: + with tm.assertRaises(CParserError): + read_csv(path, tupleize_cols=False, header=lrange(i), index_col=0) # write with cols - self.assertRaises(Exception, df.to_csv, path,tupleize_cols=False,cols=['foo','bar']) + with assertRaisesRegexp(TypeError, 'cannot specify cols with a MultiIndex'): + df.to_csv(path, tupleize_cols=False, cols=['foo', 'bar']) with ensure_clean(pname) as path: # empty @@ -5650,7 +5625,8 @@ def test_convert_objects(self): # via astype, but errors converted = self.mixed_frame.copy() - self.assertRaises(Exception, converted['H'].astype, 'int32') + with assertRaisesRegexp(ValueError, 'invalid literal'): + converted['H'].astype('int32') # mixed in a single column df = DataFrame(dict(s = Series([1, 'na', 3 ,4]))) @@ -5669,9 +5645,12 @@ def test_append_series_dict(self): columns=['foo', 'bar', 'baz', 'qux']) series = df.ix[4] - self.assertRaises(ValueError, df.append, series, verify_integrity=True) + with assertRaisesRegexp(ValueError, 'Indexes have overlapping values'): + df.append(series, verify_integrity=True) series.name = None - self.assertRaises(Exception, df.append, series, verify_integrity=True) + with assertRaisesRegexp(TypeError, 'Can only append a Series if ' + 'ignore_index=True'): + df.append(series, verify_integrity=True) result = df.append(series[::-1], ignore_index=True) expected = df.append(DataFrame({0: series[::-1]}, index=df.columns).T, @@ -6138,10 +6117,13 @@ def test_dropna(self): expected = df.ix[:, [0, 1, 3]] assert_frame_equal(dropped, expected) + # bad input + self.assertRaises(ValueError, df.dropna, axis=3) + def test_dropna_corner(self): # bad input self.assertRaises(ValueError, self.frame.dropna, how='foo') - self.assertRaises(ValueError, self.frame.dropna, how=None) + self.assertRaises(TypeError, 
self.frame.dropna, how=None) def test_dropna_multiple_axes(self): df = DataFrame([[1, np.nan, 2, 3], @@ -6440,7 +6422,8 @@ def test_fillna_dict_series(self): assert_frame_equal(result, expected) # disable this for now - self.assertRaises(Exception, df.fillna, df.max(1), axis=1) + with assertRaisesRegexp(NotImplementedError, 'column by column'): + df.fillna(df.max(1), axis=1) def test_fillna_columns(self): df = DataFrame(np.random.randn(10, 10)) @@ -6456,10 +6439,8 @@ def test_fillna_columns(self): assert_frame_equal(result, expected) def test_fillna_invalid_method(self): - try: + with assertRaisesRegexp(ValueError, 'ffil'): self.frame.fillna(method='ffil') - except ValueError as inst: - self.assert_('ffil' in str(inst)) def test_fillna_invalid_value(self): # list @@ -7322,8 +7303,8 @@ def test_xs(self): self.assertEqual(xs['A'], 1) self.assertEqual(xs['B'], '1') - self.assertRaises(Exception, self.tsframe.xs, - self.tsframe.index[0] - datetools.bday) + with tm.assertRaises(KeyError): + self.tsframe.xs(self.tsframe.index[0] - datetools.bday) # xs get column series = self.frame.xs('A', axis=1) @@ -7414,7 +7395,8 @@ def test_pivot_duplicates(self): data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], 'b': ['one', 'two', 'one', 'one', 'two'], 'c': [1., 2., 3., 3., 4.]}) - self.assertRaises(Exception, data.pivot, 'a', 'b', 'c') + with assertRaisesRegexp(ValueError, 'duplicate entries'): + data.pivot('a', 'b', 'c') def test_pivot_empty(self): df = DataFrame({}, columns=['a', 'b', 'c']) @@ -7994,7 +7976,7 @@ def test_rename(self): self.assert_(np.array_equal(renamed.index, ['BAR', 'FOO'])) # have to pass something - self.assertRaises(Exception, self.frame.rename) + self.assertRaises(TypeError, self.frame.rename) # partial columns renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) @@ -8157,7 +8139,8 @@ def test_shift(self): assert_frame_equal(shifted2, shifted3) assert_frame_equal(ps, shifted2.shift(-1, 'B')) - self.assertRaises(ValueError, ps.shift, 
freq='D') + assertRaisesRegexp(ValueError, 'does not match PeriodIndex freq', + ps.shift, freq='D') def test_shift_bool(self): df = DataFrame({'high': [True, False], @@ -8182,7 +8165,7 @@ def test_tshift(self): shifted3 = ps.tshift(freq=datetools.bday) assert_frame_equal(shifted, shifted3) - self.assertRaises(ValueError, ps.tshift, freq='M') + assertRaisesRegexp(ValueError, 'does not match', ps.tshift, freq='M') # DatetimeIndex shifted = self.tsframe.tshift(1) @@ -8294,7 +8277,6 @@ def test_apply_ignore_failures(self): expected = self.mixed_frame._get_numeric_data().apply(np.mean) assert_series_equal(result, expected) - # test with hierarchical index def test_apply_mixed_dtype_corner(self): df = DataFrame({'A': ['foo'], @@ -8411,9 +8393,10 @@ def transform2(row): try: transformed = data.apply(transform, axis=1) - except Exception as e: + except AttributeError as e: self.assertEqual(len(e.args), 2) self.assertEqual(e.args[1], 'occurred at index 4') + self.assertEqual(e.args[0], "'float' object has no attribute 'startswith'") def test_swapaxes(self): df = DataFrame(np.random.randn(10, 5)) @@ -8530,7 +8513,8 @@ def test_filter(self): self.assertEqual(len(filtered.columns), 2) # pass in None - self.assertRaises(Exception, self.frame.filter, items=None) + with assertRaisesRegexp(TypeError, 'Must pass'): + self.frame.filter(items=None) # objects filtered = self.mixed_frame.filter(like='foo') @@ -8630,6 +8614,9 @@ def test_sort_index(self): assert_frame_equal(sorted_df, expected) self.assertRaises(ValueError, frame.sort_index, axis=2, inplace=True) + msg = 'When sorting by column, axis must be 0' + with assertRaisesRegexp(ValueError, msg): + frame.sort_index(by='A', axis=1) def test_sort_index_multicolumn(self): import random @@ -8735,17 +8722,16 @@ def test_sort_inplace(self): assert_frame_equal(sorted_df, expected) def test_sort_index_duplicates(self): - df = DataFrame([[1, 2], [3, 4]], columns=['a', 'a']) + df = DataFrame([lrange(5,9), lrange(4)], + columns=['a', 'a', 
'b', 'b']) - try: + with assertRaisesRegexp(ValueError, 'duplicate'): df.sort_index(by='a') - except Exception as e: - self.assertTrue('duplicate' in str(e)) - - try: + with assertRaisesRegexp(ValueError, 'duplicate'): df.sort_index(by=['a']) - except Exception as e: - self.assertTrue('duplicate' in str(e)) + with assertRaisesRegexp(ValueError, 'duplicate'): + # multi-column 'by' is separate codepath + df.sort_index(by=['a', 'b']) def test_sort_datetimes(self): @@ -8771,7 +8757,8 @@ def test_sort_datetimes(self): def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] - self.assertRaises(Exception, s.sort) + with assertRaisesRegexp(TypeError, "This Series is a view"): + s.sort() cp = s.copy() cp.sort() # it works! @@ -8982,9 +8969,8 @@ def test_update_raise(self): other = DataFrame([[2., nan], [nan, 7]], index=[1, 3], columns=[1, 2]) - - np.testing.assert_raises(Exception, df.update, *(other,), - **{'raise_conflict': True}) + with assertRaisesRegexp(ValueError, "Data overlaps"): + df.update(other, raise_conflict=True) def test_update_from_non_df(self): d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])} @@ -9416,8 +9402,8 @@ def wrapper(x): # comp = frame.apply(alternative, axis=1).reindex(result.index) # assert_series_equal(result, comp) - self.assertRaises(Exception, f, axis=2) - + # bad axis + assertRaisesRegexp(ValueError, 'No axis named 2', f, axis=2) # make sure works on mixed-type frame getattr(self.mixed_frame, name)(axis=0) getattr(self.mixed_frame, name)(axis=1) @@ -9731,18 +9717,6 @@ def test_describe_empty_int_columns(self): Series([0, 0], dtype=float, name='count')) self.assert_(isnull(desc.ix[1:]).all().all()) - def test_get_axis_etc(self): - f = self.frame - - self.assertEquals(f._get_axis_number(0), 0) - self.assertEquals(f._get_axis_number(1), 1) - self.assertEquals(f._get_axis_name(0), 'index') - self.assertEquals(f._get_axis_name(1), 'columns') - - self.assert_(f._get_axis(0) is f.index) - self.assert_(f._get_axis(1) is 
f.columns) - self.assertRaises(Exception, f._get_axis_number, 2) - def test_axis_aliases(self): f = self.frame @@ -10391,8 +10365,8 @@ def test_xs_view(self): self.assert_((dm.xs(2) == 5).all()) # TODO (?): deal with mixed-type fiasco? - self.assertRaises(Exception, self.mixed_frame.xs, - self.mixed_frame.index[2], copy=False) + with assertRaisesRegexp(TypeError, 'cannot get view of mixed-type'): + self.mixed_frame.xs(self.mixed_frame.index[2], copy=False) # unconsolidated dm['foo'] = 6. @@ -10417,6 +10391,8 @@ def test_boolean_indexing(self): df1[df1 > 2.0 * df2] = -1 assert_frame_equal(df1, expected) + with assertRaisesRegexp(ValueError, 'Item wrong length'): + df1[df1.index[:-1] > 2] = -1 def test_boolean_indexing_mixed(self): df = DataFrame( @@ -10443,7 +10419,8 @@ def test_boolean_indexing_mixed(self): # add object, should this raise? df['foo'] = 'test' - self.assertRaises(ValueError, df.__setitem__, df>0.3, 1) + with tm.assertRaisesRegexp(TypeError, 'boolean setting on mixed-type'): + df[df > 0.3] = 1 def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) @@ -10574,7 +10551,8 @@ def test_dot(self): exp = a.dot(a.ix[0]) assert_series_equal(result, exp) - self.assertRaises(Exception, a.dot, row[:-1]) + with assertRaisesRegexp(ValueError, 'Dot product shape mismatch'): + a.dot(row[:-1]) a = np.random.rand(1, 5) b = np.random.rand(5, 1) @@ -10588,7 +10566,7 @@ def test_dot(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - self.assertRaises(ValueError, df.dot, df2) + assertRaisesRegexp(ValueError, 'aligned', df.dot, df2) def test_idxmin(self): frame = self.frame @@ -10602,7 +10580,7 @@ def test_idxmin(self): Series.idxmin, axis=axis, skipna=skipna) assert_series_equal(result, expected) - self.assertRaises(Exception, frame.idxmin, axis=2) + self.assertRaises(ValueError, frame.idxmin, axis=2) def test_idxmax(self): frame = self.frame @@ -10616,7 +10594,7 @@ 
def test_idxmax(self): Series.idxmax, axis=axis, skipna=skipna) assert_series_equal(result, expected) - self.assertRaises(Exception, frame.idxmax, axis=2) + self.assertRaises(ValueError, frame.idxmax, axis=2) def test_stale_cached_series_bug_473(self): Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'), @@ -10765,7 +10743,8 @@ def wrapper(x): # comp = frame.apply(alternative, axis=1).reindex(result.index) # assert_series_equal(result, comp) - self.assertRaises(Exception, f, axis=2) + # bad axis + self.assertRaises(ValueError, f, axis=2) # make sure works on mixed-type frame mixed = self.mixed_frame diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 50d94ada7b9df..21462780e2ffd 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -10,7 +10,8 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal) + assert_frame_equal, + assertRaisesRegexp) import pandas.core.common as com import pandas.util.testing as tm from pandas.compat import (range, lrange, StringIO, lzip, u, cPickle, @@ -270,7 +271,8 @@ def test_frame_getitem_setitem_boolean(self): np.putmask(values[:-1], values[:-1] < 0, 2) assert_almost_equal(df.values, values) - self.assertRaises(Exception, df.__setitem__, df * 0, 2) + with assertRaisesRegexp(TypeError, 'boolean values only'): + df[df * 0] = 2 def test_frame_getitem_setitem_slice(self): # getitem @@ -427,6 +429,9 @@ def test_xs_level(self): expected = df[1:2] expected.index = expected.index.droplevel(2) assert_frame_equal(result, expected) + # can't produce a view of a multiindex with a level without copying + with assertRaisesRegexp(ValueError, 'Cannot retrieve view'): + self.frame.xs('two', level='second', copy=False) def test_xs_level_multiple(self): from pandas import read_table @@ -441,6 +446,8 @@ def test_xs_level_multiple(self): result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') 
assert_frame_equal(result, expected) + with assertRaisesRegexp(ValueError, 'Cannot retrieve view'): + df.xs(('a', 4), level=['one', 'four'], copy=False) # GH2107 dates = lrange(20111201, 20111205) @@ -620,14 +627,14 @@ def test_getitem_partial_column_select(self): def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) - self.assertRaises(Exception, df.sortlevel, 0) + assertRaisesRegexp(TypeError, 'hierarchical index', df.sortlevel, 0) # axis=1 # series a_sorted = self.frame['A'].sortlevel(0) - self.assertRaises(Exception, - self.frame.reset_index()['A'].sortlevel) + with assertRaisesRegexp(TypeError, 'hierarchical index'): + self.frame.reset_index()['A'].sortlevel() # preserve names self.assertEquals(a_sorted.index.names, self.frame.index.names) @@ -722,7 +729,7 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - self.assertRaises(Exception, df.count, level=0) + assertRaisesRegexp(TypeError, 'hierarchical', df.count, level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) @@ -1085,8 +1092,11 @@ def test_reorder_levels(self): expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) assert_frame_equal(result, expected) - self.assertRaises(Exception, self.ymd.index.reorder_levels, - [1, 2, 3]) + with assertRaisesRegexp(TypeError, 'hierarchical axis'): + self.ymd.reorder_levels([1, 2], axis=1) + + with assertRaisesRegexp(IndexError, 'Too many levels'): + self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): df = self.ymd[:5].T @@ -1202,8 +1212,8 @@ def test_count(self): expect = self.series.count(level=0) assert_series_equal(result, expect) - self.assertRaises(Exception, series.count, 'x') - self.assertRaises(Exception, frame.count, level='x') + self.assertRaises(KeyError, series.count, 'x') + self.assertRaises(KeyError, frame.count, level='x') AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 
'std', 'var'] @@ -1420,6 +1430,7 @@ def test_partial_ix_missing(self): # self.ymd.ix[2000, 0] = 0 # self.assert_((self.ymd.ix[2000]['A'] == 0).all()) + # Pretty sure the second (and maybe even the first) is already wrong. self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) @@ -1774,14 +1785,6 @@ def test_indexing_over_hashtable_size_cutoff(self): _index._SIZE_CUTOFF = old_cutoff - def test_xs_mixed_no_copy(self): - index = MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]], - names=['first', 'second']) - data = DataFrame(np.random.rand(len(index)), index=index, - columns=['A']) - - self.assertRaises(Exception, data.xs, 2, level=1, copy=False) - def test_multiindex_na_repr(self): # only an issue with long columns diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 8ad88374f40f6..938025c450258 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1276,7 +1276,7 @@ def test_to_panel_duplicates(self): # #2441 df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) idf = df.set_index(['a', 'b']) - self.assertRaises(Exception, idf.to_panel) + assertRaisesRegexp(ValueError, 'non-uniquely indexed', idf.to_panel) def test_filter(self): pass diff --git a/pandas/tests/test_tests.py b/pandas/tests/test_tests.py index b52ab61f7be6b..1890c2607fc89 100644 --- a/pandas/tests/test_tests.py +++ b/pandas/tests/test_tests.py @@ -4,23 +4,32 @@ import unittest import warnings import nose +import sys -from pandas.util.testing import assert_almost_equal +from pandas.util.testing import ( + assert_almost_equal, assertRaisesRegexp, raise_with_traceback +) # let's get meta. 
class TestUtilTesting(unittest.TestCase): _multiprocess_can_split_ = True - def __init__(self, *args): - super(TestUtilTesting, self).__init__(*args) - - def setUp(self): - pass - - def tearDown(self): - pass - def test_assert_almost_equal(self): # don't die because values are not ndarrays assert_almost_equal(1.1,1.1,check_less_precise=True) + + def test_raise_with_traceback(self): + with assertRaisesRegexp(LookupError, "error_text"): + try: + raise ValueError("THIS IS AN ERROR") + except ValueError as e: + e = LookupError("error_text") + raise_with_traceback(e) + with assertRaisesRegexp(LookupError, "error_text"): + try: + raise ValueError("This is another error") + except ValueError: + e = LookupError("error_text") + _, _, traceback = sys.exc_info() + raise_with_traceback(e, traceback) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index b28da7c9d7e0b..45894eb419489 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -121,6 +121,7 @@ def __init__(self, value=None, freq=None, ordinal=None, base, mult = _gfc(freq) if mult != 1: + # TODO: Better error message - this is slightly confusing raise ValueError('Only mult == 1 supported') if self.ordinal is None: diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index b7916bd98d70f..b95ea2cacda55 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -26,7 +26,8 @@ randn = np.random.randn from pandas import Series, TimeSeries, DataFrame -from pandas.util.testing import assert_series_equal, assert_almost_equal +from pandas.util.testing import(assert_series_equal, assert_almost_equal, + assertRaisesRegexp) import pandas.util.testing as tm from pandas import compat from numpy.testing import assert_array_equal @@ -272,7 +273,7 @@ def _ex(p): result = p.to_timestamp('S', how='start') self.assertEquals(result, expected) - self.assertRaises(ValueError, p.to_timestamp, '5t') + assertRaisesRegexp(ValueError, 'Only mult 
== 1', p.to_timestamp, '5t') def test_start_time(self): freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] @@ -1427,7 +1428,8 @@ def _get_with_delta(delta, freq='A-DEC'): self.assert_(result.columns.equals(exp_index)) # invalid axis - self.assertRaises(ValueError, df.to_timestamp, axis=2) + assertRaisesRegexp(ValueError, 'axis', df.to_timestamp, axis=2) + assertRaisesRegexp(ValueError, 'Only mult == 1', df.to_timestamp, '5t', axis=1) def test_index_duplicate_periods(self): # monotonic @@ -1886,9 +1888,8 @@ def test_align_series(self): # it works! for kind in ['inner', 'outer', 'left', 'right']: ts.align(ts[::2], join=kind) - - self.assertRaises(Exception, ts.__add__, - ts.asfreq('D', how='end')) + with assertRaisesRegexp(ValueError, 'Only like-indexed'): + ts + ts.asfreq('D', how="end") def test_align_frame(self): rng = period_range('1/1/2000', '1/1/2010', freq='A') @@ -1915,7 +1916,7 @@ def test_union(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - self.assertRaises(Exception, index.union, index2) + self.assertRaises(ValueError, index.union, index2) self.assertRaises(ValueError, index.join, index.to_timestamp()) @@ -1934,7 +1935,7 @@ def test_intersection(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - self.assertRaises(Exception, index.intersection, index2) + self.assertRaises(ValueError, index.intersection, index2) def test_fields(self): # year, month, day, hour, minute diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 8af88895a8b73..c652c2da3214c 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -26,7 +26,8 @@ import pandas.core.panel4d as panel4d import pandas.compat as compat from pandas.compat import( - map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter + map, zip, range, unichr, lrange, lmap, 
lzip, u, callable, Counter, + raise_with_traceback ) from pandas import bdate_range @@ -1031,7 +1032,7 @@ def assertRaises(_exception, _callable=None, *args, **kwargs): >>> assertRaises(TypeError, ",".join, [1, 3, 5]); """ manager = _AssertRaisesContextmanager(exception=_exception) - # don't return anything if usedin function form + # don't return anything if used in function form if _callable is not None: with manager: _callable(*args, **kwargs) @@ -1111,8 +1112,9 @@ def handle_success(self, exc_type, exc_value, traceback): if self.regexp is not None: val = str(exc_value) if not self.regexp.search(val): - raise AssertionError('"%s" does not match "%s"' % - (self.regexp.pattern, str(val))) + e = AssertionError('"%s" does not match "%s"' % + (self.regexp.pattern, str(val))) + raise_with_traceback(e, traceback) return True