diff --git a/ci/lint.sh b/ci/lint.sh index 08c3e4570f262..6b8f160fc90db 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -8,7 +8,7 @@ RET=0 if [ "$LINT" ]; then echo "Linting" - for path in 'core' 'indexes' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util' + for path in 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util' do echo "linting -> pandas/$path" flake8 pandas/$path --filename '*.py' diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 532921035c385..a3de78c2f2089 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -7,7 +7,8 @@ from pandas import compat from pandas.compat import DeepChainMap, map -from pandas.core import common as com +import pandas.core.common as com +import pandas.formats.printing as printing from pandas.computation.align import _align, _reconstruct_object from pandas.computation.ops import (UndefinedVariableError, _mathops, _reductions) @@ -55,7 +56,7 @@ def convert(self): Defaults to return the expression as a string. """ - return com.pprint_thing(self.expr) + return printing.pprint_thing(self.expr) def evaluate(self): """Run the engine on the expression diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index c3300ffca468e..48459181f5358 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -5,7 +5,7 @@ import warnings import tokenize -from pandas.core import common as com +from pandas.formats.printing import pprint_thing from pandas.computation import _NUMEXPR_INSTALLED from pandas.computation.expr import Expr, _parsers, tokenize_string from pandas.computation.scope import _ensure_scope @@ -108,7 +108,7 @@ def _convert_expression(expr): ValueError * If the expression is empty. 
""" - s = com.pprint_thing(expr) + s = pprint_thing(expr) _check_expression(s) return s diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 61a3c9991160d..01d0fa664ac41 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -11,6 +11,7 @@ from pandas.compat import StringIO, lmap, zip, reduce, string_types from pandas.core.base import StringMixin from pandas.core import common as com +import pandas.formats.printing as printing from pandas.tools.util import compose from pandas.computation.ops import (_cmp_ops_syms, _bool_ops_syms, _arith_ops_syms, _unary_ops_syms, is_term) @@ -716,7 +717,7 @@ def __call__(self): return self.terms(self.env) def __unicode__(self): - return com.pprint_thing(self.terms) + return printing.pprint_thing(self.terms) def __len__(self): return len(self.expr) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index b80823de6de05..603c030dcaa6e 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -10,6 +10,7 @@ import pandas as pd from pandas.compat import PY3, string_types, text_type import pandas.core.common as com +from pandas.formats.printing import pprint_thing, pprint_thing_encoded import pandas.lib as lib from pandas.core.base import StringMixin from pandas.computation.common import _ensure_decoded, _result_type_many @@ -62,7 +63,7 @@ def local_name(self): return self.name.replace(_LOCAL_TAG, '') def __unicode__(self): - return com.pprint_thing(self.name) + return pprint_thing(self.name) def __call__(self, *args, **kwargs): return self.value @@ -118,9 +119,9 @@ def type(self): @property def raw(self): - return com.pprint_thing('{0}(name={1!r}, type={2})' - ''.format(self.__class__.__name__, self.name, - self.type)) + return pprint_thing('{0}(name={1!r}, type={2})' + ''.format(self.__class__.__name__, self.name, + self.type)) @property def is_datetime(self): @@ -186,9 +187,9 @@ def __unicode__(self): """Print a generic n-ary operator and its operands using infix notation""" # recurse over the operands - parened = ('({0})'.format(com.pprint_thing(opr)) + parened = ('({0})'.format(pprint_thing(opr)) for opr in self.operands) - return com.pprint_thing(' {0} '.format(self.op).join(parened)) + return pprint_thing(' {0} '.format(self.op).join(parened)) @property def return_type(self): @@ -390,10 +391,10 @@ def convert_values(self): """ def stringify(value): if self.encoding is not None: - encoder = partial(com.pprint_thing_encoded, + encoder = partial(pprint_thing_encoded, encoding=self.encoding) else: - encoder = com.pprint_thing + encoder = pprint_thing return encoder(value) lhs, rhs = self.lhs, self.rhs @@ -491,7 +492,7 @@ def __call__(self, env): return self.func(operand) def __unicode__(self): - return com.pprint_thing('{0}({1})'.format(self.op, self.operand)) + return pprint_thing('{0}({1})'.format(self.op, self.operand)) @property def return_type(self): @@ -516,7 +517,7 @@ def __call__(self, env): def __unicode__(self): operands = map(str, self.operands) - return com.pprint_thing('{0}({1})'.format(self.op, ','.join(operands))) + return pprint_thing('{0}({1})'.format(self.op, ','.join(operands))) class FuncNode(object): diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 3b3a0a8ab8525..d6d55d15fec30 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -7,9 +7,10 @@ from datetime import datetime, timedelta import numpy as np import pandas as pd +import pandas.core.common as com from pandas.compat import u, string_types, 
DeepChainMap from pandas.core.base import StringMixin -import pandas.core.common as com +from pandas.formats.printing import pprint_thing, pprint_thing_encoded from pandas.computation import expr, ops from pandas.computation.ops import is_term, UndefinedVariableError from pandas.computation.expr import BaseExprVisitor @@ -169,10 +170,10 @@ def convert_value(self, v): def stringify(value): if self.encoding is not None: - encoder = partial(com.pprint_thing_encoded, + encoder = partial(pprint_thing_encoded, encoding=self.encoding) else: - encoder = com.pprint_thing + encoder = pprint_thing return encoder(value) kind = _ensure_decoded(self.kind) @@ -224,8 +225,8 @@ def convert_values(self): class FilterBinOp(BinOp): def __unicode__(self): - return com.pprint_thing("[Filter : [{0}] -> " - "[{1}]".format(self.filter[0], self.filter[1])) + return pprint_thing("[Filter : [{0}] -> " + "[{1}]".format(self.filter[0], self.filter[1])) def invert(self): """ invert the filter """ @@ -296,7 +297,7 @@ def evaluate(self): class ConditionBinOp(BinOp): def __unicode__(self): - return com.pprint_thing("[Condition : [{0}]]".format(self.condition)) + return pprint_thing("[Condition : [{0}]]".format(self.condition)) def invert(self): """ invert the condition """ @@ -571,8 +572,8 @@ def convert(v): def __unicode__(self): if self.terms is not None: - return com.pprint_thing(self.terms) - return com.pprint_thing(self.expr) + return pprint_thing(self.terms) + return pprint_thing(self.expr) def evaluate(self): """ create and return the numexpr condition and filter """ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index de38c0c3940fd..323cbe8e93b78 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,7 @@ from warnings import warn import numpy as np -from pandas import compat, lib, _np_version_under1p8 +from pandas import compat, lib, tslib, _np_version_under1p8 import pandas.core.common as com import pandas.algos as algos import pandas.hashtable as htable @@ -14,6 +14,10 @@ from pandas.tslib import iNaT +# --------------- # +# top-level algos # +# --------------- # + def match(to_match, values, na_sentinel=-1): """ Compute locations of to_match into values @@ -52,6 +56,14 @@ def match(to_match, values, na_sentinel=-1): return result +def _match_generic(values, index, table_type, type_caster): + values = type_caster(values) + index = type_caster(index) + table = table_type(min(len(index), 1000000)) + table.map_locations(index) + return table.lookup(values) + + def unique(values): """ Compute unique values (not necessarily sorted) efficiently from input array @@ -71,6 +83,13 @@ def unique(values): return _hashtable_algo(f, values.dtype) +def _unique_generic(values, table_type, type_caster): + values = type_caster(values) + table = table_type(min(len(values), 1000000)) + uniques = table.unique(values) + return type_caster(uniques) + + def isin(comps, values): """ Compute the isin boolean array @@ -120,39 +139,6 @@ def isin(comps, values): return f(comps, values) -def _hashtable_algo(f, dtype, return_dtype=None): - """ - f(HashTable, type_caster) -> result - """ - if com.is_float_dtype(dtype): - return f(htable.Float64HashTable, com._ensure_float64) - elif com.is_integer_dtype(dtype): - return f(htable.Int64HashTable, com._ensure_int64) - elif com.is_datetime64_dtype(dtype): - return_dtype = return_dtype or 'M8[ns]' - return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) - elif com.is_timedelta64_dtype(dtype): - return_dtype = return_dtype or 'm8[ns]' - 
return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) - else: - return f(htable.PyObjectHashTable, com._ensure_object) - - -def _match_generic(values, index, table_type, type_caster): - values = type_caster(values) - index = type_caster(index) - table = table_type(min(len(index), 1000000)) - table.map_locations(index) - return table.lookup(values) - - -def _unique_generic(values, table_type, type_caster): - values = type_caster(values) - table = table_type(min(len(values), 1000000)) - uniques = table.unique(values) - return type_caster(uniques) - - def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable @@ -400,6 +386,18 @@ def rank(values, axis=0, method='average', na_option='keep', return ranks +_rank1d_functions = { + 'float64': algos.rank_1d_float64, + 'int64': algos.rank_1d_int64, + 'generic': algos.rank_1d_generic +} + +_rank2d_functions = { + 'float64': algos.rank_2d_float64, + 'int64': algos.rank_2d_int64, + 'generic': algos.rank_2d_generic +} + def quantile(x, q, interpolation_method='fraction'): """ @@ -482,52 +480,6 @@ def _interpolate(a, b, fraction): return a + (b - a) * fraction -def _get_data_algo(values, func_map): - if com.is_float_dtype(values): - f = func_map['float64'] - values = com._ensure_float64(values) - - elif com.needs_i8_conversion(values): - f = func_map['int64'] - values = values.view('i8') - - elif com.is_integer_dtype(values): - f = func_map['int64'] - values = com._ensure_int64(values) - else: - f = func_map['generic'] - values = com._ensure_object(values) - return f, values - - -def group_position(*args): - """ - Get group position - """ - from collections import defaultdict - table = defaultdict(int) - - result = [] - for tup in zip(*args): - result.append(table[tup]) - table[tup] += 1 - - return result - - -_dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'} - - -def _finalize_nsmallest(arr, kth_val, n, keep, narr): - ns, = np.nonzero(arr <= kth_val) - inds = ns[arr[ns].argsort(kind='mergesort')][:n] - if keep == 'last': - # reverse indices - return narr - 1 - inds - else: - return inds - - def nsmallest(arr, n, keep='first'): """ Find the indices of the n smallest values of a numpy array. 
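For readers tracking the reorganization in this file: `match` and `unique` share a dispatch pattern in which `_hashtable_algo` inspects the dtype, picks a Cython hash table class plus a casting helper, and hands both to a callback. The sketch below imitates that shape with plain NumPy stand-ins; `unique_sketch`, `_DISPATCH`, and the `_ensure_*` casters are illustrative names for this note only, not the actual `pandas.hashtable` API.

import numpy as np

def _ensure_float64(values):
    # stand-in caster, playing the role of com._ensure_float64
    return np.asarray(values, dtype=np.float64)

def _ensure_object(values):
    # stand-in caster, playing the role of com._ensure_object
    return np.asarray(values, dtype=object)

# dtype kind -> caster; pandas instead maps dtypes to (HashTable class,
# caster) pairs such as (htable.Float64HashTable, com._ensure_float64)
_DISPATCH = {'f': _ensure_float64,
             'i': lambda v: np.asarray(v, dtype=np.int64)}

def unique_sketch(values):
    values = np.asarray(values)
    caster = _DISPATCH.get(values.dtype.kind, _ensure_object)
    # np.unique stands in for HashTable.unique; unlike the hash-table
    # fast path it returns a sorted result
    return np.unique(caster(values))

print(unique_sketch([3.0, 1.0, 3.0]))  # -> [1. 3.]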
@@ -601,20 +553,516 @@ def select_n(series, n, keep, method): return dropped.iloc[inds] -_rank1d_functions = { - 'float64': algos.rank_1d_float64, - 'int64': algos.rank_1d_int64, - 'generic': algos.rank_1d_generic -} +def _finalize_nsmallest(arr, kth_val, n, keep, narr): + ns, = np.nonzero(arr <= kth_val) + inds = ns[arr[ns].argsort(kind='mergesort')][:n] + if keep == 'last': + # reverse indices + return narr - 1 - inds + else: + return inds -_rank2d_functions = { - 'float64': algos.rank_2d_float64, - 'int64': algos.rank_2d_int64, - 'generic': algos.rank_2d_generic -} +_dtype_map = {'datetime64[ns]': 'int64', 'timedelta64[ns]': 'int64'} + + +# ------- # +# helpers # +# ------- # + +def _hashtable_algo(f, dtype, return_dtype=None): + """ + f(HashTable, type_caster) -> result + """ + if com.is_float_dtype(dtype): + return f(htable.Float64HashTable, com._ensure_float64) + elif com.is_integer_dtype(dtype): + return f(htable.Int64HashTable, com._ensure_int64) + elif com.is_datetime64_dtype(dtype): + return_dtype = return_dtype or 'M8[ns]' + return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) + elif com.is_timedelta64_dtype(dtype): + return_dtype = return_dtype or 'm8[ns]' + return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) + else: + return f(htable.PyObjectHashTable, com._ensure_object) _hashtables = { 'float64': (htable.Float64HashTable, htable.Float64Vector), 'int64': (htable.Int64HashTable, htable.Int64Vector), 'generic': (htable.PyObjectHashTable, htable.ObjectVector) } + + +def _get_data_algo(values, func_map): + if com.is_float_dtype(values): + f = func_map['float64'] + values = com._ensure_float64(values) + + elif com.needs_i8_conversion(values): + f = func_map['int64'] + values = values.view('i8') + + elif com.is_integer_dtype(values): + f = func_map['int64'] + values = com._ensure_int64(values) + else: + f = func_map['generic'] + values = com._ensure_object(values) + return f, values + + +# ---- # +# take # +# ---- # + + +def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None): + def wrapper(arr, indexer, out, fill_value=np.nan): + if arr_dtype is not None: + arr = arr.view(arr_dtype) + if out_dtype is not None: + out = out.view(out_dtype) + if fill_wrap is not None: + fill_value = fill_wrap(fill_value) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +def _convert_wrapper(f, conv_dtype): + def wrapper(arr, indexer, out, fill_value=np.nan): + arr = arr.astype(conv_dtype) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +def _take_2d_multi_generic(arr, indexer, out, fill_value, mask_info): + # this is not ideal, performance-wise, but it's better than raising + # an exception (best to optimize in Cython to avoid getting here) + row_idx, col_idx = indexer + if mask_info is not None: + (row_mask, col_mask), (row_needs, col_needs) = mask_info + else: + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + if fill_value is not None: + if row_needs: + out[row_mask, :] = fill_value + if col_needs: + out[:, col_mask] = fill_value + for i in range(len(row_idx)): + u_ = row_idx[i] + for j in range(len(col_idx)): + v = col_idx[j] + out[i, j] = arr[u_, v] + + +def _take_nd_generic(arr, indexer, out, axis, fill_value, mask_info): + if mask_info is not None: + mask, needs_masking = mask_info + else: + mask = indexer == -1 + needs_masking = mask.any() + if arr.dtype != out.dtype: + arr = arr.astype(out.dtype) + if arr.shape[axis] > 0: + 
arr.take(com._ensure_platform_int(indexer), axis=axis, out=out) + if needs_masking: + outindexer = [slice(None)] * arr.ndim + outindexer[axis] = mask + out[tuple(outindexer)] = fill_value + + +_take_1d_dict = { + ('int8', 'int8'): algos.take_1d_int8_int8, + ('int8', 'int32'): algos.take_1d_int8_int32, + ('int8', 'int64'): algos.take_1d_int8_int64, + ('int8', 'float64'): algos.take_1d_int8_float64, + ('int16', 'int16'): algos.take_1d_int16_int16, + ('int16', 'int32'): algos.take_1d_int16_int32, + ('int16', 'int64'): algos.take_1d_int16_int64, + ('int16', 'float64'): algos.take_1d_int16_float64, + ('int32', 'int32'): algos.take_1d_int32_int32, + ('int32', 'int64'): algos.take_1d_int32_int64, + ('int32', 'float64'): algos.take_1d_int32_float64, + ('int64', 'int64'): algos.take_1d_int64_int64, + ('int64', 'float64'): algos.take_1d_int64_float64, + ('float32', 'float32'): algos.take_1d_float32_float32, + ('float32', 'float64'): algos.take_1d_float32_float64, + ('float64', 'float64'): algos.take_1d_float64_float64, + ('object', 'object'): algos.take_1d_object_object, + ('bool', 'bool'): _view_wrapper(algos.take_1d_bool_bool, np.uint8, + np.uint8), + ('bool', 'object'): _view_wrapper(algos.take_1d_bool_object, np.uint8, + None), + ('datetime64[ns]', 'datetime64[ns]'): _view_wrapper( + algos.take_1d_int64_int64, np.int64, np.int64, np.int64) +} + +_take_2d_axis0_dict = { + ('int8', 'int8'): algos.take_2d_axis0_int8_int8, + ('int8', 'int32'): algos.take_2d_axis0_int8_int32, + ('int8', 'int64'): algos.take_2d_axis0_int8_int64, + ('int8', 'float64'): algos.take_2d_axis0_int8_float64, + ('int16', 'int16'): algos.take_2d_axis0_int16_int16, + ('int16', 'int32'): algos.take_2d_axis0_int16_int32, + ('int16', 'int64'): algos.take_2d_axis0_int16_int64, + ('int16', 'float64'): algos.take_2d_axis0_int16_float64, + ('int32', 'int32'): algos.take_2d_axis0_int32_int32, + ('int32', 'int64'): algos.take_2d_axis0_int32_int64, + ('int32', 'float64'): algos.take_2d_axis0_int32_float64, + ('int64', 'int64'): algos.take_2d_axis0_int64_int64, + ('int64', 'float64'): algos.take_2d_axis0_int64_float64, + ('float32', 'float32'): algos.take_2d_axis0_float32_float32, + ('float32', 'float64'): algos.take_2d_axis0_float32_float64, + ('float64', 'float64'): algos.take_2d_axis0_float64_float64, + ('object', 'object'): algos.take_2d_axis0_object_object, + ('bool', 'bool'): _view_wrapper(algos.take_2d_axis0_bool_bool, np.uint8, + np.uint8), + ('bool', 'object'): _view_wrapper(algos.take_2d_axis0_bool_object, + np.uint8, None), + ('datetime64[ns]', 'datetime64[ns]'): + _view_wrapper(algos.take_2d_axis0_int64_int64, np.int64, np.int64, + fill_wrap=np.int64) +} + +_take_2d_axis1_dict = { + ('int8', 'int8'): algos.take_2d_axis1_int8_int8, + ('int8', 'int32'): algos.take_2d_axis1_int8_int32, + ('int8', 'int64'): algos.take_2d_axis1_int8_int64, + ('int8', 'float64'): algos.take_2d_axis1_int8_float64, + ('int16', 'int16'): algos.take_2d_axis1_int16_int16, + ('int16', 'int32'): algos.take_2d_axis1_int16_int32, + ('int16', 'int64'): algos.take_2d_axis1_int16_int64, + ('int16', 'float64'): algos.take_2d_axis1_int16_float64, + ('int32', 'int32'): algos.take_2d_axis1_int32_int32, + ('int32', 'int64'): algos.take_2d_axis1_int32_int64, + ('int32', 'float64'): algos.take_2d_axis1_int32_float64, + ('int64', 'int64'): algos.take_2d_axis1_int64_int64, + ('int64', 'float64'): algos.take_2d_axis1_int64_float64, + ('float32', 'float32'): algos.take_2d_axis1_float32_float32, + ('float32', 'float64'): algos.take_2d_axis1_float32_float64, + ('float64', 
'float64'): algos.take_2d_axis1_float64_float64, + ('object', 'object'): algos.take_2d_axis1_object_object, + ('bool', 'bool'): _view_wrapper(algos.take_2d_axis1_bool_bool, np.uint8, + np.uint8), + ('bool', 'object'): _view_wrapper(algos.take_2d_axis1_bool_object, + np.uint8, None), + ('datetime64[ns]', 'datetime64[ns]'): + _view_wrapper(algos.take_2d_axis1_int64_int64, np.int64, np.int64, + fill_wrap=np.int64) +} + +_take_2d_multi_dict = { + ('int8', 'int8'): algos.take_2d_multi_int8_int8, + ('int8', 'int32'): algos.take_2d_multi_int8_int32, + ('int8', 'int64'): algos.take_2d_multi_int8_int64, + ('int8', 'float64'): algos.take_2d_multi_int8_float64, + ('int16', 'int16'): algos.take_2d_multi_int16_int16, + ('int16', 'int32'): algos.take_2d_multi_int16_int32, + ('int16', 'int64'): algos.take_2d_multi_int16_int64, + ('int16', 'float64'): algos.take_2d_multi_int16_float64, + ('int32', 'int32'): algos.take_2d_multi_int32_int32, + ('int32', 'int64'): algos.take_2d_multi_int32_int64, + ('int32', 'float64'): algos.take_2d_multi_int32_float64, + ('int64', 'int64'): algos.take_2d_multi_int64_int64, + ('int64', 'float64'): algos.take_2d_multi_int64_float64, + ('float32', 'float32'): algos.take_2d_multi_float32_float32, + ('float32', 'float64'): algos.take_2d_multi_float32_float64, + ('float64', 'float64'): algos.take_2d_multi_float64_float64, + ('object', 'object'): algos.take_2d_multi_object_object, + ('bool', 'bool'): _view_wrapper(algos.take_2d_multi_bool_bool, np.uint8, + np.uint8), + ('bool', 'object'): _view_wrapper(algos.take_2d_multi_bool_object, + np.uint8, None), + ('datetime64[ns]', 'datetime64[ns]'): + _view_wrapper(algos.take_2d_multi_int64_int64, np.int64, np.int64, + fill_wrap=np.int64) +} + + +def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis=0, mask_info=None): + if ndim <= 2: + tup = (arr_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + return func + + tup = (out_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + func = _convert_wrapper(func, out_dtype) + return func + + def func(arr, indexer, out, fill_value=np.nan): + indexer = com._ensure_int64(indexer) + _take_nd_generic(arr, indexer, out, axis=axis, fill_value=fill_value, + mask_info=mask_info) + + return func + + +def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, + allow_fill=True): + """ + Specialized Cython take which sets NaN values in one pass + + Parameters + ---------- + arr : ndarray + Input array + indexer : ndarray + 1-D array of indices to take, subarrays corresponding to -1 value + indices are filled with fill_value + axis : int, default 0 + Axis to take from + out : ndarray or None, default None + Optional output array, must be appropriate type to hold input and + fill_value together, if indexer has any -1 value entries; call + common._maybe_promote to determine this type for any fill_value + fill_value : any, default np.nan + Fill value to replace -1 values with + mask_info : tuple of (ndarray, boolean) + If provided, value should correspond to: + (indexer != -1, (indexer != -1).any()) + If not provided, it will be computed internally if necessary + allow_fill : boolean, default True + If False, indexer is
assumed to contain no -1 values so no filling + will be done. This short-circuits computation of a mask. Result is + undefined if allow_fill == False and -1 is present in indexer. + """ + + # dispatch to internal type takes + if com.is_categorical(arr): + return arr.take_nd(indexer, fill_value=fill_value, + allow_fill=allow_fill) + elif com.is_datetimetz(arr): + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + + if indexer is None: + indexer = np.arange(arr.shape[axis], dtype=np.int64) + dtype, fill_value = arr.dtype, arr.dtype.type() + else: + indexer = com._ensure_int64(indexer) + if not allow_fill: + dtype, fill_value = arr.dtype, arr.dtype.type() + mask_info = None, False + else: + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype and (out is None or out.dtype != dtype): + # check if promotion is actually required based on indexer + if mask_info is not None: + mask, needs_masking = mask_info + else: + mask = indexer == -1 + needs_masking = mask.any() + mask_info = mask, needs_masking + if needs_masking: + if out is not None and out.dtype != dtype: + raise TypeError('Incompatible type for fill_value') + else: + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + flip_order = False + if arr.ndim == 2: + if arr.flags.f_contiguous: + flip_order = True + + if flip_order: + arr = arr.T + axis = arr.ndim - axis - 1 + if out is not None: + out = out.T + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + if out is None: + out_shape = list(arr.shape) + out_shape[axis] = len(indexer) + out_shape = tuple(out_shape) + if arr.flags.f_contiguous and axis == arr.ndim - 1: + # minor tweak that can make an order-of-magnitude difference + # for dataframes initialized directly from 2-d ndarrays + # (s.t. 
df.values is c-contiguous and df._data.blocks[0] is its + f-contiguous transpose) + out = np.empty(out_shape, dtype=dtype, order='F') + else: + out = np.empty(out_shape, dtype=dtype) + + func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, + mask_info=mask_info) + indexer = com._ensure_int64(indexer) + func(arr, indexer, out, fill_value) + + if flip_order: + out = out.T + return out + + +take_1d = take_nd + + +def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, + allow_fill=True): + """ + Specialized Cython take which sets NaN values in one pass + """ + if indexer is None or (indexer[0] is None and indexer[1] is None): + row_idx = np.arange(arr.shape[0], dtype=np.int64) + col_idx = np.arange(arr.shape[1], dtype=np.int64) + indexer = row_idx, col_idx + dtype, fill_value = arr.dtype, arr.dtype.type() + else: + row_idx, col_idx = indexer + if row_idx is None: + row_idx = np.arange(arr.shape[0], dtype=np.int64) + else: + row_idx = com._ensure_int64(row_idx) + if col_idx is None: + col_idx = np.arange(arr.shape[1], dtype=np.int64) + else: + col_idx = com._ensure_int64(col_idx) + indexer = row_idx, col_idx + if not allow_fill: + dtype, fill_value = arr.dtype, arr.dtype.type() + mask_info = None, False + else: + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype and (out is None or out.dtype != dtype): + # check if promotion is actually required based on indexer + if mask_info is not None: + (row_mask, col_mask), (row_needs, col_needs) = mask_info + else: + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + mask_info = (row_mask, col_mask), (row_needs, col_needs) + if row_needs or col_needs: + if out is not None and out.dtype != dtype: + raise TypeError('Incompatible type for fill_value') + else: + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + if out is None: + out_shape = len(row_idx), len(col_idx) + out = np.empty(out_shape, dtype=dtype) + + func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) + if func is None and arr.dtype != out.dtype: + func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None) + if func is not None: + func = _convert_wrapper(func, out.dtype) + if func is None: + + def func(arr, indexer, out, fill_value=np.nan): + _take_2d_multi_generic(arr, indexer, out, fill_value=fill_value, + mask_info=mask_info) + + func(arr, indexer, out=out, fill_value=fill_value) + return out + + +# ---- # +# diff # +# ---- # + +_diff_special = { + 'float64': algos.diff_2d_float64, + 'float32': algos.diff_2d_float32, + 'int64': algos.diff_2d_int64, + 'int32': algos.diff_2d_int32, + 'int16': algos.diff_2d_int16, + 'int8': algos.diff_2d_int8, +} + + +def diff(arr, n, axis=0): + """ difference of n between self, + analogous to s - s.shift(n) """ + + n = int(n) + na = np.nan + dtype = arr.dtype + is_timedelta = False + if com.needs_i8_conversion(arr): + dtype = np.float64 + arr = arr.view('i8') + na = tslib.iNaT + is_timedelta = True + elif issubclass(dtype.type, np.integer): + dtype = np.float64 + elif issubclass(dtype.type, np.bool_): + dtype = np.object_ + + dtype =
np.dtype(dtype) + out_arr = np.empty(arr.shape, dtype=dtype) + + na_indexer = [slice(None)] * arr.ndim + na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None) + out_arr[tuple(na_indexer)] = na + + if arr.ndim == 2 and arr.dtype.name in _diff_special: + f = _diff_special[arr.dtype.name] + f(arr, out_arr, n, axis) + else: + res_indexer = [slice(None)] * arr.ndim + res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) + res_indexer = tuple(res_indexer) + + lag_indexer = [slice(None)] * arr.ndim + lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) + lag_indexer = tuple(lag_indexer) + + # need to make sure that we account for na for datelike/timedelta + # we don't actually want to subtract these i8 numbers + if is_timedelta: + res = arr[res_indexer] + lag = arr[lag_indexer] + + mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na) + if mask.any(): + res = res.copy() + res[mask] = 0 + lag = lag.copy() + lag[mask] = 0 + + result = res - lag + result[mask] = na + out_arr[res_indexer] = result + else: + out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] + + if is_timedelta: + from pandas import TimedeltaIndex + out_arr = TimedeltaIndex(out_arr.ravel().astype('int64')).asi8.reshape( + out_arr.shape).astype('timedelta64[ns]') + + return out_arr diff --git a/pandas/core/api.py b/pandas/core/api.py index 1d9a07eca5f03..0a6992bfebd70 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -8,7 +8,7 @@ from pandas.core.common import isnull, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper -from pandas.core.format import set_eng_float_format +from pandas.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, RangeIndex, Float64Index, MultiIndex) diff --git a/pandas/core/base.py b/pandas/core/base.py index 3ebd60d45b48d..e14cdd88b50f7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -10,6 +10,7 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError +from pandas.formats.printing import pprint_thing _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -680,7 +681,6 @@ def _disabled(self, *args, **kwargs): self.__class__.__name__) def __unicode__(self): - from pandas.core.common import pprint_thing return pprint_thing(self, quote_strings=True, escape_chars=('\t', '\r', '\n')) @@ -724,8 +724,8 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
""" - prepr = com.pprint_thing(self, escape_chars=('\t', '\r', '\n'), - quote_strings=True) + prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'), + quote_strings=True) return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 69c1adbfae574..bf5fbb95dbfaa 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -7,7 +7,7 @@ from pandas import compat, lib from pandas.compat import u -from pandas.core.algorithms import factorize +from pandas.core.algorithms import factorize, take_1d from pandas.core.base import (PandasObject, PandasDelegate, NoNewAttributesMixin, _shared_docs) import pandas.core.common as com @@ -20,8 +20,8 @@ is_dtype_equal, is_categorical_dtype, is_integer_dtype, _possibly_infer_to_datetimelike, get_dtype_kinds, is_list_like, is_sequence, is_null_slice, is_bool, _ensure_object, _ensure_int64, - _coerce_indexer_dtype, take_1d) -from pandas.core.dtypes import CategoricalDtype + _coerce_indexer_dtype) +from pandas.types.api import CategoricalDtype from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option @@ -1433,7 +1433,7 @@ def _repr_categories(self): """ return the base repr for the categories """ max_categories = (10 if get_option("display.max_categories") == 0 else get_option("display.max_categories")) - from pandas.core import format as fmt + from pandas.formats import format as fmt if len(self.categories) > max_categories: num = max_categories // 2 head = fmt.format_array(self.categories[:num], None) @@ -1481,7 +1481,7 @@ def _repr_footer(self): return u('Length: %d\n%s') % (len(self), self._repr_categories_info()) def _get_repr(self, length=True, na_rep='NaN', footer=True): - from pandas.core import format as fmt + from pandas.formats import format as fmt formatter = fmt.CategoricalFormatter(self, length=length, na_rep=na_rep, footer=footer) result = formatter.to_string() diff --git a/pandas/core/common.py b/pandas/core/common.py index 4275870cb8543..dc2ee31bbaf3d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -8,18 +8,16 @@ from datetime import datetime, timedelta from functools import partial -from numpy.lib.format import read_array, write_array import numpy as np - import pandas as pd import pandas.algos as algos import pandas.lib as lib import pandas.tslib as tslib from pandas import compat -from pandas.compat import (BytesIO, range, long, u, zip, map, string_types, +from pandas.compat import (long, zip, map, string_types, iteritems) -from pandas.core.dtypes import (CategoricalDtype, CategoricalDtypeType, - DatetimeTZDtype, DatetimeTZDtypeType) +from pandas.types import api as gt +from pandas.types.api import * # noqa from pandas.core.config import get_option @@ -72,63 +70,6 @@ def __str__(self): _int64_max = np.iinfo(np.int64).max -# define abstract base classes to enable isinstance type checking on our -# objects -def create_pandas_abc_type(name, attr, comp): - @classmethod - def _check(cls, inst): - return getattr(inst, attr, '_typ') in comp - - dct = dict(__instancecheck__=_check, __subclasscheck__=_check) - meta = type("ABCBase", (type, ), dct) - return meta(name, tuple(), dct) - - -ABCIndex = create_pandas_abc_type("ABCIndex", "_typ", ("index", )) -ABCInt64Index = create_pandas_abc_type("ABCInt64Index", "_typ", - ("int64index", )) -ABCRangeIndex = create_pandas_abc_type("ABCRangeIndex", "_typ", - ("rangeindex", )) -ABCFloat64Index = create_pandas_abc_type("ABCFloat64Index", "_typ", - 
("float64index", )) -ABCMultiIndex = create_pandas_abc_type("ABCMultiIndex", "_typ", - ("multiindex", )) -ABCDatetimeIndex = create_pandas_abc_type("ABCDatetimeIndex", "_typ", - ("datetimeindex", )) -ABCTimedeltaIndex = create_pandas_abc_type("ABCTimedeltaIndex", "_typ", - ("timedeltaindex", )) -ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", - ("periodindex", )) -ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", - ("categoricalindex", )) -ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", - ("index", "int64index", "rangeindex", - "float64index", - "multiindex", "datetimeindex", - "timedeltaindex", "periodindex", - "categoricalindex")) - -ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", )) -ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", )) -ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel", )) -ABCSparseSeries = create_pandas_abc_type("ABCSparseSeries", "_subtyp", - ('sparse_series', - 'sparse_time_series')) -ABCSparseArray = create_pandas_abc_type("ABCSparseArray", "_subtyp", - ('sparse_array', 'sparse_series')) -ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", - ("categorical")) -ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) - - -class _ABCGeneric(type): - def __instancecheck__(cls, inst): - return hasattr(inst, "_data") - - -ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {}) - - def isnull(obj): """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) @@ -156,9 +97,9 @@ def _isnull_new(obj): # hack (for now) because MI registers as ndarray elif isinstance(obj, pd.MultiIndex): raise NotImplementedError("isnull is not defined for MultiIndex") - elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)): + elif isinstance(obj, (gt.ABCSeries, np.ndarray, pd.Index)): return _isnull_ndarraylike(obj) - elif isinstance(obj, ABCGeneric): + elif isinstance(obj, gt.ABCGeneric): return obj._constructor(obj._data.isnull(func=isnull)) elif isinstance(obj, list) or hasattr(obj, '__array__'): return _isnull_ndarraylike(np.asarray(obj)) @@ -182,9 +123,9 @@ def _isnull_old(obj): # hack (for now) because MI registers as ndarray elif isinstance(obj, pd.MultiIndex): raise NotImplementedError("isnull is not defined for MultiIndex") - elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)): + elif isinstance(obj, (gt.ABCSeries, np.ndarray, pd.Index)): return _isnull_ndarraylike_old(obj) - elif isinstance(obj, ABCGeneric): + elif isinstance(obj, gt.ABCGeneric): return obj._constructor(obj._data.isnull(func=_isnull_old)) elif isinstance(obj, list) or hasattr(obj, '__array__'): return _isnull_ndarraylike_old(np.asarray(obj)) @@ -251,7 +192,7 @@ def _isnull_ndarraylike(obj): result = np.isnan(values) # box - if isinstance(obj, ABCSeries): + if isinstance(obj, gt.ABCSeries): from pandas import Series result = Series(result, index=obj.index, name=obj.name, copy=False) @@ -280,7 +221,7 @@ def _isnull_ndarraylike_old(obj): result = ~np.isfinite(values) # box - if isinstance(obj, ABCSeries): + if isinstance(obj, gt.ABCSeries): from pandas import Series result = Series(result, index=obj.index, name=obj.name, copy=False) @@ -435,522 +376,6 @@ def flatten(l): yield el -def mask_missing(arr, values_to_mask): - """ - Return a masking array of same size/shape as arr - with entries equaling any member of values_to_mask set to True - """ - if not isinstance(values_to_mask, (list, np.ndarray)): - values_to_mask = [values_to_mask] - - try: - values_to_mask 
= np.array(values_to_mask, dtype=arr.dtype) - except Exception: - values_to_mask = np.array(values_to_mask, dtype=object) - - na_mask = isnull(values_to_mask) - nonna = values_to_mask[~na_mask] - - mask = None - for x in nonna: - if mask is None: - - # numpy elementwise comparison warning - if is_numeric_v_string_like(arr, x): - mask = False - else: - mask = arr == x - - # if x is a string and arr is not, then we get False and we must - # expand the mask to size arr.shape - if lib.isscalar(mask): - mask = np.zeros(arr.shape, dtype=bool) - else: - - # numpy elementwise comparison warning - if is_numeric_v_string_like(arr, x): - mask |= False - else: - mask |= arr == x - - if na_mask.any(): - if mask is None: - mask = isnull(arr) - else: - mask |= isnull(arr) - - return mask - - -def _pickle_array(arr): - arr = arr.view(np.ndarray) - - buf = BytesIO() - write_array(buf, arr) - - return buf.getvalue() - - -def _unpickle_array(bytes): - arr = read_array(BytesIO(bytes)) - - # All datetimes should be stored as M8[ns]. When unpickling with - # numpy1.6, it will read these as M8[us]. So this ensures all - # datetime64 types are read as MS[ns] - if is_datetime64_dtype(arr): - arr = arr.view(_NS_DTYPE) - - return arr - - -def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None): - def wrapper(arr, indexer, out, fill_value=np.nan): - if arr_dtype is not None: - arr = arr.view(arr_dtype) - if out_dtype is not None: - out = out.view(out_dtype) - if fill_wrap is not None: - fill_value = fill_wrap(fill_value) - f(arr, indexer, out, fill_value=fill_value) - - return wrapper - - -def _convert_wrapper(f, conv_dtype): - def wrapper(arr, indexer, out, fill_value=np.nan): - arr = arr.astype(conv_dtype) - f(arr, indexer, out, fill_value=fill_value) - - return wrapper - - -def _take_2d_multi_generic(arr, indexer, out, fill_value, mask_info): - # this is not ideal, performance-wise, but it's better than raising - # an exception (best to optimize in Cython to avoid getting here) - row_idx, col_idx = indexer - if mask_info is not None: - (row_mask, col_mask), (row_needs, col_needs) = mask_info - else: - row_mask = row_idx == -1 - col_mask = col_idx == -1 - row_needs = row_mask.any() - col_needs = col_mask.any() - if fill_value is not None: - if row_needs: - out[row_mask, :] = fill_value - if col_needs: - out[:, col_mask] = fill_value - for i in range(len(row_idx)): - u_ = row_idx[i] - for j in range(len(col_idx)): - v = col_idx[j] - out[i, j] = arr[u_, v] - - -def _take_nd_generic(arr, indexer, out, axis, fill_value, mask_info): - if mask_info is not None: - mask, needs_masking = mask_info - else: - mask = indexer == -1 - needs_masking = mask.any() - if arr.dtype != out.dtype: - arr = arr.astype(out.dtype) - if arr.shape[axis] > 0: - arr.take(_ensure_platform_int(indexer), axis=axis, out=out) - if needs_masking: - outindexer = [slice(None)] * arr.ndim - outindexer[axis] = mask - out[tuple(outindexer)] = fill_value - - -_take_1d_dict = { - ('int8', 'int8'): algos.take_1d_int8_int8, - ('int8', 'int32'): algos.take_1d_int8_int32, - ('int8', 'int64'): algos.take_1d_int8_int64, - ('int8', 'float64'): algos.take_1d_int8_float64, - ('int16', 'int16'): algos.take_1d_int16_int16, - ('int16', 'int32'): algos.take_1d_int16_int32, - ('int16', 'int64'): algos.take_1d_int16_int64, - ('int16', 'float64'): algos.take_1d_int16_float64, - ('int32', 'int32'): algos.take_1d_int32_int32, - ('int32', 'int64'): algos.take_1d_int32_int64, - ('int32', 'float64'): algos.take_1d_int32_float64, - ('int64', 'int64'): 
algos.take_1d_int64_int64, - ('int64', 'float64'): algos.take_1d_int64_float64, - ('float32', 'float32'): algos.take_1d_float32_float32, - ('float32', 'float64'): algos.take_1d_float32_float64, - ('float64', 'float64'): algos.take_1d_float64_float64, - ('object', 'object'): algos.take_1d_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_1d_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_1d_bool_object, np.uint8, - None), - ('datetime64[ns]', 'datetime64[ns]'): _view_wrapper( - algos.take_1d_int64_int64, np.int64, np.int64, np.int64) -} - -_take_2d_axis0_dict = { - ('int8', 'int8'): algos.take_2d_axis0_int8_int8, - ('int8', 'int32'): algos.take_2d_axis0_int8_int32, - ('int8', 'int64'): algos.take_2d_axis0_int8_int64, - ('int8', 'float64'): algos.take_2d_axis0_int8_float64, - ('int16', 'int16'): algos.take_2d_axis0_int16_int16, - ('int16', 'int32'): algos.take_2d_axis0_int16_int32, - ('int16', 'int64'): algos.take_2d_axis0_int16_int64, - ('int16', 'float64'): algos.take_2d_axis0_int16_float64, - ('int32', 'int32'): algos.take_2d_axis0_int32_int32, - ('int32', 'int64'): algos.take_2d_axis0_int32_int64, - ('int32', 'float64'): algos.take_2d_axis0_int32_float64, - ('int64', 'int64'): algos.take_2d_axis0_int64_int64, - ('int64', 'float64'): algos.take_2d_axis0_int64_float64, - ('float32', 'float32'): algos.take_2d_axis0_float32_float32, - ('float32', 'float64'): algos.take_2d_axis0_float32_float64, - ('float64', 'float64'): algos.take_2d_axis0_float64_float64, - ('object', 'object'): algos.take_2d_axis0_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_2d_axis0_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_2d_axis0_bool_object, - np.uint8, None), - ('datetime64[ns]', 'datetime64[ns]'): - _view_wrapper(algos.take_2d_axis0_int64_int64, np.int64, np.int64, - fill_wrap=np.int64) -} - -_take_2d_axis1_dict = { - ('int8', 'int8'): algos.take_2d_axis1_int8_int8, - ('int8', 'int32'): algos.take_2d_axis1_int8_int32, - ('int8', 'int64'): algos.take_2d_axis1_int8_int64, - ('int8', 'float64'): algos.take_2d_axis1_int8_float64, - ('int16', 'int16'): algos.take_2d_axis1_int16_int16, - ('int16', 'int32'): algos.take_2d_axis1_int16_int32, - ('int16', 'int64'): algos.take_2d_axis1_int16_int64, - ('int16', 'float64'): algos.take_2d_axis1_int16_float64, - ('int32', 'int32'): algos.take_2d_axis1_int32_int32, - ('int32', 'int64'): algos.take_2d_axis1_int32_int64, - ('int32', 'float64'): algos.take_2d_axis1_int32_float64, - ('int64', 'int64'): algos.take_2d_axis1_int64_int64, - ('int64', 'float64'): algos.take_2d_axis1_int64_float64, - ('float32', 'float32'): algos.take_2d_axis1_float32_float32, - ('float32', 'float64'): algos.take_2d_axis1_float32_float64, - ('float64', 'float64'): algos.take_2d_axis1_float64_float64, - ('object', 'object'): algos.take_2d_axis1_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_2d_axis1_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_2d_axis1_bool_object, - np.uint8, None), - ('datetime64[ns]', 'datetime64[ns]'): - _view_wrapper(algos.take_2d_axis1_int64_int64, np.int64, np.int64, - fill_wrap=np.int64) -} - -_take_2d_multi_dict = { - ('int8', 'int8'): algos.take_2d_multi_int8_int8, - ('int8', 'int32'): algos.take_2d_multi_int8_int32, - ('int8', 'int64'): algos.take_2d_multi_int8_int64, - ('int8', 'float64'): algos.take_2d_multi_int8_float64, - ('int16', 'int16'): algos.take_2d_multi_int16_int16, - ('int16', 'int32'): algos.take_2d_multi_int16_int32, - 
('int16', 'int64'): algos.take_2d_multi_int16_int64, - ('int16', 'float64'): algos.take_2d_multi_int16_float64, - ('int32', 'int32'): algos.take_2d_multi_int32_int32, - ('int32', 'int64'): algos.take_2d_multi_int32_int64, - ('int32', 'float64'): algos.take_2d_multi_int32_float64, - ('int64', 'int64'): algos.take_2d_multi_int64_int64, - ('int64', 'float64'): algos.take_2d_multi_int64_float64, - ('float32', 'float32'): algos.take_2d_multi_float32_float32, - ('float32', 'float64'): algos.take_2d_multi_float32_float64, - ('float64', 'float64'): algos.take_2d_multi_float64_float64, - ('object', 'object'): algos.take_2d_multi_object_object, - ('bool', 'bool'): _view_wrapper(algos.take_2d_multi_bool_bool, np.uint8, - np.uint8), - ('bool', 'object'): _view_wrapper(algos.take_2d_multi_bool_object, - np.uint8, None), - ('datetime64[ns]', 'datetime64[ns]'): - _view_wrapper(algos.take_2d_multi_int64_int64, np.int64, np.int64, - fill_wrap=np.int64) -} - - -def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis=0, mask_info=None): - if ndim <= 2: - tup = (arr_dtype.name, out_dtype.name) - if ndim == 1: - func = _take_1d_dict.get(tup, None) - elif ndim == 2: - if axis == 0: - func = _take_2d_axis0_dict.get(tup, None) - else: - func = _take_2d_axis1_dict.get(tup, None) - if func is not None: - return func - - tup = (out_dtype.name, out_dtype.name) - if ndim == 1: - func = _take_1d_dict.get(tup, None) - elif ndim == 2: - if axis == 0: - func = _take_2d_axis0_dict.get(tup, None) - else: - func = _take_2d_axis1_dict.get(tup, None) - if func is not None: - func = _convert_wrapper(func, out_dtype) - return func - - def func(arr, indexer, out, fill_value=np.nan): - indexer = _ensure_int64(indexer) - _take_nd_generic(arr, indexer, out, axis=axis, fill_value=fill_value, - mask_info=mask_info) - - return func - - -def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, - allow_fill=True): - """ - Specialized Cython take which sets NaN values in one pass - - Parameters - ---------- - arr : ndarray - Input array - indexer : ndarray - 1-D array of indices to take, subarrays corresponding to -1 value - indicies are filed with fill_value - axis : int, default 0 - Axis to take from - out : ndarray or None, default None - Optional output array, must be appropriate type to hold input and - fill_value together, if indexer has any -1 value entries; call - common._maybe_promote to determine this type for any fill_value - fill_value : any, default np.nan - Fill value to replace -1 values with - mask_info : tuple of (ndarray, boolean) - If provided, value should correspond to: - (indexer != -1, (indexer != -1).any()) - If not provided, it will be computed internally if necessary - allow_fill : boolean, default True - If False, indexer is assumed to contain no -1 values so no filling - will be done. This short-circuits computation of a mask. Result is - undefined if allow_fill == False and -1 is present in indexer. 
- """ - - # dispatch to internal type takes - if is_categorical(arr): - return arr.take_nd(indexer, fill_value=fill_value, - allow_fill=allow_fill) - elif is_datetimetz(arr): - return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) - - if indexer is None: - indexer = np.arange(arr.shape[axis], dtype=np.int64) - dtype, fill_value = arr.dtype, arr.dtype.type() - else: - indexer = _ensure_int64(indexer) - if not allow_fill: - dtype, fill_value = arr.dtype, arr.dtype.type() - mask_info = None, False - else: - # check for promotion based on types only (do this first because - # it's faster than computing a mask) - dtype, fill_value = _maybe_promote(arr.dtype, fill_value) - if dtype != arr.dtype and (out is None or out.dtype != dtype): - # check if promotion is actually required based on indexer - if mask_info is not None: - mask, needs_masking = mask_info - else: - mask = indexer == -1 - needs_masking = mask.any() - mask_info = mask, needs_masking - if needs_masking: - if out is not None and out.dtype != dtype: - raise TypeError('Incompatible type for fill_value') - else: - # if not, then depromote, set fill_value to dummy - # (it won't be used but we don't want the cython code - # to crash when trying to cast it to dtype) - dtype, fill_value = arr.dtype, arr.dtype.type() - - flip_order = False - if arr.ndim == 2: - if arr.flags.f_contiguous: - flip_order = True - - if flip_order: - arr = arr.T - axis = arr.ndim - axis - 1 - if out is not None: - out = out.T - - # at this point, it's guaranteed that dtype can hold both the arr values - # and the fill_value - if out is None: - out_shape = list(arr.shape) - out_shape[axis] = len(indexer) - out_shape = tuple(out_shape) - if arr.flags.f_contiguous and axis == arr.ndim - 1: - # minor tweak that can make an order-of-magnitude difference - # for dataframes initialized directly from 2-d ndarrays - # (s.t. 
df.values is c-contiguous and df._data.blocks[0] is its - # f-contiguous transpose) - out = np.empty(out_shape, dtype=dtype, order='F') - else: - out = np.empty(out_shape, dtype=dtype) - - func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, - mask_info=mask_info) - indexer = _ensure_int64(indexer) - func(arr, indexer, out, fill_value) - - if flip_order: - out = out.T - return out - - -take_1d = take_nd - - -def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, - allow_fill=True): - """ - Specialized Cython take which sets NaN values in one pass - """ - if indexer is None or (indexer[0] is None and indexer[1] is None): - row_idx = np.arange(arr.shape[0], dtype=np.int64) - col_idx = np.arange(arr.shape[1], dtype=np.int64) - indexer = row_idx, col_idx - dtype, fill_value = arr.dtype, arr.dtype.type() - else: - row_idx, col_idx = indexer - if row_idx is None: - row_idx = np.arange(arr.shape[0], dtype=np.int64) - else: - row_idx = _ensure_int64(row_idx) - if col_idx is None: - col_idx = np.arange(arr.shape[1], dtype=np.int64) - else: - col_idx = _ensure_int64(col_idx) - indexer = row_idx, col_idx - if not allow_fill: - dtype, fill_value = arr.dtype, arr.dtype.type() - mask_info = None, False - else: - # check for promotion based on types only (do this first because - # it's faster than computing a mask) - dtype, fill_value = _maybe_promote(arr.dtype, fill_value) - if dtype != arr.dtype and (out is None or out.dtype != dtype): - # check if promotion is actually required based on indexer - if mask_info is not None: - (row_mask, col_mask), (row_needs, col_needs) = mask_info - else: - row_mask = row_idx == -1 - col_mask = col_idx == -1 - row_needs = row_mask.any() - col_needs = col_mask.any() - mask_info = (row_mask, col_mask), (row_needs, col_needs) - if row_needs or col_needs: - if out is not None and out.dtype != dtype: - raise TypeError('Incompatible type for fill_value') - else: - # if not, then depromote, set fill_value to dummy - # (it won't be used but we don't want the cython code - # to crash when trying to cast it to dtype) - dtype, fill_value = arr.dtype, arr.dtype.type() - - # at this point, it's guaranteed that dtype can hold both the arr values - # and the fill_value - if out is None: - out_shape = len(row_idx), len(col_idx) - out = np.empty(out_shape, dtype=dtype) - - func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) - if func is None and arr.dtype != out.dtype: - func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None) - if func is not None: - func = _convert_wrapper(func, out.dtype) - if func is None: - - def func(arr, indexer, out, fill_value=np.nan): - _take_2d_multi_generic(arr, indexer, out, fill_value=fill_value, - mask_info=mask_info) - - func(arr, indexer, out=out, fill_value=fill_value) - return out - - -_diff_special = { - 'float64': algos.diff_2d_float64, - 'float32': algos.diff_2d_float32, - 'int64': algos.diff_2d_int64, - 'int32': algos.diff_2d_int32, - 'int16': algos.diff_2d_int16, - 'int8': algos.diff_2d_int8, -} - - -def diff(arr, n, axis=0): - """ difference of n between self, - analagoust to s-s.shift(n) """ - - n = int(n) - na = np.nan - dtype = arr.dtype - is_timedelta = False - if needs_i8_conversion(arr): - dtype = np.float64 - arr = arr.view('i8') - na = tslib.iNaT - is_timedelta = True - elif issubclass(dtype.type, np.integer): - dtype = np.float64 - elif issubclass(dtype.type, np.bool_): - dtype = np.object_ - - dtype = np.dtype(dtype) - out_arr = np.empty(arr.shape, dtype=dtype) 
- - na_indexer = [slice(None)] * arr.ndim - na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None) - out_arr[tuple(na_indexer)] = na - - if arr.ndim == 2 and arr.dtype.name in _diff_special: - f = _diff_special[arr.dtype.name] - f(arr, out_arr, n, axis) - else: - res_indexer = [slice(None)] * arr.ndim - res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) - res_indexer = tuple(res_indexer) - - lag_indexer = [slice(None)] * arr.ndim - lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) - lag_indexer = tuple(lag_indexer) - - # need to make sure that we account for na for datelike/timedelta - # we don't actually want to subtract these i8 numbers - if is_timedelta: - res = arr[res_indexer] - lag = arr[lag_indexer] - - mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na) - if mask.any(): - res = res.copy() - res[mask] = 0 - lag = lag.copy() - lag[mask] = 0 - - result = res - lag - result[mask] = na - out_arr[res_indexer] = result - else: - out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] - - if is_timedelta: - from pandas import TimedeltaIndex - out_arr = TimedeltaIndex(out_arr.ravel().astype('int64')).asi8.reshape( - out_arr.shape).astype('timedelta64[ns]') - - return out_arr - - def _coerce_indexer_dtype(indexer, categories): """ coerce the indexer input array to the smallest dtype possible """ l = len(categories) @@ -1482,9 +907,9 @@ def _get_dtype_from_object(dtype): if isinstance(dtype, type) and issubclass(dtype, np.generic): return dtype elif is_categorical(dtype): - return CategoricalDtype().type + return gt.CategoricalDtype().type elif is_datetimetz(dtype): - return DatetimeTZDtype(dtype).type + return gt.DatetimeTZDtype(dtype).type elif isinstance(dtype, np.dtype): # dtype object try: _validate_date_like_dtype(dtype) @@ -1688,10 +1113,10 @@ def _possibly_infer_to_datetimelike(value, convert_dates=False): """ - if isinstance(value, (ABCDatetimeIndex, ABCPeriodIndex)): + if isinstance(value, (gt.ABCDatetimeIndex, gt.ABCPeriodIndex)): return value - elif isinstance(value, ABCSeries): - if isinstance(value._values, ABCDatetimeIndex): + elif isinstance(value, gt.ABCSeries): + if isinstance(value._values, gt.ABCDatetimeIndex): return value._values v = value @@ -1761,7 +1186,7 @@ def _try_timedelta(v): def is_bool_indexer(key): - if isinstance(key, (ABCSeries, np.ndarray)): + if isinstance(key, (gt.ABCSeries, np.ndarray)): if key.dtype == np.object_: key = np.asarray(_values_from_object(key)) @@ -1836,65 +1261,6 @@ def _try_sort(iterable): def _count_not_none(*args): return sum(x is not None for x in args) -# ----------------------------------------------------------------------------- -# miscellaneous python tools - - -def adjoin(space, *lists, **kwargs): - """ - Glues together two sets of strings using the amount of space requested. - The idea is to prettify. - - ---------- - space : int - number of spaces for padding - lists : str - list of str which being joined - strlen : callable - function used to calculate the length of each str. Needed for unicode - handling. - justfunc : callable - function used to justify str. Needed for unicode handling. 
- """ - strlen = kwargs.pop('strlen', len) - justfunc = kwargs.pop('justfunc', _justify) - - out_lines = [] - newLists = [] - lengths = [max(map(strlen, x)) + space for x in lists[:-1]] - # not the last one - lengths.append(max(map(len, lists[-1]))) - maxLen = max(map(len, lists)) - for i, lst in enumerate(lists): - nl = justfunc(lst, lengths[i], mode='left') - nl.extend([' ' * lengths[i]] * (maxLen - len(lst))) - newLists.append(nl) - toJoin = zip(*newLists) - for lines in toJoin: - out_lines.append(_join_unicode(lines)) - return _join_unicode(out_lines, sep='\n') - - -def _justify(texts, max_len, mode='right'): - """ - Perform ljust, center, rjust against string or list-like - """ - if mode == 'left': - return [x.ljust(max_len) for x in texts] - elif mode == 'center': - return [x.center(max_len) for x in texts] - else: - return [x.rjust(max_len) for x in texts] - - -def _join_unicode(lines, sep=''): - try: - return sep.join(lines) - except UnicodeDecodeError: - sep = compat.text_type(sep) - return sep.join([x.decode('utf-8') if isinstance(x, str) else x - for x in lines]) - def iterpairs(seq): """ @@ -1938,19 +1304,6 @@ def split_ranges(mask): yield ranges[-1] -def indent(string, spaces=4): - dent = ' ' * spaces - return '\n'.join([dent + x for x in string.split('\n')]) - - -def banner(message): - """ - Return 80-char width message declaration with = bars on top and bottom. - """ - bar = '=' * 80 - return '%s\n%s\n%s' % (bar, message, bar) - - def _long_prod(vals): result = long(1) for x in vals: @@ -2089,31 +1442,32 @@ def is_period_arraylike(arr): """ return if we are period arraylike / PeriodIndex """ if isinstance(arr, pd.PeriodIndex): return True - elif isinstance(arr, (np.ndarray, ABCSeries)): + elif isinstance(arr, (np.ndarray, gt.ABCSeries)): return arr.dtype == object and lib.infer_dtype(arr) == 'period' return getattr(arr, 'inferred_type', None) == 'period' def is_datetime_arraylike(arr): """ return if we are datetime arraylike / DatetimeIndex """ - if isinstance(arr, ABCDatetimeIndex): + if isinstance(arr, gt.ABCDatetimeIndex): return True - elif isinstance(arr, (np.ndarray, ABCSeries)): + elif isinstance(arr, (np.ndarray, gt.ABCSeries)): return arr.dtype == object and lib.infer_dtype(arr) == 'datetime' return getattr(arr, 'inferred_type', None) == 'datetime' def is_datetimelike(arr): - return (arr.dtype in _DATELIKE_DTYPES or isinstance(arr, ABCPeriodIndex) or + return (arr.dtype in _DATELIKE_DTYPES or + isinstance(arr, gt.ABCPeriodIndex) or is_datetimetz(arr)) def _coerce_to_dtype(dtype): """ coerce a string / np.dtype to a dtype """ if is_categorical_dtype(dtype): - dtype = CategoricalDtype() + dtype = gt.CategoricalDtype() elif is_datetime64tz_dtype(dtype): - dtype = DatetimeTZDtype(dtype) + dtype = gt.DatetimeTZDtype(dtype) else: dtype = np.dtype(dtype) return dtype @@ -2124,15 +1478,15 @@ def _get_dtype(arr_or_dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): return np.dtype(arr_or_dtype) - elif isinstance(arr_or_dtype, CategoricalDtype): + elif isinstance(arr_or_dtype, gt.CategoricalDtype): return arr_or_dtype - elif isinstance(arr_or_dtype, DatetimeTZDtype): + elif isinstance(arr_or_dtype, gt.DatetimeTZDtype): return arr_or_dtype elif isinstance(arr_or_dtype, compat.string_types): if is_categorical_dtype(arr_or_dtype): - return CategoricalDtype.construct_from_string(arr_or_dtype) + return gt.CategoricalDtype.construct_from_string(arr_or_dtype) elif is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtype.construct_from_string(arr_or_dtype) + return 
gt.DatetimeTZDtype.construct_from_string(arr_or_dtype) if hasattr(arr_or_dtype, 'dtype'): arr_or_dtype = arr_or_dtype.dtype @@ -2144,15 +1498,15 @@ def _get_dtype_type(arr_or_dtype): return arr_or_dtype.type elif isinstance(arr_or_dtype, type): return np.dtype(arr_or_dtype).type - elif isinstance(arr_or_dtype, CategoricalDtype): - return CategoricalDtypeType - elif isinstance(arr_or_dtype, DatetimeTZDtype): - return DatetimeTZDtypeType + elif isinstance(arr_or_dtype, gt.CategoricalDtype): + return gt.CategoricalDtypeType + elif isinstance(arr_or_dtype, gt.DatetimeTZDtype): + return gt.DatetimeTZDtypeType elif isinstance(arr_or_dtype, compat.string_types): if is_categorical_dtype(arr_or_dtype): - return CategoricalDtypeType + return gt.CategoricalDtypeType elif is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtypeType + return gt.DatetimeTZDtypeType return _get_dtype_type(np.dtype(arr_or_dtype)) try: return arr_or_dtype.dtype.type @@ -2204,7 +1558,7 @@ def is_datetime64_dtype(arr_or_dtype): def is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtype.is_dtype(arr_or_dtype) + return gt.DatetimeTZDtype.is_dtype(arr_or_dtype) def is_datetime64_any_dtype(arr_or_dtype): @@ -2335,12 +1689,12 @@ def is_bool_dtype(arr_or_dtype): def is_sparse(array): """ return if we are a sparse array """ - return isinstance(array, (ABCSparseArray, ABCSparseSeries)) + return isinstance(array, (gt.ABCSparseArray, gt.ABCSparseSeries)) def is_datetimetz(array): """ return if we are a datetime with tz array """ - return ((isinstance(array, ABCDatetimeIndex) and + return ((isinstance(array, gt.ABCDatetimeIndex) and getattr(array, 'tz', None) is not None) or is_datetime64tz_dtype(array)) @@ -2361,11 +1715,11 @@ def is_internal_type(value): def is_categorical(array): """ return if we are a categorical possibility """ - return isinstance(array, ABCCategorical) or is_categorical_dtype(array) + return isinstance(array, gt.ABCCategorical) or is_categorical_dtype(array) def is_categorical_dtype(arr_or_dtype): - return CategoricalDtype.is_dtype(arr_or_dtype) + return gt.CategoricalDtype.is_dtype(arr_or_dtype) def is_complex_dtype(arr_or_dtype): @@ -2755,187 +2109,6 @@ def in_ipython_frontend(): return False -# Unicode consolidation -# --------------------- -# -# pprinting utility functions for generating Unicode text or -# bytes(3.x)/str(2.x) representations of objects. -# Try to use these as much as possible rather then rolling your own. -# -# When to use -# ----------- -# -# 1) If you're writing code internal to pandas (no I/O directly involved), -# use pprint_thing(). -# -# It will always return unicode text which can handled by other -# parts of the package without breakage. -# -# 2) If you need to send something to the console, use console_encode(). -# -# console_encode() should (hopefully) choose the right encoding for you -# based on the encoding set in option "display.encoding" -# -# 3) if you need to write something out to file, use -# pprint_thing_encoded(encoding). -# -# If no encoding is specified, it defaults to utf-8. Since encoding pure -# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're -# working with straight ascii. - - -def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): - """ - internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. 
- - bounds length of printed sequence, depending on options - """ - if isinstance(seq, set): - fmt = u("{%s}") - else: - fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") - - if max_seq_items is False: - nitems = len(seq) - else: - nitems = max_seq_items or get_option("max_seq_items") or len(seq) - - s = iter(seq) - r = [] - for i in range(min(nitems, len(seq))): # handle sets, no slicing - r.append(pprint_thing( - next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)) - body = ", ".join(r) - - if nitems < len(seq): - body += ", ..." - elif isinstance(seq, tuple) and len(seq) == 1: - body += ',' - - return fmt % body - - -def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): - """ - internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. - """ - fmt = u("{%s}") - pairs = [] - - pfmt = u("%s: %s") - - if max_seq_items is False: - nitems = len(seq) - else: - nitems = max_seq_items or get_option("max_seq_items") or len(seq) - - for k, v in list(seq.items())[:nitems]: - pairs.append(pfmt % - (pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) - - if nitems < len(seq): - return fmt % (", ".join(pairs) + ", ...") - else: - return fmt % ", ".join(pairs) - - -def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, - quote_strings=False, max_seq_items=None): - """ - This function is the sanctioned way of converting objects - to a unicode representation. - - properly handles nested sequences containing unicode strings - (unicode(object) does not) - - Parameters - ---------- - thing : anything to be formatted - _nest_lvl : internal use only. pprint_thing() is mutually-recursive - with pprint_sequence, this argument is used to keep track of the - current nesting level, and limit it. - escape_chars : list or dict, optional - Characters to escape. If a dict is passed the values are the - replacements - default_escapes : bool, default False - Whether the input escape characters replaces or adds to the defaults - max_seq_items : False, int, default None - Pass thru to other pretty printers to limit sequence printing - - Returns - ------- - result - unicode object on py2, str on py3. Always Unicode. - - """ - - def as_escaped_unicode(thing, escape_chars=escape_chars): - # Unicode is fine, else we try to decode using utf-8 and 'replace' - # if that's not it either, we have no way of knowing and the user - # should deal with it himself. 
- - try: - result = compat.text_type(thing) # we should try this first - except UnicodeDecodeError: - # either utf-8 or we replace errors - result = str(thing).decode('utf-8', "replace") - - translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } - if isinstance(escape_chars, dict): - if default_escapes: - translate.update(escape_chars) - else: - translate = escape_chars - escape_chars = list(escape_chars.keys()) - else: - escape_chars = escape_chars or tuple() - for c in escape_chars: - result = result.replace(c, translate[c]) - - return compat.text_type(result) - - if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): - return compat.text_type(thing) - elif (isinstance(thing, dict) and - _nest_lvl < get_option("display.pprint_nest_depth")): - result = _pprint_dict(thing, _nest_lvl, quote_strings=True, - max_seq_items=max_seq_items) - elif (is_sequence(thing) and - _nest_lvl < get_option("display.pprint_nest_depth")): - result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, - quote_strings=quote_strings, - max_seq_items=max_seq_items) - elif isinstance(thing, compat.string_types) and quote_strings: - if compat.PY3: - fmt = "'%s'" - else: - fmt = "u'%s'" - result = fmt % as_escaped_unicode(thing) - else: - result = as_escaped_unicode(thing) - - return compat.text_type(result) # always unicode - - -def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): - value = pprint_thing(object) # get unicode representation of object - return value.encode(encoding, errors, **kwds) - - -def console_encode(object, **kwds): - """ - this is the sanctioned way to prepare something for - sending *to the console*, it delegates to pprint_thing() to get - a unicode representation of the object relies on the global encoding - set in display.encoding. Use this everywhere - where you output to the console. - """ - return pprint_thing_encoded(object, get_option("display.encoding")) - def _maybe_match_name(a, b): a_has = hasattr(a, 'name') @@ -2979,29 +2152,3 @@ def _random_state(state=None): else: raise ValueError("random_state must be an integer, a numpy " "RandomState, or None") - - -def pandas_dtype(dtype): - """ - Converts input into a pandas only dtype object or a numpy dtype object. 
- - Parameters - ---------- - dtype : object to be converted - - Returns - ------- - np.dtype or a pandas dtype - """ - if isinstance(dtype, compat.string_types): - try: - return DatetimeTZDtype.construct_from_string(dtype) - except TypeError: - pass - - try: - return CategoricalDtype.construct_from_string(dtype) - except TypeError: - pass - - return np.dtype(dtype) diff --git a/pandas/core/config.py b/pandas/core/config.py index b4f3e5214d09a..618de4e02b56f 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -773,7 +773,7 @@ def is_instance_factory(_type): """ if isinstance(_type, (tuple, list)): _type = tuple(_type) - from pandas.core.common import pprint_thing + from pandas.formats.printing import pprint_thing type_repr = "|".join(map(pprint_thing, _type)) else: type_repr = "'%s'" % _type @@ -791,7 +791,7 @@ def is_one_of_factory(legal_values): legal_values = [c for c in legal_values if not callable(c)] def inner(x): - from pandas.core.common import pprint_thing as pp + from pandas.formats.printing import pprint_thing as pp if x not in legal_values: if not any([c(x) for c in callables]): diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 0439fa0f3810c..3ca2c6cd014bc 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -15,7 +15,7 @@ from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory, is_one_of_factory, get_default_val, is_callable) -from pandas.core.format import detect_console_encoding +from pandas.formats.format import detect_console_encoding # # options from the "display" namespace @@ -110,7 +110,7 @@ The callable should accept a floating point number and return a string with the desired format of the number. This is used in some places like SeriesFormatter. - See core.format.EngFormatter for an example. + See formats.format.EngFormatter for an example. """ max_colwidth_doc = """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4b044c7780e5..99fa722aebb7b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -39,6 +39,7 @@ from pandas.core.series import Series from pandas.core.categorical import Categorical import pandas.computation.expressions as expressions +import pandas.core.algorithms as algos from pandas.computation.eval import eval as _eval from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) @@ -53,9 +54,10 @@ import pandas.core.base as base import pandas.core.common as com -import pandas.core.format as fmt import pandas.core.nanops as nanops import pandas.core.ops as ops +import pandas.formats.format as fmt +from pandas.formats.printing import pprint_thing import pandas.tools.plotting as gfx import pandas.lib as lib @@ -585,9 +587,9 @@ def style(self): See Also -------- - pandas.core.style.Styler + pandas.formats.style.Styler """ - from pandas.core.style import Styler + from pandas.formats.style import Styler return Styler(self) def iteritems(self): @@ -1633,7 +1635,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, - If False, never show counts. 
""" - from pandas.core.format import _put_lines + from pandas.formats.format import _put_lines if buf is None: # pragma: no cover buf = sys.stdout @@ -1667,7 +1669,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, def _verbose_repr(): lines.append('Data columns (total %d columns):' % len(self.columns)) - space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4 + space = max([len(pprint_thing(k)) for k in self.columns]) + 4 counts = None tmpl = "%s%s" @@ -1681,7 +1683,7 @@ def _verbose_repr(): dtypes = self.dtypes for i, col in enumerate(self.columns): dtype = dtypes.iloc[i] - col = com.pprint_thing(col) + col = pprint_thing(col) count = "" if show_counts: @@ -2709,8 +2711,8 @@ def _reindex_multi(self, axes, copy, fill_value): if row_indexer is not None and col_indexer is not None: indexer = row_indexer, col_indexer - new_values = com.take_2d_multi(self.values, indexer, - fill_value=fill_value) + new_values = algos.take_2d_multi(self.values, indexer, + fill_value=fill_value) return self._constructor(new_values, index=new_index, columns=new_columns) else: @@ -3084,11 +3086,11 @@ def duplicated(self, subset=None, keep='first'): duplicated : Series """ from pandas.core.groupby import get_group_index - from pandas.core.algorithms import factorize from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT def f(vals): - labels, shape = factorize(vals, size_hint=min(len(self), + labels, shape = algos.factorize(vals, + size_hint=min(len(self), _SIZE_HINT_LIMIT)) return labels.astype('i8', copy=False), len(shape) @@ -4144,7 +4146,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): if i is not None: k = res_index[i] e.args = e.args + ('occurred at index %s' % - com.pprint_thing(k), ) + pprint_thing(k), ) raise if len(results) > 0 and is_sequence(results[0]): @@ -5436,7 +5438,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): indexer = indexer_cache[id(index)] = index.get_indexer(columns) values = _values_from_object(s) - aligned_values.append(com.take_1d(values, indexer)) + aligned_values.append(algos.take_1d(values, indexer)) values = np.vstack(aligned_values) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d8ee85df58e11..e450ac7e0cdc1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -19,6 +19,7 @@ import pandas.core.common as com import pandas.core.missing as missing import pandas.core.datetools as datetools +from pandas.formats.printing import pprint_thing from pandas import compat from pandas.compat import (map, zip, lrange, string_types, isidentifier, set_function_name) @@ -54,7 +55,7 @@ def _single_replace(self, to_replace, method, inplace, limit): result = self if inplace else self.copy() fill_f = missing.get_fill_func(method) - mask = com.mask_missing(result.values, to_replace) + mask = missing.mask_missing(result.values, to_replace) values = fill_f(result.values, limit=limit, mask=mask) if values.dtype == orig_dtype and inplace: @@ -150,7 +151,7 @@ def _constructor(self): def __unicode__(self): # unicode representation based upon iterating over self # (since, by definition, `PandasContainers` are iterable) - prepr = '[%s]' % ','.join(map(com.pprint_thing, self)) + prepr = '[%s]' % ','.join(map(pprint_thing, self)) return '%s(%s)' % (self.__class__.__name__, prepr) def _dir_additions(self): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f013408185c90..a0a358717fdc6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ 
-24,6 +24,7 @@ from pandas.core.panel import Panel from pandas.util.decorators import (cache_readonly, Substitution, Appender, make_signature, deprecate_kwarg) +from pandas.formats.printing import pprint_thing import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.common import(_possibly_downcast_to_dtype, isnull, @@ -1351,7 +1352,7 @@ def shift(self, periods=1, freq=None, axis=0): output = {} for name, obj in self._iterate_slices(): - output[name] = com.take_nd(obj.values, indexer) + output[name] = algos.take_nd(obj.values, indexer) return self._wrap_transformed_output(output) @@ -1873,7 +1874,7 @@ def _aggregate_series_fast(self, obj, func): dummy = obj._get_values(slice(None, 0)).to_dense() indexer = _get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer, convert=False) - group_index = com.take_nd(group_index, indexer, allow_fill=False) + group_index = algos.take_nd(group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, dummy) result, counts = grouper.get_result() @@ -2213,7 +2214,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, len(self.grouper) == len(self.index)): errmsg = ('Grouper result violates len(labels) == ' 'len(data)\nresult: %s' % - com.pprint_thing(self.grouper)) + pprint_thing(self.grouper)) self.grouper = None # Try for sanity raise AssertionError(errmsg) @@ -3850,7 +3851,7 @@ def __init__(self, data, labels, ngroups, axis=0): @cache_readonly def slabels(self): # Sorted labels - return com.take_nd(self.labels, self.sort_idx, allow_fill=False) + return algos.take_nd(self.labels, self.sort_idx, allow_fill=False) @cache_readonly def sort_idx(self): @@ -4278,11 +4279,11 @@ def _reorder_by_uniques(uniques, labels): mask = labels < 0 # move labels to right locations (ie, unsort ascending labels) - labels = com.take_nd(reverse_indexer, labels, allow_fill=False) + labels = algos.take_nd(reverse_indexer, labels, allow_fill=False) np.putmask(labels, mask, -1) # sort observed ids - uniques = com.take_nd(uniques, sorter, allow_fill=False) + uniques = algos.take_nd(uniques, sorter, allow_fill=False) return uniques, labels diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c5353f6fef6dc..585eaf2261420 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -20,12 +20,14 @@ _maybe_convert_string_to_object, is_categorical, is_datetimelike_v_numeric, is_numeric_v_string_like, is_internal_type) -from pandas.core.dtypes import DatetimeTZDtype +import pandas.core.algorithms as algos +from pandas.types.api import DatetimeTZDtype from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical from pandas.tseries.index import DatetimeIndex +from pandas.formats.printing import pprint_thing import pandas.core.common as com import pandas.core.missing as missing import pandas.core.convert as convert @@ -194,15 +196,15 @@ def mgr_locs(self, new_mgr_locs): def __unicode__(self): # don't want to print out all of the items here - name = com.pprint_thing(self.__class__.__name__) + name = pprint_thing(self.__class__.__name__) if self._is_single_block: result = '%s: %s dtype: %s' % (name, len(self), self.dtype) else: - shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) - result = '%s: %s, %s, dtype: %s' % (name, com.pprint_thing( + shape = ' x '.join([pprint_thing(s) for s in self.shape]) + result 
= '%s: %s, %s, dtype: %s' % (name, pprint_thing( self.mgr_locs.indexer), shape, self.dtype) return result @@ -286,8 +288,8 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, if fill_value is None: fill_value = self.fill_value - new_values = com.take_nd(self.values, indexer, axis, - fill_value=fill_value, mask_info=mask_info) + new_values = algos.take_nd(self.values, indexer, axis, + fill_value=fill_value, mask_info=mask_info) return self.make_block(new_values, fastpath=True) def get(self, item): @@ -597,7 +599,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, try: values, _, to_replace, _ = self._try_coerce_args(self.values, to_replace) - mask = com.mask_missing(values, to_replace) + mask = missing.mask_missing(values, to_replace) if filter is not None: filtered_out = ~self.mgr_locs.isin(filter) mask[filtered_out.nonzero()[0]] = False @@ -974,7 +976,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): """ - # com.take_nd dispatches for DatetimeTZBlock, CategoricalBlock + # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock # so need to preserve types # sparse is treated like an ndarray, but needs .get_values() shaping @@ -984,12 +986,12 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): if fill_tuple is None: fill_value = self.fill_value - new_values = com.take_nd(values, indexer, axis=axis, - allow_fill=False) + new_values = algos.take_nd(values, indexer, axis=axis, + allow_fill=False) else: fill_value = fill_tuple[0] - new_values = com.take_nd(values, indexer, axis=axis, - allow_fill=True, fill_value=fill_value) + new_values = algos.take_nd(values, indexer, axis=axis, + allow_fill=True, fill_value=fill_value) if new_mgr_locs is None: if axis == 0: @@ -1008,7 +1010,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): def diff(self, n, axis=1, mgr=None): """ return block for the diff of the values """ - new_values = com.diff(self.values, n, axis=axis) + new_values = algos.diff(self.values, n, axis=axis) return [self.make_block(values=new_values, fastpath=True)] def shift(self, periods, axis=0, mgr=None): @@ -1430,7 +1432,7 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None, if slicer is not None: values = values[:, slicer] - from pandas.core.format import FloatArrayFormatter + from pandas.formats.format import FloatArrayFormatter formatter = FloatArrayFormatter(values, na_rep=na_rep, float_format=float_format, decimal=decimal, quoting=quoting, @@ -1605,7 +1607,7 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, imask = (~mask).ravel() # FIXME: - # should use the core.format.Timedelta64Formatter here + # should use the formats.format.Timedelta64Formatter here # to figure what format to pass to the Timedelta # e.g. 
to not show the decimals say rvalues.flat[imask] = np.array([Timedelta(val)._repr_base(format='all') @@ -2127,7 +2129,7 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None, if slicer is not None: values = values[..., slicer] - from pandas.core.format import _get_format_datetime64_from_values + from pandas.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(values, date_format) result = tslib.format_array_from_datetime( @@ -2711,11 +2713,11 @@ def get_ftype_counts(self): def get_dtypes(self): dtypes = np.array([blk.dtype for blk in self.blocks]) - return com.take_1d(dtypes, self._blknos, allow_fill=False) + return algos.take_1d(dtypes, self._blknos, allow_fill=False) def get_ftypes(self): ftypes = np.array([blk.ftype for blk in self.blocks]) - return com.take_1d(ftypes, self._blknos, allow_fill=False) + return algos.take_1d(ftypes, self._blknos, allow_fill=False) def __getstate__(self): block_values = [b.values for b in self.blocks] @@ -2782,7 +2784,7 @@ def __len__(self): return len(self.items) def __unicode__(self): - output = com.pprint_thing(self.__class__.__name__) + output = pprint_thing(self.__class__.__name__) for i, ax in enumerate(self.axes): if i == 0: output += u('\nItems: %s') % ax @@ -2790,7 +2792,7 @@ def __unicode__(self): output += u('\nAxis %d: %s') % (i, ax) for block in self.blocks: - output += u('\n%s') % com.pprint_thing(block) + output += u('\n%s') % pprint_thing(block) return output def _verify_integrity(self): @@ -3070,8 +3072,8 @@ def combine(self, blocks, copy=True): new_blocks = [] for b in blocks: b = b.copy(deep=copy) - b.mgr_locs = com.take_1d(inv_indexer, b.mgr_locs.as_array, axis=0, - allow_fill=False) + b.mgr_locs = algos.take_1d(inv_indexer, b.mgr_locs.as_array, + axis=0, allow_fill=False) new_blocks.append(b) new_axes = list(self.axes) @@ -3451,8 +3453,8 @@ def value_getitem(placement): new_blknos.fill(-1) new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos)) - self._blknos = com.take_1d(new_blknos, self._blknos, axis=0, - allow_fill=False) + self._blknos = algos.take_1d(new_blknos, self._blknos, axis=0, + allow_fill=False) self.blocks = tuple(blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos)) @@ -3632,10 +3634,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): blknos = self._blknos[slobj] blklocs = self._blklocs[slobj] else: - blknos = com.take_1d(self._blknos, slobj, fill_value=-1, - allow_fill=allow_fill) - blklocs = com.take_1d(self._blklocs, slobj, fill_value=-1, - allow_fill=allow_fill) + blknos = algos.take_1d(self._blknos, slobj, fill_value=-1, + allow_fill=allow_fill) + blklocs = algos.take_1d(self._blklocs, slobj, fill_value=-1, + allow_fill=allow_fill) # When filling blknos, make sure blknos is updated before appending to # blocks list, that way new blkno is exactly len(blocks). 
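# Illustrative sketch (not part of the patch): the hunks around this point
# re-point com.take_1d/com.take_nd call sites at pandas.core.algorithms.
# Those helpers implement "take with fill": -1 entries in the indexer are
# filled with fill_value, upcasting the result dtype when needed. A minimal
# pure-numpy sketch of the 1-D case (take_1d_sketch is a hypothetical name,
# not the pandas implementation):

import numpy as np

def take_1d_sketch(arr, indexer, fill_value=np.nan):
    indexer = np.asarray(indexer, dtype=np.intp)
    out = np.asarray(arr).take(indexer)  # -1 wraps to the last element here
    mask = indexer == -1
    if mask.any():
        # upcast (e.g. int64 -> float64) so fill_value can be stored
        out = out.astype(np.result_type(out.dtype, np.asarray(fill_value).dtype))
        out[mask] = fill_value
    return out

# take_1d_sketch(np.array([10, 20, 30]), [0, 2, -1]) -> array([10., 30., nan])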
@@ -3847,7 +3849,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None, else: fill_value = np.nan - new_values = com.take_1d(values, indexer, fill_value=fill_value) + new_values = algos.take_1d(values, indexer, fill_value=fill_value) # fill if needed if method is not None or limit is not None: @@ -4676,8 +4678,8 @@ def get_mgr_concatenation_plan(mgr, indexers): if 0 in indexers: ax0_indexer = indexers.pop(0) - blknos = com.take_1d(mgr._blknos, ax0_indexer, fill_value=-1) - blklocs = com.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1) + blknos = algos.take_1d(mgr._blknos, ax0_indexer, fill_value=-1) + blklocs = algos.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1) else: if mgr._is_single_block: @@ -4932,8 +4934,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): else: for ax, indexer in self.indexers.items(): - values = com.take_nd(values, indexer, axis=ax, - fill_value=fill_value) + values = algos.take_nd(values, indexer, axis=ax, + fill_value=fill_value) return values diff --git a/pandas/core/missing.py b/pandas/core/missing.py index a8ca5e452c7ac..7ca96ef7b602e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -10,6 +10,53 @@ from pandas.compat import range +def mask_missing(arr, values_to_mask): + """ + Return a masking array of same size/shape as arr + with entries equal to any member of values_to_mask set to True + """ + if not isinstance(values_to_mask, (list, np.ndarray)): + values_to_mask = [values_to_mask] + + try: + values_to_mask = np.array(values_to_mask, dtype=arr.dtype) + except Exception: + values_to_mask = np.array(values_to_mask, dtype=object) + + na_mask = com.isnull(values_to_mask) + nonna = values_to_mask[~na_mask] + + mask = None + for x in nonna: + if mask is None: + + # numpy elementwise comparison warning + if com.is_numeric_v_string_like(arr, x): + mask = False + else: + mask = arr == x + + # if x is a string and arr is not, then we get False and we must + # expand the mask to size arr.shape + if lib.isscalar(mask): + mask = np.zeros(arr.shape, dtype=bool) + else: + + # numpy elementwise comparison warning + if com.is_numeric_v_string_like(arr, x): + mask |= False + else: + mask |= arr == x + + if na_mask.any(): + if mask is None: + mask = com.isnull(arr) + else: + mask |= com.isnull(arr) + + return mask + + def clean_fill_method(method, allow_nearest=False): if method is None: return None @@ -239,7 +286,7 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, if fill_value is None: mask = None else: # todo create faster fill func without masking - mask = com.mask_missing(transf(values), fill_value) + mask = mask_missing(transf(values), fill_value) method = clean_fill_method(method) if method == 'pad': diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 11161d8a5d186..cb0d06c1739b6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -19,6 +19,7 @@ from pandas.tslib import iNaT from pandas.compat import bind_method import pandas.core.missing as missing +import pandas.core.algorithms as algos from pandas.core.common import (is_list_like, notnull, isnull, _values_from_object, _maybe_match_name, needs_i8_conversion, is_datetimelike_v_numeric, @@ -632,10 +633,10 @@ def wrapper(left, right, name=name, na_op=na_op): return_indexers=True) if lidx is not None: - lvalues = com.take_1d(lvalues, lidx) + lvalues = algos.take_1d(lvalues, lidx) if ridx is not None: - rvalues = com.take_1d(rvalues, ridx) + rvalues = algos.take_1d(rvalues, ridx) arr = na_op(lvalues, rvalues)
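# Illustrative sketch (not part of the patch): mask_missing, added above,
# returns a boolean mask marking where arr equals any member of
# values_to_mask; NaN members are matched via isnull() rather than ==.
# Assuming a source tree at this revision:

import numpy as np
from pandas.core.missing import mask_missing

arr = np.array([1.0, 2.0, np.nan, 4.0])
mask_missing(arr, [2.0, np.nan])  # -> array([False,  True,  True, False])

diff --git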
a/pandas/core/panel.py b/pandas/core/panel.py index f0f3803c62566..b84079ffc4ffd 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -22,6 +22,7 @@ from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, _get_combined_index) +from pandas.formats.printing import pprint_thing from pandas.core.indexing import maybe_droplevels from pandas.core.internals import (BlockManager, create_block_manager_from_arrays, @@ -345,8 +346,8 @@ def axis_pretty(a): v = getattr(self, a) if len(v) > 0: return u('%s axis: %s to %s') % (a.capitalize(), - com.pprint_thing(v[0]), - com.pprint_thing(v[-1])) + pprint_thing(v[0]), + pprint_thing(v[-1])) else: return u('%s axis: None') % a.capitalize() diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 5ee3e4f08d285..5c775f8a0d937 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -18,7 +18,8 @@ from pandas.core.groupby import get_group_index, _compress_group_index import pandas.core.common as com -import pandas.algos as algos +import pandas.core.algorithms as algos +import pandas.algos as _algos from pandas.core.index import MultiIndex, _get_na_value @@ -109,10 +110,10 @@ def _make_sorted_values_labels(self): comp_index, obs_ids = get_compressed_ids(to_sort, sizes) ngroups = len(obs_ids) - indexer = algos.groupsort_indexer(comp_index, ngroups)[0] + indexer = _algos.groupsort_indexer(comp_index, ngroups)[0] indexer = _ensure_platform_int(indexer) - self.sorted_values = com.take_nd(self.values, indexer, axis=0) + self.sorted_values = algos.take_nd(self.values, indexer, axis=0) self.sorted_labels = [l.take(indexer) for l in to_sort] def _make_selectors(self): @@ -155,7 +156,7 @@ def get_result(self): # rare case, level values not observed if len(obs_ids) < self.full_shape[1]: inds = (value_mask.sum(0) > 0).nonzero()[0] - values = com.take_nd(values, inds, axis=1) + values = algos.take_nd(values, inds, axis=1) columns = columns[inds] # may need to coerce categoricals here diff --git a/pandas/core/series.py b/pandas/core/series.py index 7c1d4663fc6b4..ac8f073d0f0a1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -44,12 +44,12 @@ import pandas.core.ops as ops -from pandas.core import algorithms +import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.datetools as datetools -import pandas.core.format as fmt import pandas.core.nanops as nanops +import pandas.formats.format as fmt from pandas.util.decorators import Appender, deprecate_kwarg, Substitution import pandas.lib as lib @@ -1202,7 +1202,7 @@ def mode(self): modes : Series (sorted) """ # TODO: Add option for bins like value_counts() - return algorithms.mode(self) + return algos.mode(self) @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @@ -1424,7 +1424,7 @@ def diff(self, periods=1): ------- diffed : Series """ - result = com.diff(_values_from_object(self), periods) + result = algos.diff(_values_from_object(self), periods) return self._constructor(result, index=self.index).__finalize__(self) def autocorr(self, lag=1): @@ -1889,7 +1889,7 @@ def nlargest(self, n=5, keep='first'): >>> s = pd.Series(np.random.randn(1e6)) >>> s.nlargest(10) # only sorts up to the N requested """ - return algorithms.select_n(self, n=n, keep=keep, method='nlargest') + return algos.select_n(self, n=n, keep=keep, method='nlargest') @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @@ -1927,7 +1927,7 @@ def nsmallest(self, n=5, 
keep='first'): >>> s = pd.Series(np.random.randn(1e6)) >>> s.nsmallest(10) # only sorts up to the N requested """ - return algorithms.select_n(self, n=n, keep=keep, method='nsmallest') + return algos.select_n(self, n=n, keep=keep, method='nsmallest') def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ @@ -2081,7 +2081,7 @@ def map_f(values, f): arg = self._constructor(arg, index=arg.keys()) indexer = arg.index.get_indexer(values) - new_values = com.take_1d(arg._values, indexer) + new_values = algos.take_1d(arg._values, indexer) return self._constructor(new_values, index=self.index).__finalize__(self) else: @@ -2233,7 +2233,7 @@ def _reindex_indexer(self, new_index, indexer, copy): return self # be subclass-friendly - new_values = com.take_1d(self.get_values(), indexer) + new_values = algos.take_1d(self.get_values(), indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): @@ -2384,7 +2384,7 @@ def isin(self, values): dtype: bool """ - result = algorithms.isin(_values_from_object(self), values) + result = algos.isin(_values_from_object(self), values) return self._constructor(result, index=self.index).__finalize__(self) def between(self, left, right, inclusive=True): @@ -2627,7 +2627,7 @@ def asof(self, where): where = Index(where) locs = self.index.asof_locs(where, notnull(values)) - new_values = com.take_1d(values, locs) + new_values = algos.take_1d(values, locs) return self._constructor(new_values, index=where).__finalize__(self) def to_timestamp(self, freq=None, how='start', copy=True): diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a7ed1ba0c0be0..81e1922db1b09 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -4,7 +4,8 @@ from pandas.core.common import (isnull, notnull, _values_from_object, is_bool_dtype, is_list_like, is_categorical_dtype, - is_object_dtype, take_1d) + is_object_dtype) +from pandas.core.algorithms import take_1d import pandas.compat as compat from pandas.core.base import AccessorProperty, NoNewAttributesMixin from pandas.util.decorators import Appender, deprecate_kwarg diff --git a/pandas/formats/__init__.py b/pandas/formats/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/core/format.py b/pandas/formats/format.py similarity index 99% rename from pandas/core/format.py rename to pandas/formats/format.py index 16a870cbc6901..cdebaf28634af 100644 --- a/pandas/core/format.py +++ b/pandas/formats/format.py @@ -14,6 +14,7 @@ from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option, set_option from pandas.io.common import _get_handle, UnicodeWriter, _expand_user +from pandas.formats.printing import adjoin, justify, pprint_thing import pandas.core.common as com import pandas.lib as lib from pandas.tslib import iNaT, Timestamp, Timedelta, format_array_from_datetime @@ -171,8 +172,8 @@ def _get_footer(self): if footer: footer += ', ' - series_name = com.pprint_thing(name, - escape_chars=('\t', '\r', '\n')) + series_name = pprint_thing(name, + escape_chars=('\t', '\r', '\n')) footer += ("Name: %s" % series_name) if name is not None else "" if self.length: @@ -185,7 +186,7 @@ def _get_footer(self): if name: if footer: footer += ', ' - footer += 'dtype: %s' % com.pprint_thing(name) + footer += 'dtype: %s' % pprint_thing(name) # level infos are added to the end and in a new line, like it is done # for Categoricals @@ -260,11 +261,11 @@ def len(self, text): return compat.strlen(text, 
encoding=self.encoding) def justify(self, texts, max_len, mode='right'): - return com._justify(texts, max_len, mode=mode) + return justify(texts, max_len, mode=mode) def adjoin(self, space, *lists, **kwargs): - return com.adjoin(space, *lists, strlen=self.len, - justfunc=self.justify, **kwargs) + return adjoin(space, *lists, strlen=self.len, + justfunc=self.justify, **kwargs) class EastAsianTextAdjustment(TextAdjustment): @@ -541,8 +542,8 @@ def to_string(self): if len(frame.columns) == 0 or len(frame.index) == 0: info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % (type(self.frame).__name__, - com.pprint_thing(frame.columns), - com.pprint_thing(frame.index))) + pprint_thing(frame.columns), + pprint_thing(frame.index))) text = info_line else: strcols = self._to_str_columns() @@ -908,7 +909,7 @@ def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, self.notebook = notebook def write(self, s, indent=0): - rs = com.pprint_thing(s) + rs = pprint_thing(s) self.elements.append(' ' * indent + rs) def write_th(self, s, indent=0, tags=None): @@ -933,7 +934,7 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): ('>', r'&gt;')]) else: esc = {} - rs = com.pprint_thing(s, escape_chars=esc).strip() + rs = pprint_thing(s, escape_chars=esc).strip() self.write('%s%s</%s>' % (start_tag, rs, kind), indent) def write_tr(self, line, indent=0, indent_delta=4, header=False, @@ -1090,7 +1091,7 @@ def _column_header(): name = self.columns.names[lnum] row = [''] * (row_levels - 1) + ['' if name is None else - com.pprint_thing(name)] + pprint_thing(name)] if row == [""] and self.fmt.index is False: row = [] @@ -1803,7 +1804,7 @@ def _format_header_mi(self): else: # Format in legacy format with dots to indicate levels. for i, values in enumerate(zip(*level_strs)): - v = ".".join(map(com.pprint_thing, values)) + v = ".".join(map(pprint_thing, values)) yield ExcelCell(lnum, coloffset + i + 1, v, header_style) self.rowcounter = lnum @@ -2036,7 +2037,7 @@ def _format_strings(self): formatter = ( self.formatter if self.formatter is not None else - (lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n')))) + (lambda x: pprint_thing(x, escape_chars=('\t', '\r', '\n')))) def _format(x): if self.na_rep is not None and lib.checknull(x): diff --git a/pandas/formats/printing.py b/pandas/formats/printing.py new file mode 100644 index 0000000000000..a4eaec8d5334b --- /dev/null +++ b/pandas/formats/printing.py @@ -0,0 +1,236 @@ +""" +printing tools +""" + +from pandas import compat +from pandas.compat import u +import pandas.core.common as com +from pandas.core.config import get_option + + +def adjoin(space, *lists, **kwargs): + """ + Glues together two sets of strings using the amount of space requested. + The idea is to prettify. + + Parameters + ---------- + space : int + number of spaces for padding + lists : str + list of str to be joined + strlen : callable + function used to calculate the length of each str. Needed for unicode + handling. + justfunc : callable + function used to justify str. Needed for unicode handling.
+ """ + strlen = kwargs.pop('strlen', len) + justfunc = kwargs.pop('justfunc', justify) + + out_lines = [] + newLists = [] + lengths = [max(map(strlen, x)) + space for x in lists[:-1]] + # not the last one + lengths.append(max(map(len, lists[-1]))) + maxLen = max(map(len, lists)) + for i, lst in enumerate(lists): + nl = justfunc(lst, lengths[i], mode='left') + nl.extend([' ' * lengths[i]] * (maxLen - len(lst))) + newLists.append(nl) + toJoin = zip(*newLists) + for lines in toJoin: + out_lines.append(_join_unicode(lines)) + return _join_unicode(out_lines, sep='\n') + + +def justify(texts, max_len, mode='right'): + """ + Perform ljust, center, rjust against string or list-like + """ + if mode == 'left': + return [x.ljust(max_len) for x in texts] + elif mode == 'center': + return [x.center(max_len) for x in texts] + else: + return [x.rjust(max_len) for x in texts] + + +def _join_unicode(lines, sep=''): + try: + return sep.join(lines) + except UnicodeDecodeError: + sep = compat.text_type(sep) + return sep.join([x.decode('utf-8') if isinstance(x, str) else x + for x in lines]) + + +# Unicode consolidation +# --------------------- +# +# pprinting utility functions for generating Unicode text or +# bytes(3.x)/str(2.x) representations of objects. +# Try to use these as much as possible rather then rolling your own. +# +# When to use +# ----------- +# +# 1) If you're writing code internal to pandas (no I/O directly involved), +# use pprint_thing(). +# +# It will always return unicode text which can handled by other +# parts of the package without breakage. +# +# 2) If you need to send something to the console, use console_encode(). +# +# console_encode() should (hopefully) choose the right encoding for you +# based on the encoding set in option "display.encoding" +# +# 3) if you need to write something out to file, use +# pprint_thing_encoded(encoding). +# +# If no encoding is specified, it defaults to utf-8. Since encoding pure +# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're +# working with straight ascii. + + +def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. + + bounds length of printed sequence, depending on options + """ + if isinstance(seq, set): + fmt = u("{%s}") + else: + fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + s = iter(seq) + r = [] + for i in range(min(nitems, len(seq))): # handle sets, no slicing + r.append(pprint_thing( + next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)) + body = ", ".join(r) + + if nitems < len(seq): + body += ", ..." + elif isinstance(seq, tuple) and len(seq) == 1: + body += ',' + + return fmt % body + + +def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. 
+ """ + fmt = u("{%s}") + pairs = [] + + pfmt = u("%s: %s") + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + for k, v in list(seq.items())[:nitems]: + pairs.append(pfmt % + (pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) + + if nitems < len(seq): + return fmt % (", ".join(pairs) + ", ...") + else: + return fmt % ", ".join(pairs) + + +def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, + quote_strings=False, max_seq_items=None): + """ + This function is the sanctioned way of converting objects + to a unicode representation. + + properly handles nested sequences containing unicode strings + (unicode(object) does not) + + Parameters + ---------- + thing : anything to be formatted + _nest_lvl : internal use only. pprint_thing() is mutually-recursive + with pprint_sequence, this argument is used to keep track of the + current nesting level, and limit it. + escape_chars : list or dict, optional + Characters to escape. If a dict is passed the values are the + replacements + default_escapes : bool, default False + Whether the input escape characters replaces or adds to the defaults + max_seq_items : False, int, default None + Pass thru to other pretty printers to limit sequence printing + + Returns + ------- + result - unicode object on py2, str on py3. Always Unicode. + + """ + + def as_escaped_unicode(thing, escape_chars=escape_chars): + # Unicode is fine, else we try to decode using utf-8 and 'replace' + # if that's not it either, we have no way of knowing and the user + # should deal with it himself. + + try: + result = compat.text_type(thing) # we should try this first + except UnicodeDecodeError: + # either utf-8 or we replace errors + result = str(thing).decode('utf-8', "replace") + + translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } + if isinstance(escape_chars, dict): + if default_escapes: + translate.update(escape_chars) + else: + translate = escape_chars + escape_chars = list(escape_chars.keys()) + else: + escape_chars = escape_chars or tuple() + for c in escape_chars: + result = result.replace(c, translate[c]) + + return compat.text_type(result) + + if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): + return compat.text_type(thing) + elif (isinstance(thing, dict) and + _nest_lvl < get_option("display.pprint_nest_depth")): + result = _pprint_dict(thing, _nest_lvl, quote_strings=True, + max_seq_items=max_seq_items) + elif (com.is_sequence(thing) and + _nest_lvl < get_option("display.pprint_nest_depth")): + result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, + quote_strings=quote_strings, + max_seq_items=max_seq_items) + elif isinstance(thing, compat.string_types) and quote_strings: + if compat.PY3: + fmt = "'%s'" + else: + fmt = "u'%s'" + result = fmt % as_escaped_unicode(thing) + else: + result = as_escaped_unicode(thing) + + return compat.text_type(result) # always unicode + + +def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): + value = pprint_thing(object) # get unicode representation of object + return value.encode(encoding, errors, **kwds) diff --git a/pandas/core/style.py b/pandas/formats/style.py similarity index 100% rename from pandas/core/style.py rename to pandas/formats/style.py diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index dedabd1126b09..94f85d40c73cc 100644 --- 
a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -11,7 +11,6 @@ from pandas.compat import range, u from pandas import compat -from pandas.core import algorithms from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin) import pandas.core.base as base @@ -19,6 +18,8 @@ deprecate, deprecate_kwarg) import pandas.core.common as com import pandas.core.missing as missing +import pandas.core.algorithms as algos +from pandas.formats.printing import pprint_thing from pandas.core.common import (isnull, array_equivalent, is_object_dtype, is_datetimetz, ABCSeries, ABCPeriodIndex, ABCMultiIndex, @@ -33,8 +34,8 @@ # simplify default_pprint = lambda x, max_seq_items=None: \ - com.pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, - max_seq_items=max_seq_items) + pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, + max_seq_items=max_seq_items) __all__ = ['Index'] @@ -609,7 +610,7 @@ def _format_data(self): """ Return the formatted data as a unicode string """ - from pandas.core.format import get_console_size, _get_adjustment + from pandas.formats.format import get_console_size, _get_adjustment display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 @@ -888,8 +889,8 @@ def summary(self, name=None): if (hasattr(tail, 'format') and not isinstance(tail, compat.string_types)): tail = tail.format() - index_summary = ', %s to %s' % (com.pprint_thing(head), - com.pprint_thing(tail)) + index_summary = ', %s to %s' % (pprint_thing(head), + pprint_thing(tail)) else: index_summary = '' @@ -1444,8 +1445,8 @@ def format(self, name=False, formatter=None, **kwargs): """ header = [] if name: - header.append(com.pprint_thing(self.name, - escape_chars=('\t', '\r', '\n')) if + header.append(pprint_thing(self.name, + escape_chars=('\t', '\r', '\n')) if self.name is not None else '') if formatter is not None: @@ -1456,7 +1457,7 @@ def format(self, name=False, formatter=None, **kwargs): def _format_with_header(self, header, na_rep='NaN', **kwargs): values = self.values - from pandas.core.format import format_array + from pandas.formats.format import format_array if is_categorical_dtype(values.dtype): values = np.array(values) @@ -1464,7 +1465,7 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): values = lib.maybe_convert_objects(values, safe=1) if is_object_dtype(values.dtype): - result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n')) + result = [pprint_thing(x, escape_chars=('\t', '\r', '\n')) for x in values] # could have nans @@ -1710,8 +1711,8 @@ def union(self, other): indexer, = (indexer == -1).nonzero() if len(indexer) > 0: - other_diff = com.take_nd(other._values, indexer, - allow_fill=False) + other_diff = algos.take_nd(other._values, indexer, + allow_fill=False) result = com._concat_compat((self.values, other_diff)) try: @@ -2227,7 +2228,7 @@ def isin(self, values, level=None): """ if level is not None: self._validate_index_level(level) - return algorithms.isin(np.array(self), values) + return algos.isin(np.array(self), values) def _can_reindex(self, indexer): """ @@ -2611,8 +2612,8 @@ def _get_leaf_sorter(labels): rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) - new_lev_labels = com.take_nd(rev_indexer, left.labels[level], - allow_fill=False) + new_lev_labels = algos.take_nd(rev_indexer, left.labels[level], + allow_fill=False) new_labels = list(left.labels) new_labels[level] = new_lev_labels @@ -2654,9 +2655,9 @@ def _get_leaf_sorter(labels): 
names=left.names, verify_integrity=False) if right_lev_indexer is not None: - right_indexer = com.take_nd(right_lev_indexer, - join_index.labels[level], - allow_fill=False) + right_indexer = algos.take_nd(right_lev_indexer, + join_index.labels[level], + allow_fill=False) else: right_indexer = join_index.labels[level] diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 773852f986fe1..b58c5382f628c 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -18,6 +18,8 @@ deprecate, deprecate_kwarg) import pandas.core.common as com import pandas.core.missing as missing +import pandas.core.algorithms as algos +from pandas.formats.printing import pprint_thing from pandas.core.common import (isnull, array_equivalent, is_object_dtype, _values_from_object, @@ -540,12 +542,12 @@ def values(self): box = hasattr(lev, '_box_values') # Try to minimize boxing. if box and len(lev) > len(lab): - taken = lev._box_values(com.take_1d(lev._values, lab)) + taken = lev._box_values(algos.take_1d(lev._values, lab)) elif box: - taken = com.take_1d(lev._box_values(lev._values), lab, - fill_value=_get_na_value(lev.dtype.type)) + taken = algos.take_1d(lev._box_values(lev._values), lab, + fill_value=_get_na_value(lev.dtype.type)) else: - taken = com.take_1d(np.asarray(lev._values), lab) + taken = algos.take_1d(np.asarray(lev._values), lab) values.append(taken) self._tuples = lib.fast_zip(values) @@ -661,8 +663,8 @@ def get_level_values(self, level): num = self._get_level_number(level) unique = self.levels[num] # .values labels = self.labels[num] - filled = com.take_1d(unique.values, labels, - fill_value=unique._na_value) + filled = algos.take_1d(unique.values, labels, + fill_value=unique._na_value) _simple_new = unique._simple_new values = _simple_new(filled, self.names[num], freq=getattr(unique, 'freq', None), @@ -691,9 +693,9 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, else: # weird all NA case - formatted = [com.pprint_thing(na if isnull(x) else x, - escape_chars=('\t', '\r', '\n')) - for x in com.take_1d(lev._values, lab)] + formatted = [pprint_thing(na if isnull(x) else x, + escape_chars=('\t', '\r', '\n')) + for x in algos.take_1d(lev._values, lab)] stringified_levels.append(formatted) result_levels = [] @@ -701,8 +703,8 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, level = [] if names: - level.append(com.pprint_thing(name, - escape_chars=('\t', '\r', '\n')) + level.append(pprint_thing(name, + escape_chars=('\t', '\r', '\n')) if name is not None else '') level.extend(np.array(lev, dtype=object)) @@ -723,7 +725,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, sentinel=sentinel) if adjoin: - from pandas.core.format import _get_adjustment + from pandas.formats.format import _get_adjustment adj = _get_adjustment() return adj.adjoin(space, *result_levels).split('\n') else: @@ -1957,10 +1959,10 @@ def equals(self, other): return False for i in range(self.nlevels): - svalues = com.take_nd(np.asarray(self.levels[i]._values), - self.labels[i], allow_fill=False) - ovalues = com.take_nd(np.asarray(other.levels[i]._values), - other.labels[i], allow_fill=False) + svalues = algos.take_nd(np.asarray(self.levels[i]._values), + self.labels[i], allow_fill=False) + ovalues = algos.take_nd(np.asarray(other.levels[i]._values), + other.labels[i], allow_fill=False) if not array_equivalent(svalues, ovalues): return False diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 4b021c51456b9..79a9d0a584a42 100644 --- 
a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -292,7 +292,7 @@ def _convert_slice_indexer(self, key, kind=None): def _format_native_types(self, na_rep='', float_format=None, decimal='.', quoting=None, **kwargs): - from pandas.core.format import FloatArrayFormatter + from pandas.formats.format import FloatArrayFormatter formatter = FloatArrayFormatter(self.values, na_rep=na_rep, float_format=float_format, decimal=decimal, quoting=quoting, diff --git a/pandas/io/common.py b/pandas/io/common.py index 6a40cbcd71a65..e644f3a5f5090 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -9,7 +9,8 @@ from pandas.compat import StringIO, BytesIO, string_types, text_type from pandas import compat -from pandas.core.common import pprint_thing, is_number, AbstractMethodError +from pandas.formats.printing import pprint_thing +from pandas.core.common import is_number, AbstractMethodError try: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 07078faef0266..0261e825d56e2 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -19,7 +19,7 @@ from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, string_types) from pandas.core import config -from pandas.core.common import pprint_thing +from pandas.formats.printing import pprint_thing import pandas.compat as compat import pandas.compat.openpyxl_compat as openpyxl_compat import pandas.core.common as com diff --git a/pandas/io/html.py b/pandas/io/html.py index b21f1ef7f160c..af4ecb2484797 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -19,6 +19,7 @@ from pandas.core import common as com from pandas import Series from pandas.core.common import AbstractMethodError +from pandas.formats.printing import pprint_thing _IMPORTS = False _HAS_BS4 = False @@ -683,7 +684,7 @@ def _parser_dispatch(flavor): def _print_as_set(s): - return '{%s}' % ', '.join([com.pprint_thing(el) for el in s]) + return '{%s}' % ', '.join([pprint_thing(el) for el in s]) def _validate_flavor(flavor): diff --git a/pandas/io/json.py b/pandas/io/json.py index 76cda87043a37..f06ec72062ffa 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -12,7 +12,7 @@ from pandas import Series, DataFrame, to_datetime from pandas.io.common import get_filepath_or_buffer from pandas.core.common import AbstractMethodError -import pandas.core.common as com +from pandas.formats.printing import pprint_thing loads = _json.loads dumps = _json.dumps @@ -266,7 +266,7 @@ def check_keys_split(self, decoded): if bad_keys: bad_keys = ", ".join(bad_keys) raise ValueError(u("JSON data had unexpected key(s): %s") % - com.pprint_thing(bad_keys)) + pprint_thing(bad_keys)) def parse(self): diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 3b1338df525b2..c19dae7f3545e 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,4 +1,9 @@ -from pandas.compat import cPickle as pkl, pickle_compat as pc, PY3 +""" pickle compat """ + +import numpy as np +from numpy.lib.format import read_array, write_array +from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3 +import pandas.core.common as com def to_pickle(obj, path): @@ -62,3 +67,26 @@ def try_read(path, encoding=None): if PY3: return try_read(path, encoding='latin1') raise + +# compat with sparse pickle / unpickle + + +def _pickle_array(arr): + arr = arr.view(np.ndarray) + + buf = BytesIO() + write_array(buf, arr) + + return buf.getvalue() + + +def _unpickle_array(bytes): + arr = read_array(BytesIO(bytes)) + + # All datetimes should be stored as M8[ns]. 
When unpickling with # numpy 1.6, it will read these as M8[us]. So this ensures all # datetime64 types are read as M8[ns] + if com.is_datetime64_dtype(arr): + arr = arr.view(com._NS_DTYPE) + + return arr diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 17bd2c97d618d..854843ffdd152 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -22,8 +22,8 @@ from pandas.tseries.api import PeriodIndex, DatetimeIndex from pandas.tseries.tdi import TimedeltaIndex from pandas.core.base import StringMixin -from pandas.core.common import (adjoin, pprint_thing, _asarray_tuplesafe, - PerformanceWarning) +from pandas.formats.printing import adjoin, pprint_thing +from pandas.core.common import _asarray_tuplesafe, PerformanceWarning from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical from pandas.core.internals import (BlockManager, make_block, @@ -3411,7 +3411,7 @@ def get_blk_items(mgr, blocks): except: raise ValueError( "cannot match existing table structure for [%s] on " "appending data" % ','.join(pprint_thing(item) for item in items)) blocks = new_blocks blk_items = new_blk_items diff --git a/pandas/io/sql.py b/pandas/io/sql.py index addc88bebebe1..6e309e4210962 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -19,7 +19,7 @@ from pandas.core.api import DataFrame, Series from pandas.core.common import isnull from pandas.core.base import PandasObject -from pandas.core.dtypes import DatetimeTZDtype +from pandas.types.api import DatetimeTZDtype from pandas.tseries.tools import to_datetime from contextlib import contextmanager diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index bbca8bffa2f3f..35ce0375ae438 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1826,7 +1826,7 @@ def test_to_excel_styleconverter(self): self.assertEqual(kw['protection'], protection) def test_write_cells_merge_styled(self): - from pandas.core.format import ExcelCell + from pandas.formats.format import ExcelCell from openpyxl import styles sheet_name = 'merge_styled' @@ -1939,7 +1939,7 @@ def test_write_cells_merge_styled(self): if not openpyxl_compat.is_compat(major_ver=2): raise nose.SkipTest('incompatiable openpyxl version') - from pandas.core.format import ExcelCell + from pandas.formats.format import ExcelCell sheet_name = 'merge_styled' diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 97adbcaa79469..92a59337b7e43 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -16,6 +16,7 @@ isnull) from pandas.compat import is_platform_windows, PY3, PY35 +from pandas.formats.printing import pprint_thing from pandas.io.pytables import _tables, TableIterator try: _tables() @@ -28,7 +29,6 @@ AttributeConflictWarning, DuplicateWarning, PossibleDataLossError, ClosedFileError) from pandas.io import pytables as pytables -import pandas.core.common as com import pandas.util.testing as tm from pandas.util.testing import (assert_panel4d_equal, assert_panel_equal, @@ -3806,9 +3806,9 @@ def test_string_select(self): expected = df[df.x != 'none'] assert_frame_equal(result, expected) except Exception as detail: - com.pprint_thing("[{0}]".format(detail)) - com.pprint_thing(store) - com.pprint_thing(expected) + pprint_thing("[{0}]".format(detail)) + pprint_thing(store) + pprint_thing(expected) df2 = df.copy() df2.loc[df2.x == '', 'x'] = np.nan
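# Illustrative sketch (not part of the patch): the sparse pickle helpers that
# moved into pandas/io/pickle.py round-trip an ndarray through numpy's NPY
# wire format; _unpickle_array also normalizes datetime64 data to M8[ns].
# Assuming a source tree at this revision:

import numpy as np
from pandas.io.pickle import _pickle_array, _unpickle_array

arr = np.arange(3)
assert (_unpickle_array(_pickle_array(arr)) == arr).all()

diff --git a/pandas/sparse/array.py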
b/pandas/sparse/array.py index a01f9a96b227b..b8a66921fd01d 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -17,6 +17,7 @@ import pandas._sparse as splib import pandas.index as _index import pandas.core.ops as ops +import pandas.formats.printing as printing def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, @@ -214,9 +215,9 @@ def __len__(self): return 0 def __unicode__(self): - return '%s\nFill: %s\n%s' % (com.pprint_thing(self), - com.pprint_thing(self.fill_value), - com.pprint_thing(self.sp_index)) + return '%s\nFill: %s\n%s' % (printing.pprint_thing(self), + printing.pprint_thing(self.fill_value), + printing.pprint_thing(self.sp_index)) def disable(self, other): raise NotImplementedError('inplace binary ops not supported') diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index f9741217a024c..11947d780ad88 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -10,12 +10,13 @@ from pandas import compat import numpy as np -from pandas.core.common import isnull, _unpickle_array, _try_sort +from pandas.core.common import isnull, _try_sort from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.series import Series from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray, _default_index) import pandas.core.common as com +import pandas.core.algorithms as algos from pandas.core.internals import (BlockManager, create_block_manager_from_arrays) from pandas.core.generic import NDFrame @@ -216,11 +217,13 @@ def _unpickle_sparse_frame_compat(self, state): series, cols, idx, fv, kind = state if not isinstance(cols, Index): # pragma: no cover + from pandas.io.pickle import _unpickle_array columns = _unpickle_array(cols) else: columns = cols if not isinstance(idx, Index): # pragma: no cover + from pandas.io.pickle import _unpickle_array index = _unpickle_array(idx) else: index = idx @@ -593,9 +596,9 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, if col not in self: continue if row_indexer is not None: - new_arrays[col] = com.take_1d(self[col].get_values(), - row_indexer, - fill_value=fill_value) + new_arrays[col] = algos.take_1d(self[col].get_values(), + row_indexer, + fill_value=fill_value) else: new_arrays[col] = self[col] diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py index 6cfe1bc6a79a3..bc10b73a47723 100644 --- a/pandas/sparse/list.py +++ b/pandas/sparse/list.py @@ -1,6 +1,6 @@ import numpy as np from pandas.core.base import PandasObject -from pandas.core.common import pprint_thing +from pandas.formats.printing import pprint_thing from pandas.sparse.array import SparseArray import pandas._sparse as splib diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 25b0e11448e97..88f396d20a91e 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -250,19 +250,21 @@ def __delitem__(self, key): def __getstate__(self): # pickling - return (self._frames, com._pickle_array(self.items), - com._pickle_array(self.major_axis), - com._pickle_array(self.minor_axis), self.default_fill_value, + from pandas.io.pickle import _pickle_array + return (self._frames, _pickle_array(self.items), + _pickle_array(self.major_axis), + _pickle_array(self.minor_axis), self.default_fill_value, self.default_kind) def __setstate__(self, state): frames, items, major, minor, fv, kind = state + from pandas.io.pickle import _unpickle_array self.default_fill_value = fv self.default_kind = kind - self._items = _ensure_index(com._unpickle_array(items)) - 
self._major_axis = _ensure_index(com._unpickle_array(major)) - self._minor_axis = _ensure_index(com._unpickle_array(minor)) + self._items = _ensure_index(_unpickle_array(items)) + self._major_axis = _ensure_index(_unpickle_array(major)) + self._minor_axis = _ensure_index(_unpickle_array(minor)) self._frames = frames def copy(self, deep=True): diff --git a/pandas/tests/formats/__init__.py b/pandas/tests/formats/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/data/unicode_series.csv b/pandas/tests/formats/data/unicode_series.csv similarity index 100% rename from pandas/tests/data/unicode_series.csv rename to pandas/tests/formats/data/unicode_series.csv diff --git a/pandas/tests/test_format.py b/pandas/tests/formats/test_format.py similarity index 99% rename from pandas/tests/test_format.py rename to pandas/tests/formats/test_format.py index 6b8104974cc09..ab547f943375f 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -33,9 +33,10 @@ from pandas import DataFrame, Series, Index, Timestamp, MultiIndex, date_range, NaT -import pandas.core.format as fmt +import pandas.formats.format as fmt import pandas.util.testing as tm import pandas.core.common as com +import pandas.formats.printing as printing from pandas.util.terminal import get_terminal_size import pandas as pd from pandas.core.config import (set_option, get_option, option_context, @@ -213,13 +214,13 @@ def test_repr_chop_threshold(self): def test_repr_obeys_max_seq_limit(self): with option_context("display.max_seq_items", 2000): - self.assertTrue(len(com.pprint_thing(lrange(1000))) > 1000) + self.assertTrue(len(printing.pprint_thing(lrange(1000))) > 1000) with option_context("display.max_seq_items", 5): - self.assertTrue(len(com.pprint_thing(lrange(1000))) < 100) + self.assertTrue(len(printing.pprint_thing(lrange(1000))) < 100) def test_repr_set(self): - self.assertEqual(com.pprint_thing(set([1])), '{1}') + self.assertEqual(printing.pprint_thing(set([1])), '{1}') def test_repr_is_valid_construction_code(self): # for the case of Index, where the repr is traditional rather then @@ -321,7 +322,7 @@ def mkframe(n): df = mkframe((term_width // 7) - 2) self.assertFalse(has_expanded_repr(df)) df = mkframe((term_width // 7) + 2) - com.pprint_thing(df._repr_fits_horizontal_()) + printing.pprint_thing(df._repr_fits_horizontal_()) self.assertTrue(has_expanded_repr(df)) def test_str_max_colwidth(self): @@ -1556,7 +1557,7 @@ def test_frame_info_encoding(self): fmt.set_option('display.max_rows', 200) def test_pprint_thing(self): - from pandas.core.common import pprint_thing as pp_t + from pandas.formats.printing import pprint_thing as pp_t if PY3: raise nose.SkipTest("doesn't work on Python 3") diff --git a/pandas/tests/formats/test_printing.py b/pandas/tests/formats/test_printing.py new file mode 100644 index 0000000000000..3bcceca1f50a7 --- /dev/null +++ b/pandas/tests/formats/test_printing.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +import nose +from pandas import compat +import pandas.formats.printing as printing +import pandas.formats.format as fmt +import pandas.util.testing as tm +import pandas.core.config as cf + +_multiprocess_can_split_ = True + + +def test_adjoin(): + data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] + expected = 'a dd ggg\nb ee hhh\nc ff iii' + + adjoined = printing.adjoin(2, *data) + + assert (adjoined == expected) + + +def test_repr_binary_type(): + import string + letters = string.ascii_letters + btype = 
compat.binary_type + try: + raw = btype(letters, encoding=cf.get_option('display.encoding')) + except TypeError: + raw = btype(letters) + b = compat.text_type(compat.bytes_to_str(raw)) + res = printing.pprint_thing(b, quote_strings=True) + tm.assert_equal(res, repr(b)) + res = printing.pprint_thing(b, quote_strings=False) + tm.assert_equal(res, b) + + +class TestFormattBase(tm.TestCase): + + def test_adjoin(self): + data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] + expected = 'a dd ggg\nb ee hhh\nc ff iii' + + adjoined = printing.adjoin(2, *data) + + self.assertEqual(adjoined, expected) + + def test_adjoin_unicode(self): + data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', 'hhh', u'いいい']] + expected = u'あ dd ggg\nb ええ hhh\nc ff いいい' + adjoined = printing.adjoin(2, *data) + self.assertEqual(adjoined, expected) + + adj = fmt.EastAsianTextAdjustment() + + expected = u"""あ dd ggg +b ええ hhh +c ff いいい""" + + adjoined = adj.adjoin(2, *data) + self.assertEqual(adjoined, expected) + cols = adjoined.split('\n') + self.assertEqual(adj.len(cols[0]), 13) + self.assertEqual(adj.len(cols[1]), 13) + self.assertEqual(adj.len(cols[2]), 16) + + expected = u"""あ dd ggg +b ええ hhh +c ff いいい""" + + adjoined = adj.adjoin(7, *data) + self.assertEqual(adjoined, expected) + cols = adjoined.split('\n') + self.assertEqual(adj.len(cols[0]), 23) + self.assertEqual(adj.len(cols[1]), 23) + self.assertEqual(adj.len(cols[2]), 26) + + def test_justify(self): + adj = fmt.EastAsianTextAdjustment() + + def just(x, *args, **kwargs): + # wrapper to test single str + return adj.justify([x], *args, **kwargs)[0] + + self.assertEqual(just('abc', 5, mode='left'), 'abc ') + self.assertEqual(just('abc', 5, mode='center'), ' abc ') + self.assertEqual(just('abc', 5, mode='right'), ' abc') + self.assertEqual(just(u'abc', 5, mode='left'), 'abc ') + self.assertEqual(just(u'abc', 5, mode='center'), ' abc ') + self.assertEqual(just(u'abc', 5, mode='right'), ' abc') + + self.assertEqual(just(u'パンダ', 5, mode='left'), u'パンダ') + self.assertEqual(just(u'パンダ', 5, mode='center'), u'パンダ') + self.assertEqual(just(u'パンダ', 5, mode='right'), u'パンダ') + + self.assertEqual(just(u'パンダ', 10, mode='left'), u'パンダ ') + self.assertEqual(just(u'パンダ', 10, mode='center'), u' パンダ ') + self.assertEqual(just(u'パンダ', 10, mode='right'), u' パンダ') + + def test_east_asian_len(self): + adj = fmt.EastAsianTextAdjustment() + + self.assertEqual(adj.len('abc'), 3) + self.assertEqual(adj.len(u'abc'), 3) + + self.assertEqual(adj.len(u'パンダ'), 6) + self.assertEqual(adj.len(u'パンダ'), 5) + self.assertEqual(adj.len(u'パンダpanda'), 11) + self.assertEqual(adj.len(u'パンダpanda'), 10) + + def test_ambiguous_width(self): + adj = fmt.EastAsianTextAdjustment() + self.assertEqual(adj.len(u'¡¡ab'), 4) + + with cf.option_context('display.unicode.ambiguous_as_wide', True): + adj = fmt.EastAsianTextAdjustment() + self.assertEqual(adj.len(u'¡¡ab'), 6) + + data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], + ['ggg', u'¡¡ab', u'いいい']] + expected = u'あ dd ggg \nb ええ ¡¡ab\nc ff いいい' + adjoined = adj.adjoin(2, *data) + self.assertEqual(adjoined, expected) + + +# TODO: fix this broken test + +# def test_console_encode(): +# """ +# On Python 2, if sys.stdin.encoding is None (IPython with zmq frontend) +# common.console_encode should encode things as utf-8. 
+# """ +# if compat.PY3: +# raise nose.SkipTest + +# with tm.stdin_encoding(encoding=None): +# result = printing.console_encode(u"\u05d0") +# expected = u"\u05d0".encode('utf-8') +# assert (result == expected) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/test_style.py b/pandas/tests/formats/test_style.py similarity index 99% rename from pandas/tests/test_style.py rename to pandas/tests/formats/test_style.py index bfabaab8ad2f5..5a79e3f6897f0 100644 --- a/pandas/tests/test_style.py +++ b/pandas/tests/formats/test_style.py @@ -22,7 +22,7 @@ import jinja2 # noqa except ImportError: raise SkipTest("No Jinja2") -from pandas.core.style import Styler # noqa +from pandas.formats.style import Styler # noqa class TestStyler(TestCase): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index a395c667188eb..dbb461a5c9e15 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -15,8 +15,8 @@ from pandas import (compat, isnull, notnull, DataFrame, Series, MultiIndex, date_range, Timestamp) import pandas as pd -import pandas.core.common as com import pandas.core.nanops as nanops +import pandas.formats.printing as printing from pandas.util.testing import (assert_almost_equal, assert_equal, @@ -882,14 +882,14 @@ def test_mode(self): # outputs in sorted order df["C"] = list(reversed(df["C"])) - com.pprint_thing(df["C"]) - com.pprint_thing(df["C"].mode()) + printing.pprint_thing(df["C"]) + printing.pprint_thing(df["C"].mode()) a, b = (df[["A", "B", "C"]].mode(), pd.DataFrame({"A": [12, np.nan], "B": [10, np.nan], "C": [8, 9]})) - com.pprint_thing(a) - com.pprint_thing(b) + printing.pprint_thing(a) + printing.pprint_thing(b) assert_frame_equal(a, b) # should work with heterogeneous types df = pd.DataFrame({"A": np.arange(6, dtype='int64'), diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 4a7c5c3b79de8..083da2a040ed5 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -25,7 +25,7 @@ import pandas.core.common as com import pandas.lib as lib -from pandas.core.dtypes import DatetimeTZDtype +from pandas.types.api import DatetimeTZDtype from pandas.util.testing import (assert_numpy_array_equal, assert_series_equal, diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index eedcce82c733d..3c4054b247e0e 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -15,6 +15,7 @@ from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range) import pandas.core.common as com +import pandas.formats.printing as printing import pandas as pd from pandas.util.testing import (assert_numpy_array_equal, @@ -411,7 +412,7 @@ def test_arith_flex_frame(self): assert_frame_equal(result, exp) _check_mixed_int(result, dtype=dtype) except: - com.pprint_thing("Failing operation %r" % op) + printing.pprint_thing("Failing operation %r" % op) raise # ndim >= 3 diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 07446d32c55fb..3d4be319092c3 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -11,7 +11,7 @@ from pandas import (DataFrame, compat, option_context) from pandas.compat import StringIO, lrange, u -import pandas.core.format as fmt +import pandas.formats.format as fmt import pandas as pd from 
numpy.testing.decorators import slow diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e5be2bb08f605..4b8b5ae2571d0 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -21,6 +21,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_attr_equal) +from pandas.formats.printing import pprint_thing from pandas import concat, lib from pandas.core.common import PerformanceWarning @@ -182,7 +183,7 @@ def _print(result, error=None): "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % (name, result, t, o, method1, method2, a, error or '')) if _verbose: - com.pprint_thing(v) + pprint_thing(v) try: # if (name == 'bool' and t == 'empty' and o == 'series' and diff --git a/pandas/tests/series/test_misc_api.py b/pandas/tests/series/test_misc_api.py index ffb360c5871c7..9f5433782b062 100644 --- a/pandas/tests/series/test_misc_api.py +++ b/pandas/tests/series/test_misc_api.py @@ -6,10 +6,10 @@ from pandas import Index, Series, DataFrame, date_range from pandas.tseries.index import Timestamp -import pandas.core.common as com from pandas.compat import range from pandas import compat +import pandas.formats.printing as printing from pandas.util.testing import (assert_series_equal, ensure_clean) import pandas.util.testing as tm @@ -37,7 +37,7 @@ def test_copy_index_name_checking(self): cp = self.ts.copy() cp.index.name = 'foo' - com.pprint_thing(self.ts.index.name) + printing.pprint_thing(self.ts.index.name) self.assertIsNone(self.ts.index.name) def test_append_preserve_name(self): diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index d24e1eab1cea8..880145715ce62 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -4,18 +4,16 @@ import re import nose -from nose.tools import assert_equal, assert_true import numpy as np import pandas as pd from pandas.tslib import iNaT, NaT from pandas import (Series, DataFrame, date_range, DatetimeIndex, TimedeltaIndex, Timestamp, Float64Index) from pandas import compat -from pandas.compat import range, long, lrange, lmap, u +from pandas.compat import range, lrange, lmap, u from pandas.core.common import notnull, isnull, array_equivalent import pandas.core.common as com import pandas.core.convert as convert -import pandas.core.format as fmt import pandas.util.testing as tm import pandas.core.config as cf @@ -70,40 +68,6 @@ def __call__(self): assert getname(1) is None -# Issue 10859 -class TestABCClasses(tm.TestCase): - tuples = [[1, 2, 2], ['red', 'blue', 'red']] - multi_index = pd.MultiIndex.from_arrays(tuples, names=('number', 'color')) - datetime_index = pd.to_datetime(['2000/1/1', '2010/1/1']) - timedelta_index = pd.to_timedelta(np.arange(5), unit='s') - period_index = pd.period_range('2000/1/1', '2010/1/1/', freq='M') - categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) - categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) - df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index) - sparse_series = pd.Series([1, 2, 3]).to_sparse() - sparse_array = pd.SparseArray(np.random.randn(10)) - - def test_abc_types(self): - self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndex) - self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCInt64Index) - self.assertIsInstance(pd.Float64Index([1, 2, 3]), com.ABCFloat64Index) - self.assertIsInstance(self.multi_index, com.ABCMultiIndex) - self.assertIsInstance(self.datetime_index, 
com.ABCDatetimeIndex) - self.assertIsInstance(self.timedelta_index, com.ABCTimedeltaIndex) - self.assertIsInstance(self.period_index, com.ABCPeriodIndex) - self.assertIsInstance(self.categorical_df.index, - com.ABCCategoricalIndex) - self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndexClass) - self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCIndexClass) - self.assertIsInstance(pd.Series([1, 2, 3]), com.ABCSeries) - self.assertIsInstance(self.df, com.ABCDataFrame) - self.assertIsInstance(self.df.to_panel(), com.ABCPanel) - self.assertIsInstance(self.sparse_series, com.ABCSparseSeries) - self.assertIsInstance(self.sparse_array, com.ABCSparseArray) - self.assertIsInstance(self.categorical, com.ABCCategorical) - self.assertIsInstance(pd.Period('2012', freq='A-DEC'), com.ABCPeriod) - - class TestInferDtype(tm.TestCase): def test_infer_dtype_from_scalar(self): @@ -408,118 +372,6 @@ def test_all_not_none(): assert (not com._all_not_none(None, None, None, None)) -def test_repr_binary_type(): - import string - letters = string.ascii_letters - btype = compat.binary_type - try: - raw = btype(letters, encoding=cf.get_option('display.encoding')) - except TypeError: - raw = btype(letters) - b = compat.text_type(compat.bytes_to_str(raw)) - res = com.pprint_thing(b, quote_strings=True) - assert_equal(res, repr(b)) - res = com.pprint_thing(b, quote_strings=False) - assert_equal(res, b) - - -def test_adjoin(): - data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] - expected = 'a dd ggg\nb ee hhh\nc ff iii' - - adjoined = com.adjoin(2, *data) - - assert (adjoined == expected) - - -class TestFormattBase(tm.TestCase): - - def test_adjoin(self): - data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] - expected = 'a dd ggg\nb ee hhh\nc ff iii' - - adjoined = com.adjoin(2, *data) - - self.assertEqual(adjoined, expected) - - def test_adjoin_unicode(self): - data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', 'hhh', u'いいい']] - expected = u'あ dd ggg\nb ええ hhh\nc ff いいい' - adjoined = com.adjoin(2, *data) - self.assertEqual(adjoined, expected) - - adj = fmt.EastAsianTextAdjustment() - - expected = u"""あ dd ggg -b ええ hhh -c ff いいい""" - - adjoined = adj.adjoin(2, *data) - self.assertEqual(adjoined, expected) - cols = adjoined.split('\n') - self.assertEqual(adj.len(cols[0]), 13) - self.assertEqual(adj.len(cols[1]), 13) - self.assertEqual(adj.len(cols[2]), 16) - - expected = u"""あ dd ggg -b ええ hhh -c ff いいい""" - - adjoined = adj.adjoin(7, *data) - self.assertEqual(adjoined, expected) - cols = adjoined.split('\n') - self.assertEqual(adj.len(cols[0]), 23) - self.assertEqual(adj.len(cols[1]), 23) - self.assertEqual(adj.len(cols[2]), 26) - - def test_justify(self): - adj = fmt.EastAsianTextAdjustment() - - def just(x, *args, **kwargs): - # wrapper to test single str - return adj.justify([x], *args, **kwargs)[0] - - self.assertEqual(just('abc', 5, mode='left'), 'abc ') - self.assertEqual(just('abc', 5, mode='center'), ' abc ') - self.assertEqual(just('abc', 5, mode='right'), ' abc') - self.assertEqual(just(u'abc', 5, mode='left'), 'abc ') - self.assertEqual(just(u'abc', 5, mode='center'), ' abc ') - self.assertEqual(just(u'abc', 5, mode='right'), ' abc') - - self.assertEqual(just(u'パンダ', 5, mode='left'), u'パンダ') - self.assertEqual(just(u'パンダ', 5, mode='center'), u'パンダ') - self.assertEqual(just(u'パンダ', 5, mode='right'), u'パンダ') - - self.assertEqual(just(u'パンダ', 10, mode='left'), u'パンダ ') - self.assertEqual(just(u'パンダ', 10, mode='center'), u' パンダ ') - self.assertEqual(just(u'パンダ', 
10, mode='right'), u' パンダ') - - def test_east_asian_len(self): - adj = fmt.EastAsianTextAdjustment() - - self.assertEqual(adj.len('abc'), 3) - self.assertEqual(adj.len(u'abc'), 3) - - self.assertEqual(adj.len(u'パンダ'), 6) - self.assertEqual(adj.len(u'パンダ'), 5) - self.assertEqual(adj.len(u'パンダpanda'), 11) - self.assertEqual(adj.len(u'パンダpanda'), 10) - - def test_ambiguous_width(self): - adj = fmt.EastAsianTextAdjustment() - self.assertEqual(adj.len(u'¡¡ab'), 4) - - with cf.option_context('display.unicode.ambiguous_as_wide', True): - adj = fmt.EastAsianTextAdjustment() - self.assertEqual(adj.len(u'¡¡ab'), 6) - - data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], - ['ggg', u'¡¡ab', u'いいい']] - expected = u'あ dd ggg \nb ええ ¡¡ab\nc ff いいい' - adjoined = adj.adjoin(2, *data) - self.assertEqual(adjoined, expected) - - def test_iterpairs(): data = [1, 2, 3, 4] expected = [(1, 2), (2, 3), (3, 4)] @@ -559,18 +411,6 @@ def test_locs(mask): test_locs([1]) -def test_indent(): - s = 'a b c\nd e f' - result = com.indent(s, spaces=6) - - assert (result == ' a b c\n d e f') - - -def test_banner(): - ban = com.banner('hi') - assert (ban == ('%s\nhi\n%s' % ('=' * 80, '=' * 80))) - - def test_map_indices_py(): data = [4, 3, 2, 1] expected = {4: 0, 3: 1, 2: 2, 1: 3} @@ -732,21 +572,6 @@ def test_ensure_platform_int(): pi = com._ensure_platform_int(x) assert (pi.dtype == np.int_) -# TODO: fix this broken test - -# def test_console_encode(): -# """ -# On Python 2, if sys.stdin.encoding is None (IPython with zmq frontend) -# common.console_encode should encode things as utf-8. -# """ -# if compat.PY3: -# raise nose.SkipTest - -# with tm.stdin_encoding(encoding=None): -# result = com.console_encode(u"\u05d0") -# expected = u"\u05d0".encode('utf-8') -# assert (result == expected) - def test_is_re(): passes = re.compile('ad'), @@ -775,11 +600,11 @@ def test_random_state(): import numpy.random as npr # Check with seed state = com._random_state(5) - assert_equal(state.uniform(), npr.RandomState(5).uniform()) + tm.assert_equal(state.uniform(), npr.RandomState(5).uniform()) # Check with random state object state2 = npr.RandomState(10) - assert_equal( + tm.assert_equal( com._random_state(state2).uniform(), npr.RandomState(10).uniform()) # check with no arg random state @@ -818,434 +643,6 @@ def test_maybe_match_name(): assert (matched == 'y') -class TestTake(tm.TestCase): - # standard incompatible fill error - fill_error = re.compile("Incompatible type for fill_value") - - _multiprocess_can_split_ = True - - def test_1d_with_out(self): - def _test_dtype(dtype, can_hold_na, writeable=True): - data = np.random.randint(0, 2, 4).astype(dtype) - data.flags.writeable = writeable - - indexer = [2, 1, 0, 1] - out = np.empty(4, dtype=dtype) - com.take_1d(data, indexer, out=out) - expected = data.take(indexer) - tm.assert_almost_equal(out, expected) - - indexer = [2, 1, 0, -1] - out = np.empty(4, dtype=dtype) - if can_hold_na: - com.take_1d(data, indexer, out=out) - expected = data.take(indexer) - expected[3] = np.nan - tm.assert_almost_equal(out, expected) - else: - with tm.assertRaisesRegexp(TypeError, self.fill_error): - com.take_1d(data, indexer, out=out) - # no exception o/w - data.take(indexer, out=out) - - for writeable in [True, False]: - # Check that take_nd works both with writeable arrays (in which - # case fast typed memoryviews implementation) and read-only - # arrays alike. 
- _test_dtype(np.float64, True, writeable=writeable) - _test_dtype(np.float32, True, writeable=writeable) - _test_dtype(np.uint64, False, writeable=writeable) - _test_dtype(np.uint32, False, writeable=writeable) - _test_dtype(np.uint16, False, writeable=writeable) - _test_dtype(np.uint8, False, writeable=writeable) - _test_dtype(np.int64, False, writeable=writeable) - _test_dtype(np.int32, False, writeable=writeable) - _test_dtype(np.int16, False, writeable=writeable) - _test_dtype(np.int8, False, writeable=writeable) - _test_dtype(np.object_, True, writeable=writeable) - _test_dtype(np.bool, False, writeable=writeable) - - def test_1d_fill_nonna(self): - def _test_dtype(dtype, fill_value, out_dtype): - data = np.random.randint(0, 2, 4).astype(dtype) - - indexer = [2, 1, 0, -1] - - result = com.take_1d(data, indexer, fill_value=fill_value) - assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all()) - assert (result[3] == fill_value) - assert (result.dtype == out_dtype) - - indexer = [2, 1, 0, 1] - - result = com.take_1d(data, indexer, fill_value=fill_value) - assert ((result[[0, 1, 2, 3]] == data[indexer]).all()) - assert (result.dtype == dtype) - - _test_dtype(np.int8, np.int16(127), np.int8) - _test_dtype(np.int8, np.int16(128), np.int16) - _test_dtype(np.int32, 1, np.int32) - _test_dtype(np.int32, 2.0, np.float64) - _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) - _test_dtype(np.int32, True, np.object_) - _test_dtype(np.int32, '', np.object_) - _test_dtype(np.float64, 1, np.float64) - _test_dtype(np.float64, 2.0, np.float64) - _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) - _test_dtype(np.float64, True, np.object_) - _test_dtype(np.float64, '', np.object_) - _test_dtype(np.complex128, 1, np.complex128) - _test_dtype(np.complex128, 2.0, np.complex128) - _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) - _test_dtype(np.complex128, True, np.object_) - _test_dtype(np.complex128, '', np.object_) - _test_dtype(np.bool_, 1, np.object_) - _test_dtype(np.bool_, 2.0, np.object_) - _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) - _test_dtype(np.bool_, True, np.bool_) - _test_dtype(np.bool_, '', np.object_) - - def test_2d_with_out(self): - def _test_dtype(dtype, can_hold_na, writeable=True): - data = np.random.randint(0, 2, (5, 3)).astype(dtype) - data.flags.writeable = writeable - - indexer = [2, 1, 0, 1] - out0 = np.empty((4, 3), dtype=dtype) - out1 = np.empty((5, 4), dtype=dtype) - com.take_nd(data, indexer, out=out0, axis=0) - com.take_nd(data, indexer, out=out1, axis=1) - expected0 = data.take(indexer, axis=0) - expected1 = data.take(indexer, axis=1) - tm.assert_almost_equal(out0, expected0) - tm.assert_almost_equal(out1, expected1) - - indexer = [2, 1, 0, -1] - out0 = np.empty((4, 3), dtype=dtype) - out1 = np.empty((5, 4), dtype=dtype) - if can_hold_na: - com.take_nd(data, indexer, out=out0, axis=0) - com.take_nd(data, indexer, out=out1, axis=1) - expected0 = data.take(indexer, axis=0) - expected1 = data.take(indexer, axis=1) - expected0[3, :] = np.nan - expected1[:, 3] = np.nan - tm.assert_almost_equal(out0, expected0) - tm.assert_almost_equal(out1, expected1) - else: - for i, out in enumerate([out0, out1]): - with tm.assertRaisesRegexp(TypeError, self.fill_error): - com.take_nd(data, indexer, out=out, axis=i) - # no exception o/w - data.take(indexer, out=out, axis=i) - - for writeable in [True, False]: - # Check that take_nd works both with writeable arrays (in which - # case fast typed memoryviews implementation) and read-only - # arrays alike. 
- _test_dtype(np.float64, True, writeable=writeable) - _test_dtype(np.float32, True, writeable=writeable) - _test_dtype(np.uint64, False, writeable=writeable) - _test_dtype(np.uint32, False, writeable=writeable) - _test_dtype(np.uint16, False, writeable=writeable) - _test_dtype(np.uint8, False, writeable=writeable) - _test_dtype(np.int64, False, writeable=writeable) - _test_dtype(np.int32, False, writeable=writeable) - _test_dtype(np.int16, False, writeable=writeable) - _test_dtype(np.int8, False, writeable=writeable) - _test_dtype(np.object_, True, writeable=writeable) - _test_dtype(np.bool, False, writeable=writeable) - - def test_2d_fill_nonna(self): - def _test_dtype(dtype, fill_value, out_dtype): - data = np.random.randint(0, 2, (5, 3)).astype(dtype) - - indexer = [2, 1, 0, -1] - - result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert ((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()) - assert ((result[3, :] == fill_value).all()) - assert (result.dtype == out_dtype) - - result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert ((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()) - assert ((result[:, 3] == fill_value).all()) - assert (result.dtype == out_dtype) - - indexer = [2, 1, 0, 1] - - result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert ((result[[0, 1, 2, 3], :] == data[indexer, :]).all()) - assert (result.dtype == dtype) - - result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert ((result[:, [0, 1, 2, 3]] == data[:, indexer]).all()) - assert (result.dtype == dtype) - - _test_dtype(np.int8, np.int16(127), np.int8) - _test_dtype(np.int8, np.int16(128), np.int16) - _test_dtype(np.int32, 1, np.int32) - _test_dtype(np.int32, 2.0, np.float64) - _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) - _test_dtype(np.int32, True, np.object_) - _test_dtype(np.int32, '', np.object_) - _test_dtype(np.float64, 1, np.float64) - _test_dtype(np.float64, 2.0, np.float64) - _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) - _test_dtype(np.float64, True, np.object_) - _test_dtype(np.float64, '', np.object_) - _test_dtype(np.complex128, 1, np.complex128) - _test_dtype(np.complex128, 2.0, np.complex128) - _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) - _test_dtype(np.complex128, True, np.object_) - _test_dtype(np.complex128, '', np.object_) - _test_dtype(np.bool_, 1, np.object_) - _test_dtype(np.bool_, 2.0, np.object_) - _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) - _test_dtype(np.bool_, True, np.bool_) - _test_dtype(np.bool_, '', np.object_) - - def test_3d_with_out(self): - def _test_dtype(dtype, can_hold_na): - data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) - - indexer = [2, 1, 0, 1] - out0 = np.empty((4, 4, 3), dtype=dtype) - out1 = np.empty((5, 4, 3), dtype=dtype) - out2 = np.empty((5, 4, 4), dtype=dtype) - com.take_nd(data, indexer, out=out0, axis=0) - com.take_nd(data, indexer, out=out1, axis=1) - com.take_nd(data, indexer, out=out2, axis=2) - expected0 = data.take(indexer, axis=0) - expected1 = data.take(indexer, axis=1) - expected2 = data.take(indexer, axis=2) - tm.assert_almost_equal(out0, expected0) - tm.assert_almost_equal(out1, expected1) - tm.assert_almost_equal(out2, expected2) - - indexer = [2, 1, 0, -1] - out0 = np.empty((4, 4, 3), dtype=dtype) - out1 = np.empty((5, 4, 3), dtype=dtype) - out2 = np.empty((5, 4, 4), dtype=dtype) - if can_hold_na: - com.take_nd(data, indexer, out=out0, axis=0) - com.take_nd(data, indexer, out=out1, axis=1) - com.take_nd(data, indexer, out=out2, axis=2) 
- expected0 = data.take(indexer, axis=0) - expected1 = data.take(indexer, axis=1) - expected2 = data.take(indexer, axis=2) - expected0[3, :, :] = np.nan - expected1[:, 3, :] = np.nan - expected2[:, :, 3] = np.nan - tm.assert_almost_equal(out0, expected0) - tm.assert_almost_equal(out1, expected1) - tm.assert_almost_equal(out2, expected2) - else: - for i, out in enumerate([out0, out1, out2]): - with tm.assertRaisesRegexp(TypeError, self.fill_error): - com.take_nd(data, indexer, out=out, axis=i) - # no exception o/w - data.take(indexer, out=out, axis=i) - - _test_dtype(np.float64, True) - _test_dtype(np.float32, True) - _test_dtype(np.uint64, False) - _test_dtype(np.uint32, False) - _test_dtype(np.uint16, False) - _test_dtype(np.uint8, False) - _test_dtype(np.int64, False) - _test_dtype(np.int32, False) - _test_dtype(np.int16, False) - _test_dtype(np.int8, False) - _test_dtype(np.object_, True) - _test_dtype(np.bool, False) - - def test_3d_fill_nonna(self): - def _test_dtype(dtype, fill_value, out_dtype): - data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) - - indexer = [2, 1, 0, -1] - - result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert ((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()) - assert ((result[3, :, :] == fill_value).all()) - assert (result.dtype == out_dtype) - - result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert ((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()) - assert ((result[:, 3, :] == fill_value).all()) - assert (result.dtype == out_dtype) - - result = com.take_nd(data, indexer, axis=2, fill_value=fill_value) - assert ((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()) - assert ((result[:, :, 3] == fill_value).all()) - assert (result.dtype == out_dtype) - - indexer = [2, 1, 0, 1] - - result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert ((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()) - assert (result.dtype == dtype) - - result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert ((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()) - assert (result.dtype == dtype) - - result = com.take_nd(data, indexer, axis=2, fill_value=fill_value) - assert ((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()) - assert (result.dtype == dtype) - - _test_dtype(np.int8, np.int16(127), np.int8) - _test_dtype(np.int8, np.int16(128), np.int16) - _test_dtype(np.int32, 1, np.int32) - _test_dtype(np.int32, 2.0, np.float64) - _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) - _test_dtype(np.int32, True, np.object_) - _test_dtype(np.int32, '', np.object_) - _test_dtype(np.float64, 1, np.float64) - _test_dtype(np.float64, 2.0, np.float64) - _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) - _test_dtype(np.float64, True, np.object_) - _test_dtype(np.float64, '', np.object_) - _test_dtype(np.complex128, 1, np.complex128) - _test_dtype(np.complex128, 2.0, np.complex128) - _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) - _test_dtype(np.complex128, True, np.object_) - _test_dtype(np.complex128, '', np.object_) - _test_dtype(np.bool_, 1, np.object_) - _test_dtype(np.bool_, 2.0, np.object_) - _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) - _test_dtype(np.bool_, True, np.bool_) - _test_dtype(np.bool_, '', np.object_) - - def test_1d_other_dtypes(self): - arr = np.random.randn(10).astype(np.float32) - - indexer = [1, 2, 3, -1] - result = com.take_1d(arr, indexer) - expected = arr.take(indexer) - expected[-1] = np.nan - tm.assert_almost_equal(result, expected) 
- - def test_2d_other_dtypes(self): - arr = np.random.randn(10, 5).astype(np.float32) - - indexer = [1, 2, 3, -1] - - # axis=0 - result = com.take_nd(arr, indexer, axis=0) - expected = arr.take(indexer, axis=0) - expected[-1] = np.nan - tm.assert_almost_equal(result, expected) - - # axis=1 - result = com.take_nd(arr, indexer, axis=1) - expected = arr.take(indexer, axis=1) - expected[:, -1] = np.nan - tm.assert_almost_equal(result, expected) - - def test_1d_bool(self): - arr = np.array([0, 1, 0], dtype=bool) - - result = com.take_1d(arr, [0, 2, 2, 1]) - expected = arr.take([0, 2, 2, 1]) - self.assert_numpy_array_equal(result, expected) - - result = com.take_1d(arr, [0, 2, -1]) - self.assertEqual(result.dtype, np.object_) - - def test_2d_bool(self): - arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool) - - result = com.take_nd(arr, [0, 2, 2, 1]) - expected = arr.take([0, 2, 2, 1], axis=0) - self.assert_numpy_array_equal(result, expected) - - result = com.take_nd(arr, [0, 2, 2, 1], axis=1) - expected = arr.take([0, 2, 2, 1], axis=1) - self.assert_numpy_array_equal(result, expected) - - result = com.take_nd(arr, [0, 2, -1]) - self.assertEqual(result.dtype, np.object_) - - def test_2d_float32(self): - arr = np.random.randn(4, 3).astype(np.float32) - indexer = [0, 2, -1, 1, -1] - - # axis=0 - result = com.take_nd(arr, indexer, axis=0) - result2 = np.empty_like(result) - com.take_nd(arr, indexer, axis=0, out=result2) - tm.assert_almost_equal(result, result2) - - expected = arr.take(indexer, axis=0) - expected[[2, 4], :] = np.nan - tm.assert_almost_equal(result, expected) - - # this now accepts a float32! # test with float64 out buffer - out = np.empty((len(indexer), arr.shape[1]), dtype='float32') - com.take_nd(arr, indexer, out=out) # it works! 
- - # axis=1 - result = com.take_nd(arr, indexer, axis=1) - result2 = np.empty_like(result) - com.take_nd(arr, indexer, axis=1, out=result2) - tm.assert_almost_equal(result, result2) - - expected = arr.take(indexer, axis=1) - expected[:, [2, 4]] = np.nan - tm.assert_almost_equal(result, expected) - - def test_2d_datetime64(self): - # 2005/01/01 - 2006/01/01 - arr = np.random.randint( - long(11045376), long(11360736), (5, 3)) * 100000000000 - arr = arr.view(dtype='datetime64[ns]') - indexer = [0, 2, -1, 1, -1] - - # axis=0 - result = com.take_nd(arr, indexer, axis=0) - result2 = np.empty_like(result) - com.take_nd(arr, indexer, axis=0, out=result2) - tm.assert_almost_equal(result, result2) - - expected = arr.take(indexer, axis=0) - expected.view(np.int64)[[2, 4], :] = iNaT - tm.assert_almost_equal(result, expected) - - result = com.take_nd(arr, indexer, axis=0, - fill_value=datetime(2007, 1, 1)) - result2 = np.empty_like(result) - com.take_nd(arr, indexer, out=result2, axis=0, - fill_value=datetime(2007, 1, 1)) - tm.assert_almost_equal(result, result2) - - expected = arr.take(indexer, axis=0) - expected[[2, 4], :] = datetime(2007, 1, 1) - tm.assert_almost_equal(result, expected) - - # axis=1 - result = com.take_nd(arr, indexer, axis=1) - result2 = np.empty_like(result) - com.take_nd(arr, indexer, axis=1, out=result2) - tm.assert_almost_equal(result, result2) - - expected = arr.take(indexer, axis=1) - expected.view(np.int64)[:, [2, 4]] = iNaT - tm.assert_almost_equal(result, expected) - - result = com.take_nd(arr, indexer, axis=1, - fill_value=datetime(2007, 1, 1)) - result2 = np.empty_like(result) - com.take_nd(arr, indexer, out=result2, axis=1, - fill_value=datetime(2007, 1, 1)) - tm.assert_almost_equal(result, result2) - - expected = arr.take(indexer, axis=1) - expected[:, [2, 4]] = datetime(2007, 1, 1) - tm.assert_almost_equal(result, expected) - - class TestMaybe(tm.TestCase): def test_maybe_convert_string_to_array(self): @@ -1274,21 +671,23 @@ def test_maybe_convert_string_to_array(self): self.assertTrue(result.dtype == object) -def test_possibly_convert_objects_copy(): - values = np.array([1, 2]) +class TestConvert(tm.TestCase): + + def test_possibly_convert_objects_copy(self): + values = np.array([1, 2]) - out = convert._possibly_convert_objects(values, copy=False) - assert_true(values is out) + out = convert._possibly_convert_objects(values, copy=False) + self.assertTrue(values is out) - out = convert._possibly_convert_objects(values, copy=True) - assert_true(values is not out) + out = convert._possibly_convert_objects(values, copy=True) + self.assertTrue(values is not out) - values = np.array(['apply', 'banana']) - out = convert._possibly_convert_objects(values, copy=False) - assert_true(values is out) + values = np.array(['apply', 'banana']) + out = convert._possibly_convert_objects(values, copy=False) + self.assertTrue(values is out) - out = convert._possibly_convert_objects(values, copy=True) - assert_true(values is not out) + out = convert._possibly_convert_objects(values, copy=True) + self.assertTrue(values is not out) def test_dict_compat(): diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 688f074e31a42..044272f24a21f 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -16,7 +16,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal) -import pandas.core.common as com +from pandas.formats.printing import pprint_thing 
import pandas.util.testing as tm from numpy.testing.decorators import slow @@ -99,7 +99,7 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, assert expected.dtype.kind == 'f' assert_func(expected, result) except Exception: - com.pprint_thing("Failed test with operator %r" % op.__name__) + pprint_thing("Failed test with operator %r" % op.__name__) raise def test_integer_arithmetic(self): @@ -139,8 +139,8 @@ def run_binary_test(self, df, other, assert_func, test_flex=False, assert not used_numexpr, "Used numexpr unexpectedly." assert_func(expected, result) except Exception: - com.pprint_thing("Failed test with operation %r" % arith) - com.pprint_thing("test_flex was %r" % test_flex) + pprint_thing("Failed test with operation %r" % arith) + pprint_thing("test_flex was %r" % test_flex) raise def run_frame(self, df, other, binary_comp=None, run_binary=True, diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 71f2551e89ccf..7c31e71bbaf05 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -12,7 +12,7 @@ date_range, period_range, Panel4D) from pandas.core.index import MultiIndex -import pandas.core.common as com +import pandas.formats.printing as printing import pandas.lib as lib from pandas.compat import range, zip, PY3 @@ -208,7 +208,7 @@ def test_nonzero(self): def f(): if obj1: - com.pprint_thing("this works and shouldn't") + printing.pprint_thing("this works and shouldn't") self.assertRaises(ValueError, f) self.assertRaises(ValueError, lambda: obj1 and obj2) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 45d3fd0dad855..16b83c202ccaf 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -15,6 +15,7 @@ from pandas.compat import (range, lrange, StringIO, lmap, lzip, u, zip, iteritems, OrderedDict, PY3) from pandas.util.decorators import cache_readonly +from pandas.formats.printing import pprint_thing import pandas.core.common as com import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, @@ -1543,7 +1544,7 @@ def test_subplots(self): for ax, column in zip(axes, df.columns): self._check_legend_labels(ax, - labels=[com.pprint_thing(column)]) + labels=[pprint_thing(column)]) for ax in axes[:-2]: self._check_visible(ax.xaxis) # xaxis must be visible for grid @@ -2344,7 +2345,7 @@ def test_boxplot(self): df = self.hist_df series = df['height'] numeric_cols = df._get_numeric_data().columns - labels = [com.pprint_thing(c) for c in numeric_cols] + labels = [pprint_thing(c) for c in numeric_cols] ax = _check_plot_works(df.plot.box) self._check_text_labels(ax.get_xticklabels(), labels) @@ -2371,7 +2372,7 @@ def test_boxplot(self): positions = np.array([1, 6, 7]) ax = df.plot.box(positions=positions) numeric_cols = df._get_numeric_data().columns - labels = [com.pprint_thing(c) for c in numeric_cols] + labels = [pprint_thing(c) for c in numeric_cols] self._check_text_labels(ax.get_xticklabels(), labels) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) @@ -2380,7 +2381,7 @@ def test_boxplot(self): def test_boxplot_vertical(self): df = self.hist_df numeric_cols = df._get_numeric_data().columns - labels = [com.pprint_thing(c) for c in numeric_cols] + labels = [pprint_thing(c) for c in numeric_cols] # if horizontal, yticklabels are rotated ax = df.plot.box(rot=50, fontsize=8, vert=False) @@ -2442,7 +2443,7 @@ def test_kde_df(self): 
_skip_if_no_scipy_gaussian_kde() df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind='kde') - expected = [com.pprint_thing(c) for c in df.columns] + expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) self._check_ticks_props(ax, xrot=0) @@ -2474,7 +2475,7 @@ def test_hist_df(self): series = df[0] ax = _check_plot_works(df.plot.hist) - expected = [com.pprint_thing(c) for c in df.columns] + expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) axes = _check_plot_works(df.plot.hist, filterwarnings='ignore', diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 04ef07244cb06..6cf779bad1a41 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -14,6 +14,7 @@ _lexsort_indexer) from pandas.core.series import Series from pandas.core.config import option_context +from pandas.formats.printing import pprint_thing from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, assert_index_equal, assertRaisesRegexp) @@ -981,8 +982,8 @@ def test_agg_item_by_item_raise_typeerror(self): df = DataFrame(randint(10, size=(20, 10))) def raiseException(df): - com.pprint_thing('----------------------------------------') - com.pprint_thing(df.to_string()) + pprint_thing('----------------------------------------') + pprint_thing(df.to_string()) raise TypeError self.assertRaises(TypeError, df.groupby(0).agg, raiseException) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 72bad407ded9f..95e7ab49ccd9c 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -14,7 +14,7 @@ from pandas.sparse.array import SparseArray from pandas.core.internals import (BlockPlacement, SingleBlockManager, make_block, BlockManager) -import pandas.core.common as com +import pandas.core.algorithms as algos import pandas.util.testing as tm import pandas as pd from pandas.util.testing import (assert_almost_equal, assert_frame_equal, @@ -948,8 +948,8 @@ def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): reindexed = mgr.reindex_axis(new_labels, axis, fill_value=fill_value) - tm.assert_numpy_array_equal(com.take_nd(mat, indexer, axis, - fill_value=fill_value), + tm.assert_numpy_array_equal(algos.take_nd(mat, indexer, axis, + fill_value=fill_value), reindexed.as_matrix()) tm.assert_index_equal(reindexed.axes[axis], new_labels) @@ -981,8 +981,8 @@ def test_reindex_indexer(self): def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): mat = mgr.as_matrix() - reindexed_mat = com.take_nd(mat, indexer, axis, - fill_value=fill_value) + reindexed_mat = algos.take_nd(mat, indexer, axis, + fill_value=fill_value) reindexed = mgr.reindex_indexer(new_labels, indexer, axis, fill_value=fill_value) tm.assert_numpy_array_equal(reindexed_mat, reindexed.as_matrix()) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index dbab9a2298282..f8792e0b68308 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -16,6 +16,7 @@ from pandas.core.panel import Panel from pandas.core.series import remove_na import pandas.core.common as com +from pandas.formats.printing import pprint_thing from pandas import compat from pandas.compat import range, lrange, StringIO, OrderedDict, signature from pandas import SparsePanel @@ -371,13 +372,13 @@ def check_op(op, name): try: check_op(getattr(operator, op), op) except: - com.pprint_thing("Failing 
operation: %r" % op) + pprint_thing("Failing operation: %r" % op) raise if compat.PY3: try: check_op(operator.truediv, 'div') except: - com.pprint_thing("Failing operation: %r" % 'div') + pprint_thing("Failing operation: %r" % 'div') raise @ignore_sparse_panel_future_warning @@ -2160,8 +2161,8 @@ def check_drop(drop_val, axis_number, aliases, expected): actual = panel.drop(drop_val, axis=alias) assert_panel_equal(actual, expected) except AssertionError: - com.pprint_thing("Failed with axis_number %d and aliases: %s" % - (axis_number, aliases)) + pprint_thing("Failed with axis_number %d and aliases: %s" % + (axis_number, aliases)) raise # Items expected = Panel({"One": df}) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py new file mode 100644 index 0000000000000..98b3b474f785d --- /dev/null +++ b/pandas/tests/test_take.py @@ -0,0 +1,455 @@ +# -*- coding: utf-8 -*- +import re +from datetime import datetime + +import nose +import numpy as np +from pandas.compat import long +import pandas.core.algorithms as algos +import pandas.util.testing as tm +from pandas.tslib import iNaT + +_multiprocess_can_split_ = True + + +class TestTake(tm.TestCase): + # standard incompatible fill error + fill_error = re.compile("Incompatible type for fill_value") + + _multiprocess_can_split_ = True + + def test_1d_with_out(self): + def _test_dtype(dtype, can_hold_na, writeable=True): + data = np.random.randint(0, 2, 4).astype(dtype) + data.flags.writeable = writeable + + indexer = [2, 1, 0, 1] + out = np.empty(4, dtype=dtype) + algos.take_1d(data, indexer, out=out) + expected = data.take(indexer) + tm.assert_almost_equal(out, expected) + + indexer = [2, 1, 0, -1] + out = np.empty(4, dtype=dtype) + if can_hold_na: + algos.take_1d(data, indexer, out=out) + expected = data.take(indexer) + expected[3] = np.nan + tm.assert_almost_equal(out, expected) + else: + with tm.assertRaisesRegexp(TypeError, self.fill_error): + algos.take_1d(data, indexer, out=out) + # no exception o/w + data.take(indexer, out=out) + + for writeable in [True, False]: + # Check that take_nd works both with writeable arrays (in which + # case fast typed memoryviews implementation) and read-only + # arrays alike. 
+ _test_dtype(np.float64, True, writeable=writeable) + _test_dtype(np.float32, True, writeable=writeable) + _test_dtype(np.uint64, False, writeable=writeable) + _test_dtype(np.uint32, False, writeable=writeable) + _test_dtype(np.uint16, False, writeable=writeable) + _test_dtype(np.uint8, False, writeable=writeable) + _test_dtype(np.int64, False, writeable=writeable) + _test_dtype(np.int32, False, writeable=writeable) + _test_dtype(np.int16, False, writeable=writeable) + _test_dtype(np.int8, False, writeable=writeable) + _test_dtype(np.object_, True, writeable=writeable) + _test_dtype(np.bool, False, writeable=writeable) + + def test_1d_fill_nonna(self): + def _test_dtype(dtype, fill_value, out_dtype): + data = np.random.randint(0, 2, 4).astype(dtype) + + indexer = [2, 1, 0, -1] + + result = algos.take_1d(data, indexer, fill_value=fill_value) + assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all()) + assert (result[3] == fill_value) + assert (result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + + result = algos.take_1d(data, indexer, fill_value=fill_value) + assert ((result[[0, 1, 2, 3]] == data[indexer]).all()) + assert (result.dtype == dtype) + + _test_dtype(np.int8, np.int16(127), np.int8) + _test_dtype(np.int8, np.int16(128), np.int16) + _test_dtype(np.int32, 1, np.int32) + _test_dtype(np.int32, 2.0, np.float64) + _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) + _test_dtype(np.int32, True, np.object_) + _test_dtype(np.int32, '', np.object_) + _test_dtype(np.float64, 1, np.float64) + _test_dtype(np.float64, 2.0, np.float64) + _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) + _test_dtype(np.float64, True, np.object_) + _test_dtype(np.float64, '', np.object_) + _test_dtype(np.complex128, 1, np.complex128) + _test_dtype(np.complex128, 2.0, np.complex128) + _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) + _test_dtype(np.complex128, True, np.object_) + _test_dtype(np.complex128, '', np.object_) + _test_dtype(np.bool_, 1, np.object_) + _test_dtype(np.bool_, 2.0, np.object_) + _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) + _test_dtype(np.bool_, True, np.bool_) + _test_dtype(np.bool_, '', np.object_) + + def test_2d_with_out(self): + def _test_dtype(dtype, can_hold_na, writeable=True): + data = np.random.randint(0, 2, (5, 3)).astype(dtype) + data.flags.writeable = writeable + + indexer = [2, 1, 0, 1] + out0 = np.empty((4, 3), dtype=dtype) + out1 = np.empty((5, 4), dtype=dtype) + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 3), dtype=dtype) + out1 = np.empty((5, 4), dtype=dtype) + if can_hold_na: + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected0[3, :] = np.nan + expected1[:, 3] = np.nan + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + else: + for i, out in enumerate([out0, out1]): + with tm.assertRaisesRegexp(TypeError, self.fill_error): + algos.take_nd(data, indexer, out=out, axis=i) + # no exception o/w + data.take(indexer, out=out, axis=i) + + for writeable in [True, False]: + # Check that take_nd works both with writeable arrays (in which + # case fast typed memoryviews implementation) and read-only + # arrays alike. 
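+ # (Same contract as the 1-d case: take_nd fills the out= buffers in + # place along the requested axis, and for dtypes that cannot hold the + # NaN fill it raises the "Incompatible type for fill_value" TypeError + # rather than writing a corrupted value.)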
+ _test_dtype(np.float64, True, writeable=writeable) + _test_dtype(np.float32, True, writeable=writeable) + _test_dtype(np.uint64, False, writeable=writeable) + _test_dtype(np.uint32, False, writeable=writeable) + _test_dtype(np.uint16, False, writeable=writeable) + _test_dtype(np.uint8, False, writeable=writeable) + _test_dtype(np.int64, False, writeable=writeable) + _test_dtype(np.int32, False, writeable=writeable) + _test_dtype(np.int16, False, writeable=writeable) + _test_dtype(np.int8, False, writeable=writeable) + _test_dtype(np.object_, True, writeable=writeable) + _test_dtype(np.bool, False, writeable=writeable) + + def test_2d_fill_nonna(self): + def _test_dtype(dtype, fill_value, out_dtype): + data = np.random.randint(0, 2, (5, 3)).astype(dtype) + + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()) + assert ((result[3, :] == fill_value).all()) + assert (result.dtype == out_dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()) + assert ((result[:, 3] == fill_value).all()) + assert (result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2, 3], :] == data[indexer, :]).all()) + assert (result.dtype == dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2, 3]] == data[:, indexer]).all()) + assert (result.dtype == dtype) + + _test_dtype(np.int8, np.int16(127), np.int8) + _test_dtype(np.int8, np.int16(128), np.int16) + _test_dtype(np.int32, 1, np.int32) + _test_dtype(np.int32, 2.0, np.float64) + _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) + _test_dtype(np.int32, True, np.object_) + _test_dtype(np.int32, '', np.object_) + _test_dtype(np.float64, 1, np.float64) + _test_dtype(np.float64, 2.0, np.float64) + _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) + _test_dtype(np.float64, True, np.object_) + _test_dtype(np.float64, '', np.object_) + _test_dtype(np.complex128, 1, np.complex128) + _test_dtype(np.complex128, 2.0, np.complex128) + _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) + _test_dtype(np.complex128, True, np.object_) + _test_dtype(np.complex128, '', np.object_) + _test_dtype(np.bool_, 1, np.object_) + _test_dtype(np.bool_, 2.0, np.object_) + _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) + _test_dtype(np.bool_, True, np.bool_) + _test_dtype(np.bool_, '', np.object_) + + def test_3d_with_out(self): + def _test_dtype(dtype, can_hold_na): + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + + indexer = [2, 1, 0, 1] + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + algos.take_nd(data, indexer, out=out2, axis=2) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected2 = data.take(indexer, axis=2) + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + tm.assert_almost_equal(out2, expected2) + + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + if can_hold_na: + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + 
algos.take_nd(data, indexer, out=out2, axis=2) + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected2 = data.take(indexer, axis=2) + expected0[3, :, :] = np.nan + expected1[:, 3, :] = np.nan + expected2[:, :, 3] = np.nan + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + tm.assert_almost_equal(out2, expected2) + else: + for i, out in enumerate([out0, out1, out2]): + with tm.assertRaisesRegexp(TypeError, self.fill_error): + algos.take_nd(data, indexer, out=out, axis=i) + # no exception o/w + data.take(indexer, out=out, axis=i) + + _test_dtype(np.float64, True) + _test_dtype(np.float32, True) + _test_dtype(np.uint64, False) + _test_dtype(np.uint32, False) + _test_dtype(np.uint16, False) + _test_dtype(np.uint8, False) + _test_dtype(np.int64, False) + _test_dtype(np.int32, False) + _test_dtype(np.int16, False) + _test_dtype(np.int8, False) + _test_dtype(np.object_, True) + _test_dtype(np.bool, False) + + def test_3d_fill_nonna(self): + def _test_dtype(dtype, fill_value, out_dtype): + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()) + assert ((result[3, :, :] == fill_value).all()) + assert (result.dtype == out_dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()) + assert ((result[:, 3, :] == fill_value).all()) + assert (result.dtype == out_dtype) + + result = algos.take_nd(data, indexer, axis=2, + fill_value=fill_value) + assert ((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()) + assert ((result[:, :, 3] == fill_value).all()) + assert (result.dtype == out_dtype) + + indexer = [2, 1, 0, 1] + + result = algos.take_nd(data, indexer, axis=0, + fill_value=fill_value) + assert ((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()) + assert (result.dtype == dtype) + + result = algos.take_nd(data, indexer, axis=1, + fill_value=fill_value) + assert ((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()) + assert (result.dtype == dtype) + + result = algos.take_nd(data, indexer, axis=2, + fill_value=fill_value) + assert ((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()) + assert (result.dtype == dtype) + + _test_dtype(np.int8, np.int16(127), np.int8) + _test_dtype(np.int8, np.int16(128), np.int16) + _test_dtype(np.int32, 1, np.int32) + _test_dtype(np.int32, 2.0, np.float64) + _test_dtype(np.int32, 3.0 + 4.0j, np.complex128) + _test_dtype(np.int32, True, np.object_) + _test_dtype(np.int32, '', np.object_) + _test_dtype(np.float64, 1, np.float64) + _test_dtype(np.float64, 2.0, np.float64) + _test_dtype(np.float64, 3.0 + 4.0j, np.complex128) + _test_dtype(np.float64, True, np.object_) + _test_dtype(np.float64, '', np.object_) + _test_dtype(np.complex128, 1, np.complex128) + _test_dtype(np.complex128, 2.0, np.complex128) + _test_dtype(np.complex128, 3.0 + 4.0j, np.complex128) + _test_dtype(np.complex128, True, np.object_) + _test_dtype(np.complex128, '', np.object_) + _test_dtype(np.bool_, 1, np.object_) + _test_dtype(np.bool_, 2.0, np.object_) + _test_dtype(np.bool_, 3.0 + 4.0j, np.object_) + _test_dtype(np.bool_, True, np.bool_) + _test_dtype(np.bool_, '', np.object_) + + def test_1d_other_dtypes(self): + arr = np.random.randn(10).astype(np.float32) + + indexer = [1, 2, 3, -1] + result = algos.take_1d(arr, indexer) + expected = 
arr.take(indexer) + expected[-1] = np.nan + tm.assert_almost_equal(result, expected) + + def test_2d_other_dtypes(self): + arr = np.random.randn(10, 5).astype(np.float32) + + indexer = [1, 2, 3, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + expected = arr.take(indexer, axis=0) + expected[-1] = np.nan + tm.assert_almost_equal(result, expected) + + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + expected = arr.take(indexer, axis=1) + expected[:, -1] = np.nan + tm.assert_almost_equal(result, expected) + + def test_1d_bool(self): + arr = np.array([0, 1, 0], dtype=bool) + + result = algos.take_1d(arr, [0, 2, 2, 1]) + expected = arr.take([0, 2, 2, 1]) + self.assert_numpy_array_equal(result, expected) + + result = algos.take_1d(arr, [0, 2, -1]) + self.assertEqual(result.dtype, np.object_) + + def test_2d_bool(self): + arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool) + + result = algos.take_nd(arr, [0, 2, 2, 1]) + expected = arr.take([0, 2, 2, 1], axis=0) + self.assert_numpy_array_equal(result, expected) + + result = algos.take_nd(arr, [0, 2, 2, 1], axis=1) + expected = arr.take([0, 2, 2, 1], axis=1) + self.assert_numpy_array_equal(result, expected) + + result = algos.take_nd(arr, [0, 2, -1]) + self.assertEqual(result.dtype, np.object_) + + def test_2d_float32(self): + arr = np.random.randn(4, 3).astype(np.float32) + indexer = [0, 2, -1, 1, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=0, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected[[2, 4], :] = np.nan + tm.assert_almost_equal(result, expected) + + # this now accepts a float32 out buffer (no float64 buffer needed) + out = np.empty((len(indexer), arr.shape[1]), dtype='float32') + algos.take_nd(arr, indexer, out=out) # it works!
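The dtype handling these take_nd tests pin down is easier to see in isolation. A minimal sketch, assuming only numpy and the pandas.core.algorithms module imported as algos above; the sample array and indexers here are invented for illustration:

    import numpy as np
    import pandas.core.algorithms as algos

    arr = np.arange(5, dtype=np.int32)

    # in-bounds indices preserve the dtype
    assert algos.take_nd(arr, [2, 1, 0]).dtype == np.int32

    # a -1 index means "fill"; the default fill value is np.nan, which an
    # int32 cannot hold, so the result is promoted to float64
    result = algos.take_nd(arr, [2, 1, -1])
    assert result.dtype == np.float64 and np.isnan(result[-1])

    # an explicit fill_value that fits the dtype avoids promotion,
    # mirroring the _test_dtype(np.int32, 1, np.int32) case above
    assert algos.take_nd(arr, [2, 1, -1], fill_value=0).dtype == np.int32

    # with a preallocated out buffer no promotion is possible, so an
    # unrepresentable fill raises instead, as test_3d_with_out expects
    out = np.empty(3, dtype=np.int32)
    try:
        algos.take_nd(arr, [2, 1, -1], out=out)
    except TypeError:
        pass  # int32 buffer cannot hold the NaN fill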
+ + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=1, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected[:, [2, 4]] = np.nan + tm.assert_almost_equal(result, expected) + + def test_2d_datetime64(self): + # 2005/01/01 - 2006/01/01 + arr = np.random.randint( + long(11045376), long(11360736), (5, 3)) * 100000000000 + arr = arr.view(dtype='datetime64[ns]') + indexer = [0, 2, -1, 1, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=0, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected.view(np.int64)[[2, 4], :] = iNaT + tm.assert_almost_equal(result, expected) + + result = algos.take_nd(arr, indexer, axis=0, + fill_value=datetime(2007, 1, 1)) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, out=result2, axis=0, + fill_value=datetime(2007, 1, 1)) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected[[2, 4], :] = datetime(2007, 1, 1) + tm.assert_almost_equal(result, expected) + + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=1, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected.view(np.int64)[:, [2, 4]] = iNaT + tm.assert_almost_equal(result, expected) + + result = algos.take_nd(arr, indexer, axis=1, + fill_value=datetime(2007, 1, 1)) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, out=result2, axis=1, + fill_value=datetime(2007, 1, 1)) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected[:, [2, 4]] = datetime(2007, 1, 1) + tm.assert_almost_equal(result, expected) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/__init__.py b/pandas/tests/types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/test_dtypes.py b/pandas/tests/types/test_dtypes.py similarity index 98% rename from pandas/tests/test_dtypes.py rename to pandas/tests/types/test_dtypes.py index f12adab386dab..2a9ad30a07805 100644 --- a/pandas/tests/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -5,7 +5,8 @@ import numpy as np from pandas import Series, Categorical, date_range import pandas.core.common as com -from pandas.core.common import (CategoricalDtype, is_categorical_dtype, +from pandas.types.api import CategoricalDtype +from pandas.core.common import (is_categorical_dtype, is_categorical, DatetimeTZDtype, is_datetime64tz_dtype, is_datetimetz, is_dtype_equal, is_datetime64_ns_dtype, diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py new file mode 100644 index 0000000000000..5549a3a376992 --- /dev/null +++ b/pandas/tests/types/test_generic.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +import nose +import numpy as np +import pandas as pd +import pandas.core.common as com +import pandas.util.testing as tm + +_multiprocess_can_split_ = True + + +class TestABCClasses(tm.TestCase): + tuples = [[1, 2, 2], ['red', 'blue', 'red']] + multi_index = pd.MultiIndex.from_arrays(tuples, names=('number', 'color')) + datetime_index = pd.to_datetime(['2000/1/1', '2010/1/1']) + timedelta_index = pd.to_timedelta(np.arange(5), unit='s') + period_index = 
pd.period_range('2000/1/1', '2010/1/1', freq='M') + categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) + categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) + df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index) + sparse_series = pd.Series([1, 2, 3]).to_sparse() + sparse_array = pd.SparseArray(np.random.randn(10)) + + def test_abc_types(self): + self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndex) + self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCInt64Index) + self.assertIsInstance(pd.Float64Index([1, 2, 3]), com.ABCFloat64Index) + self.assertIsInstance(self.multi_index, com.ABCMultiIndex) + self.assertIsInstance(self.datetime_index, com.ABCDatetimeIndex) + self.assertIsInstance(self.timedelta_index, com.ABCTimedeltaIndex) + self.assertIsInstance(self.period_index, com.ABCPeriodIndex) + self.assertIsInstance(self.categorical_df.index, + com.ABCCategoricalIndex) + self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndexClass) + self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCIndexClass) + self.assertIsInstance(pd.Series([1, 2, 3]), com.ABCSeries) + self.assertIsInstance(self.df, com.ABCDataFrame) + self.assertIsInstance(self.df.to_panel(), com.ABCPanel) + self.assertIsInstance(self.sparse_series, com.ABCSparseSeries) + self.assertIsInstance(self.sparse_array, com.ABCSparseArray) + self.assertIsInstance(self.categorical, com.ABCCategorical) + self.assertIsInstance(pd.Period('2012', freq='A-DEC'), com.ABCPeriod) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 016dd5ed4e56b..52be7444f445a 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -17,9 +17,10 @@ from pandas.util.decorators import Appender, Substitution from pandas.core.common import ABCSeries, isnull +import pandas.core.algorithms as algos import pandas.core.common as com -import pandas.algos as algos +import pandas.algos as _algos import pandas.hashtable as _hash @@ -291,8 +292,8 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): right_na_indexer = right_indexer.take(na_indexer) result.iloc[na_indexer, key_indexer] = ( - com.take_1d(self.right_join_keys[i], - right_na_indexer)) + algos.take_1d(self.right_join_keys[i], + right_na_indexer)) elif name in self.right: if len(self.right) == 0: continue @@ -303,8 +304,8 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): left_na_indexer = left_indexer.take(na_indexer) result.iloc[na_indexer, key_indexer] = ( - com.take_1d(self.left_join_keys[i], - left_na_indexer)) + algos.take_1d(self.left_join_keys[i], + left_na_indexer)) elif left_indexer is not None \ and isinstance(self.left_join_keys[i], np.ndarray): @@ -312,11 +313,11 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): name = 'key_%d' % i # a faster way? 
- key_col = com.take_1d(self.left_join_keys[i], left_indexer) + key_col = algos.take_1d(self.left_join_keys[i], left_indexer) na_indexer = (left_indexer == -1).nonzero()[0] right_na_indexer = right_indexer.take(na_indexer) - key_col.put(na_indexer, com.take_1d(self.right_join_keys[i], - right_na_indexer)) + key_col.put(na_indexer, algos.take_1d(self.right_join_keys[i], + right_na_indexer)) result.insert(i, name, key_col) def _get_join_info(self): @@ -576,8 +577,8 @@ def get_result(self): rdata.items, rsuf) if self.fill_method == 'ffill': - left_join_indexer = algos.ffill_indexer(left_indexer) - right_join_indexer = algos.ffill_indexer(right_indexer) + left_join_indexer = _algos.ffill_indexer(left_indexer) + right_join_indexer = _algos.ffill_indexer(right_indexer) else: left_join_indexer = left_indexer right_join_indexer = right_indexer @@ -632,16 +633,16 @@ def _get_multiindex_indexer(join_keys, index, sort): # factorize keys to a dense i8 space lkey, rkey, count = fkeys(lkey, rkey) - return algos.left_outer_join(lkey, rkey, count, sort=sort) + return _algos.left_outer_join(lkey, rkey, count, sort=sort) def _get_single_indexer(join_key, index, sort=False): left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) - left_indexer, right_indexer = \ - algos.left_outer_join(com._ensure_int64(left_key), - com._ensure_int64(right_key), - count, sort=sort) + left_indexer, right_indexer = _algos.left_outer_join( + com._ensure_int64(left_key), + com._ensure_int64(right_key), + count, sort=sort) return left_indexer, right_indexer @@ -673,14 +674,14 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False): def _right_outer_join(x, y, max_groups): - right_indexer, left_indexer = algos.left_outer_join(y, x, max_groups) + right_indexer, left_indexer = _algos.left_outer_join(y, x, max_groups) return left_indexer, right_indexer _join_functions = { - 'inner': algos.inner_join, - 'left': algos.left_outer_join, + 'inner': _algos.inner_join, + 'left': _algos.left_outer_join, 'right': _right_outer_join, - 'outer': algos.full_outer_join, + 'outer': _algos.full_outer_join, } diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 103b7484ea138..1433ce65b3021 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -19,6 +19,7 @@ from pandas.tseries.period import PeriodIndex from pandas.compat import range, lrange, lmap, map, zip, string_types import pandas.compat as compat +from pandas.formats.printing import pprint_thing from pandas.util.decorators import Appender try: # mpl optional import pandas.tseries.converter as conv @@ -486,7 +487,7 @@ def normalize(series): for i, kls in enumerate(classes): ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], - label=com.pprint_thing(kls), **kwds) + label=pprint_thing(kls), **kwds) ax.legend() ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) @@ -591,7 +592,7 @@ def f(t): f = function(row) y = f(t) kls = class_col.iat[i] - label = com.pprint_thing(kls) + label = pprint_thing(kls) if label not in used_legends: used_legends.add(label) ax.plot(t, y, color=colors[kls], label=label, **kwds) @@ -753,7 +754,7 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, for i in range(n): y = df.iloc[i].values kls = class_col.iat[i] - label = com.pprint_thing(kls) + label = pprint_thing(kls) if label not in used_legends: used_legends.add(label) ax.plot(x, y, color=colors[kls], label=label, **kwds) @@ -1148,7 +1149,7 @@ def _add_table(self): def 
_post_plot_logic_common(self, ax, data): """Common post process for each axes""" - labels = [com.pprint_thing(key) for key in data.index] + labels = [pprint_thing(key) for key in data.index] labels = dict(zip(range(len(data.index)), labels)) if self.orientation == 'vertical' or self.orientation is None: @@ -1216,10 +1217,10 @@ def legend_title(self): if not isinstance(self.data.columns, MultiIndex): name = self.data.columns.name if name is not None: - name = com.pprint_thing(name) + name = pprint_thing(name) return name else: - stringified = map(com.pprint_thing, + stringified = map(pprint_thing, self.data.columns.names) return ','.join(stringified) @@ -1342,13 +1343,13 @@ def _get_index_name(self): if isinstance(self.data.index, MultiIndex): name = self.data.index.names if any(x is not None for x in name): - name = ','.join([com.pprint_thing(x) for x in name]) + name = ','.join([pprint_thing(x) for x in name]) else: name = None else: name = self.data.index.name if name is not None: - name = com.pprint_thing(name) + name = pprint_thing(name) return name @@ -1549,8 +1550,8 @@ def nseries(self): def _post_plot_logic(self, ax, data): x, y = self.x, self.y - ax.set_ylabel(com.pprint_thing(y)) - ax.set_xlabel(com.pprint_thing(x)) + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) class ScatterPlot(PlanePlot): @@ -1695,7 +1696,7 @@ def _make_plot(self): errors = self._get_errorbars(label=label, index=i) kwds = dict(kwds, **errors) - label = com.pprint_thing(label) # .encode('utf-8') + label = pprint_thing(label) # .encode('utf-8') kwds['label'] = label newlines = plotf(ax, x, y, style=style, column_num=i, @@ -1935,7 +1936,7 @@ def _make_plot(self): errors = self._get_errorbars(label=label, index=i) kwds = dict(kwds, **errors) - label = com.pprint_thing(label) + label = pprint_thing(label) if (('yerr' in kwds) or ('xerr' in kwds)) \ and (kwds.get('ecolor') is None): @@ -1970,9 +1971,9 @@ def _make_plot(self): def _post_plot_logic(self, ax, data): if self.use_index: - str_index = [com.pprint_thing(key) for key in data.index] + str_index = [pprint_thing(key) for key in data.index] else: - str_index = [com.pprint_thing(key) for key in range(data.shape[0])] + str_index = [pprint_thing(key) for key in range(data.shape[0])] name = self._get_index_name() s_edge = self.ax_pos[0] - 0.25 + self.lim_offset @@ -2058,7 +2059,7 @@ def _make_plot(self): kwds = self.kwds.copy() - label = com.pprint_thing(label) + label = pprint_thing(label) kwds['label'] = label style, kwds = self._apply_style_colors(colors, kwds, i, label) @@ -2169,7 +2170,7 @@ def _make_plot(self): for i, (label, y) in enumerate(self._iter_data()): ax = self._get_ax(i) if label is not None: - label = com.pprint_thing(label) + label = pprint_thing(label) ax.set_ylabel(label) kwds = self.kwds.copy() @@ -2180,7 +2181,7 @@ def blank_labeler(label, value): else: return label - idx = [com.pprint_thing(v) for v in self.data.index] + idx = [pprint_thing(v) for v in self.data.index] labels = kwds.pop('labels', idx) # labels is used for each wedge's labels # Blank out labels for values of 0 so they don't overlap @@ -2319,7 +2320,7 @@ def _make_plot(self): self.maybe_color_bp(bp) self._return_obj[label] = ret - label = [com.pprint_thing(label)] + label = [pprint_thing(label)] self._set_ticklabels(ax, label) else: y = self.data.values.T @@ -2332,9 +2333,9 @@ def _make_plot(self): self._return_obj = ret labels = [l for l, _ in self._iter_data()] - labels = [com.pprint_thing(l) for l in labels] + labels = [pprint_thing(l) for l in labels] if 
not self.use_index: - labels = [com.pprint_thing(key) for key in range(len(labels))] + labels = [pprint_thing(key) for key in range(len(labels))] self._set_ticklabels(ax, labels) def _set_ticklabels(self, ax, labels): @@ -2711,7 +2712,7 @@ def maybe_color_bp(bp): setp(bp['medians'], color=colors[2], alpha=1) def plot_group(keys, values, ax): - keys = [com.pprint_thing(x) for x in keys] + keys = [pprint_thing(x) for x in keys] values = [remove_na(v) for v in values] bp = ax.boxplot(values, **kwds) if kwds.get('vert', 1): @@ -2821,8 +2822,8 @@ def plot_group(group, ax): else: fig = ax.get_figure() plot_group(data, ax) - ax.set_ylabel(com.pprint_thing(y)) - ax.set_xlabel(com.pprint_thing(x)) + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) ax.grid(grid) @@ -3077,7 +3078,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, for (key, group), ax in zip(grouped, axes): d = group.boxplot(ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds) - ax.set_title(com.pprint_thing(key)) + ax.set_title(pprint_thing(key)) ret[key] = d fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) @@ -3124,7 +3125,7 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, if numeric_only and isinstance(group, DataFrame): group = group._get_numeric_data() plotf(group, ax, **kwargs) - ax.set_title(com.pprint_thing(key)) + ax.set_title(pprint_thing(key)) return fig, axes @@ -3151,7 +3152,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, keys, values = zip(*gp_col) re_plotf = plotf(keys, values, ax, **kwargs) ax.set_title(col) - ax.set_xlabel(com.pprint_thing(by)) + ax.set_xlabel(pprint_thing(by)) result[col] = re_plotf ax.grid(grid) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 6e7b0ac9bade8..f59a970fd9853 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -9,6 +9,7 @@ import numpy as np from pandas.core import common as com, algorithms from pandas.core.common import is_integer, is_float, AbstractMethodError +import pandas.formats.printing as printing import pandas.tslib as tslib import pandas.lib as lib from pandas.core.index import Index @@ -673,7 +674,7 @@ def summary(self, name=None): if name is None: name = type(self).__name__ - result = '%s: %s entries%s' % (com.pprint_thing(name), + result = '%s: %s entries%s' % (printing.pprint_thing(name), len(self), index_summary) if self.freq: result += '\nFreq: %s' % self.freqstr diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 11a5fdc062e22..c4f100eb8f4d3 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -10,12 +10,13 @@ from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex from pandas import tslib +from pandas.core.algorithms import take_1d from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike, is_datetime_arraylike, is_integer_dtype, is_list_like, is_datetime64_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, is_categorical_dtype, - get_dtype_kinds, take_1d) + get_dtype_kinds) def is_datetimelike(data): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 50171c3ae4fe3..dc40387cc365f 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -691,12 +691,12 @@ def _mpl_repr(self): @cache_readonly def _is_dates_only(self): - from pandas.core.format import _is_dates_only + from pandas.formats.format import _is_dates_only return _is_dates_only(self.values) @property def 
_formatter_func(self): - from pandas.core.format import _get_format_datetime64 + from pandas.formats.format import _get_format_datetime64 formatter = _get_format_datetime64(is_dates_only=self._is_dates_only) return lambda x: "'%s'" % formatter(x, tz=self.tz) @@ -812,7 +812,7 @@ def _add_offset(self, offset): def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): - from pandas.core.format import _get_format_datetime64_from_values + from pandas.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(self, date_format) return tslib.format_array_from_datetime(self.asi8, diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index e9a9796f9c48d..da04acf6446af 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -24,6 +24,7 @@ from pandas.lib import Timedelta import pandas.lib as lib import pandas.tslib as tslib +import pandas.core.missing as missing from pandas.compat import zip, u @@ -77,8 +78,8 @@ def wrapper(self, other): result = getattr(self.values, opname)(other.values) - mask = (com.mask_missing(self.values, tslib.iNaT) | - com.mask_missing(other.values, tslib.iNaT)) + mask = (missing.mask_missing(self.values, tslib.iNaT) | + missing.mask_missing(other.values, tslib.iNaT)) if mask.any(): result[mask] = nat_result diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 5e26d5dbf9387..fe64af67af0ed 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -12,7 +12,7 @@ from pandas.tseries.offsets import DateOffset import pandas.tseries.frequencies as frequencies from pandas.tseries.index import DatetimeIndex -import pandas.core.common as com +from pandas.formats.printing import pprint_thing import pandas.compat as compat from pandas.tseries.converter import (TimeSeries_DateLocator, @@ -141,7 +141,7 @@ def _replot_ax(ax, freq, kwargs): lines.append(plotf(ax, series.index._mpl_repr(), series.values, **kwds)[0]) - labels.append(com.pprint_thing(series.name)) + labels.append(pprint_thing(series.name)) return lines, labels diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 1970db36513e6..454eb6b3c165e 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -14,6 +14,7 @@ from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds from pandas.tseries.period import PeriodIndex, period_range import pandas.core.common as com +import pandas.core.algorithms as algos import pandas.compat as compat from pandas.lib import Timestamp @@ -1047,7 +1048,7 @@ def _take_new_index(obj, indexer, new_index, axis=0): from pandas.core.api import Series, DataFrame if isinstance(obj, Series): - new_values = com.take_1d(obj.values, indexer) + new_values = algos.take_1d(obj.values, indexer) return Series(new_values, index=new_index, name=obj.name) elif isinstance(obj, DataFrame): if axis == 1: diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 6e54f1fde8a8f..56012a8c4ad6a 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -267,7 +267,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): @property def _formatter_func(self): - from pandas.core.format import _get_format_timedelta64 + from pandas.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) def __setstate__(self, state): @@ -340,7 +340,7 @@ def _sub_datelike(self, other): def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): - from pandas.core.format import 
Timedelta64Formatter + from pandas.formats.format import Timedelta64Formatter return Timedelta64Formatter(values=self, nat_rep=na_rep, justify='all').get_result() diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 7a951683abaec..cb0b76f5d81f2 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -17,7 +17,7 @@ from pytz import NonExistentTimeError import pandas.util.testing as tm -from pandas.core.dtypes import DatetimeTZDtype +from pandas.types.api import DatetimeTZDtype from pandas.util.testing import assert_frame_equal from pandas.compat import lrange, zip diff --git a/pandas/types/__init__.py b/pandas/types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/types/api.py b/pandas/types/api.py new file mode 100644 index 0000000000000..bb61025a41a37 --- /dev/null +++ b/pandas/types/api.py @@ -0,0 +1,42 @@ +# flake8: noqa + +import numpy as np +from pandas.compat import string_types + +from .dtypes import (CategoricalDtype, CategoricalDtypeType, + DatetimeTZDtype, DatetimeTZDtypeType) +from .generic import (ABCIndex, ABCInt64Index, ABCRangeIndex, + ABCFloat64Index, ABCMultiIndex, + ABCDatetimeIndex, + ABCTimedeltaIndex, ABCPeriodIndex, + ABCCategoricalIndex, + ABCIndexClass, + ABCSeries, ABCDataFrame, ABCPanel, + ABCSparseSeries, ABCSparseArray, + ABCCategorical, ABCPeriod, + ABCGeneric) + +def pandas_dtype(dtype): + """ + Converts input into a pandas only dtype object or a numpy dtype object. + + Parameters + ---------- + dtype : object to be converted + + Returns + ------- + np.dtype or a pandas dtype + """ + if isinstance(dtype, string_types): + try: + return DatetimeTZDtype.construct_from_string(dtype) + except TypeError: + pass + + try: + return CategoricalDtype.construct_from_string(dtype) + except TypeError: + pass + + return np.dtype(dtype) diff --git a/pandas/core/dtypes.py b/pandas/types/dtypes.py similarity index 100% rename from pandas/core/dtypes.py rename to pandas/types/dtypes.py diff --git a/pandas/types/generic.py b/pandas/types/generic.py new file mode 100644 index 0000000000000..af3f735f4932b --- /dev/null +++ b/pandas/types/generic.py @@ -0,0 +1,57 @@ +""" define generic base classes for pandas objects """ + + +# define abstract base classes to enable isinstance type checking on our +# objects +def create_pandas_abc_type(name, attr, comp): + @classmethod + def _check(cls, inst): + return getattr(inst, attr, '_typ') in comp + + dct = dict(__instancecheck__=_check, __subclasscheck__=_check) + meta = type("ABCBase", (type, ), dct) + return meta(name, tuple(), dct) + + +ABCIndex = create_pandas_abc_type("ABCIndex", "_typ", ("index", )) +ABCInt64Index = create_pandas_abc_type("ABCInt64Index", "_typ", + ("int64index", )) +ABCRangeIndex = create_pandas_abc_type("ABCRangeIndex", "_typ", + ("rangeindex", )) +ABCFloat64Index = create_pandas_abc_type("ABCFloat64Index", "_typ", + ("float64index", )) +ABCMultiIndex = create_pandas_abc_type("ABCMultiIndex", "_typ", + ("multiindex", )) +ABCDatetimeIndex = create_pandas_abc_type("ABCDatetimeIndex", "_typ", + ("datetimeindex", )) +ABCTimedeltaIndex = create_pandas_abc_type("ABCTimedeltaIndex", "_typ", + ("timedeltaindex", )) +ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", + ("periodindex", )) +ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", + ("categoricalindex", )) +ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", + ("index", "int64index", 
"rangeindex", + "float64index", + "multiindex", "datetimeindex", + "timedeltaindex", "periodindex", + "categoricalindex")) + +ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series", )) +ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe", )) +ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel", )) +ABCSparseSeries = create_pandas_abc_type("ABCSparseSeries", "_subtyp", + ('sparse_series', + 'sparse_time_series')) +ABCSparseArray = create_pandas_abc_type("ABCSparseArray", "_subtyp", + ('sparse_array', 'sparse_series')) +ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", + ("categorical")) +ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) + + +class _ABCGeneric(type): + def __instancecheck__(cls, inst): + return hasattr(inst, "_data") + +ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {}) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 4a328fc7841f6..1d479868c00a6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -25,7 +25,9 @@ from pandas.core.common import (is_sequence, array_equivalent, is_list_like, is_datetimelike_v_numeric, is_datetimelike_v_object, is_number, - pprint_thing, take_1d, needs_i8_conversion) + needs_i8_conversion) +from pandas.formats.printing import pprint_thing +from pandas.core.algorithms import take_1d import pandas.compat as compat import pandas.lib as lib diff --git a/setup.py b/setup.py index 1467ea1da1949..e4dc6dc16929c 100755 --- a/setup.py +++ b/setup.py @@ -596,7 +596,8 @@ def pxd(name): 'tests/data/*.table', 'tests/data/*.html', 'tests/data/html_encoding/*.html', - 'tests/test_json/data/*.json'], + 'tests/test_json/data/*.json', + 'tests/formats/data/*.csv'], 'pandas.tools': ['tests/*.csv'], 'pandas.tests': ['data/*.pickle', 'data/*.csv'],