From 233a284567925d79c68cea137e52bcda69210b63 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 14 Oct 2018 04:26:15 -0700 Subject: [PATCH 01/12] CLN-23123 Move SparseArray to arrays --- pandas/__init__.py | 2 +- pandas/_libs/parsers.pyx | 2 +- pandas/compat/numpy/function.py | 3 +- pandas/compat/pickle_compat.py | 23 +- pandas/core/api.py | 2 +- pandas/core/arrays/__init__.py | 10 - pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/integer.py | 3 +- pandas/core/arrays/interval.py | 3 +- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/sparse/api.py | 6 + pandas/core/{ => arrays}/sparse/array.py | 2 +- pandas/core/{ => arrays}/sparse/dtype.py | 0 pandas/core/{ => arrays}/sparse/frame.py | 4 +- .../core/{ => arrays}/sparse/scipy_sparse.py | 1 - pandas/core/{ => arrays}/sparse/series.py | 6 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/categorical.py | 2 +- pandas/core/dtypes/common.py | 6 +- pandas/core/dtypes/concat.py | 12 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/frame.py | 7 +- pandas/core/groupby/grouper.py | 3 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/core/ops.py | 6 +- pandas/core/reshape/melt.py | 2 +- pandas/core/reshape/reshape.py | 6 +- pandas/core/series.py | 8 +- pandas/core/sorting.py | 2 +- pandas/core/sparse/__init__.py | 0 pandas/core/sparse/api.py | 6 - pandas/core/util/hashing.py | 1 - pandas/io/packers.py | 6 +- pandas/io/parsers.py | 2 +- pandas/io/pytables.py | 2 +- pandas/io/stata.py | 2 +- .../{ => arrays}/sparse/frame/conftest.py | 0 .../{ => arrays}/sparse/frame/test_apply.py | 2 +- .../{ => arrays}/sparse/frame/test_frame.py | 4 +- .../sparse/frame/test_to_from_scipy.py | 2 +- .../{ => arrays}/sparse/series/test_series.py | 6 +- 
.../{ => arrays}/sparse/test_arithmetics.py | 2 +- .../tests/{ => arrays}/sparse/test_array.py | 2 +- .../sparse/test_combine_concat.py | 2 +- .../tests/{ => arrays}/sparse/test_dtype.py | 2 +- .../tests/{ => arrays}/sparse/test_format.py | 0 .../tests/{ => arrays}/sparse/test_groupby.py | 0 .../{ => arrays}/sparse/test_indexing.py | 2 +- pandas/tests/arrays/test_integer.py | 2 +- pandas/tests/arrays/test_interval.py | 2 +- pandas/tests/dtypes/test_common.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- pandas/tests/extension/decimal/array.py | 4 +- pandas/tests/extension/json/array.py | 2 +- pandas/tests/extension/test_common.py | 2 +- pandas/tests/extension/test_integer.py | 2 +- pandas/tests/extension/test_interval.py | 2 +- pandas/tests/extension/test_sparse.py | 2 +- pandas/tests/frame/test_block_internals.py | 3 +- pandas/tests/frame/test_dtypes.py | 2 +- pandas/tests/frame/test_indexing.py | 2 +- .../indexes/interval/test_construction.py | 2 +- pandas/tests/indexing/test_indexing.py | 9 +- pandas/tests/reshape/test_reshape.py | 2 +- pandas/tests/series/test_combine_concat.py | 6 +- pandas/tests/series/test_subclass.py | 2 +- pandas/tests/sparse/__init__.py | 0 pandas/tests/sparse/common.py | 0 pandas/tests/sparse/frame/__init__.py | 0 pandas/tests/sparse/frame/test_analytics.py | 40 -- pandas/tests/sparse/frame/test_indexing.py | 113 ---- pandas/tests/sparse/frame/test_to_csv.py | 20 - pandas/tests/sparse/series/__init__.py | 0 pandas/tests/sparse/series/test_indexing.py | 113 ---- pandas/tests/sparse/test_libsparse.py | 604 ------------------ pandas/tests/sparse/test_pivot.py | 50 -- pandas/tests/sparse/test_reshape.py | 38 -- pandas/tests/test_base.py | 4 +- pandas/util/testing.py | 3 +- 86 files changed, 127 insertions(+), 1096 deletions(-) create mode 100644 pandas/core/arrays/sparse/api.py rename pandas/core/{ => arrays}/sparse/array.py (99%) rename pandas/core/{ => arrays}/sparse/dtype.py (100%) rename pandas/core/{ => arrays}/sparse/frame.py (99%) 
rename pandas/core/{ => arrays}/sparse/scipy_sparse.py (99%) rename pandas/core/{ => arrays}/sparse/series.py (99%) delete mode 100644 pandas/core/sparse/__init__.py delete mode 100644 pandas/core/sparse/api.py rename pandas/tests/{ => arrays}/sparse/frame/conftest.py (100%) rename pandas/tests/{ => arrays}/sparse/frame/test_apply.py (97%) rename pandas/tests/{ => arrays}/sparse/frame/test_frame.py (99%) rename pandas/tests/{ => arrays}/sparse/frame/test_to_from_scipy.py (99%) rename pandas/tests/{ => arrays}/sparse/series/test_series.py (99%) rename pandas/tests/{ => arrays}/sparse/test_arithmetics.py (99%) rename pandas/tests/{ => arrays}/sparse/test_array.py (99%) rename pandas/tests/{ => arrays}/sparse/test_combine_concat.py (99%) rename pandas/tests/{ => arrays}/sparse/test_dtype.py (98%) rename pandas/tests/{ => arrays}/sparse/test_format.py (100%) rename pandas/tests/{ => arrays}/sparse/test_groupby.py (100%) rename pandas/tests/{ => arrays}/sparse/test_indexing.py (99%) delete mode 100644 pandas/tests/sparse/__init__.py delete mode 100644 pandas/tests/sparse/common.py delete mode 100644 pandas/tests/sparse/frame/__init__.py delete mode 100644 pandas/tests/sparse/frame/test_analytics.py delete mode 100644 pandas/tests/sparse/frame/test_indexing.py delete mode 100644 pandas/tests/sparse/frame/test_to_csv.py delete mode 100644 pandas/tests/sparse/series/__init__.py delete mode 100644 pandas/tests/sparse/series/test_indexing.py delete mode 100644 pandas/tests/sparse/test_libsparse.py delete mode 100644 pandas/tests/sparse/test_pivot.py delete mode 100644 pandas/tests/sparse/test_reshape.py diff --git a/pandas/__init__.py b/pandas/__init__.py index e446782d9665e..930b8844d9014 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -40,7 +40,7 @@ import pandas.core.config_init from pandas.core.api import * -from pandas.core.sparse.api import * +from pandas.core.arrays.sparse.api import * from pandas.tseries.api import * from pandas.core.computation.api 
import * from pandas.core.reshape.api import * diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index e3df391c5c45d..f177e3faf4957 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -51,7 +51,7 @@ from pandas.core.dtypes.common import ( is_bool_dtype, is_object_dtype, is_datetime64_dtype, pandas_dtype) -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core.dtypes.concat import union_categoricals import pandas.io.common as icom diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index d42be56963569..b54f7e31fe87b 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -19,7 +19,8 @@ """ from numpy import ndarray -from pandas.util._validators import (validate_args, validate_kwargs, +from pandas.util._validators import (validate_args, + validate_kwargs, validate_args_and_kwargs) from pandas.errors import UnsupportedFunctionCall from pandas.core.dtypes.common import is_integer, is_bool diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 713a5b1120beb..321ae6d541555 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -57,6 +57,14 @@ def load_reduce(self): # If classes are moved, provide compat here. 
_class_locations_map = { + # 23123, fix sparse mapping + ('pandas.core.sparse.array', 'SparseArray'): + ('pandas.core.arrays.sparse.series', 'SparseArray'), + ('pandas.core.sparse.series', 'SparseSeries'): + ('pandas.core.arrays.sparse.series', 'SparseSeries'), + ('pandas.core.sparse.frame', 'SparseDataFrame'): + ('pandas.core.arrays.sparse.frame', 'SparseDataFrame'), + # 15477 ('pandas.core.base', 'FrozenNDArray'): ('pandas.core.indexes.frozen', 'FrozenNDArray'), @@ -67,7 +75,7 @@ def load_reduce(self): ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), ('pandas.sparse.series', 'SparseTimeSeries'): - ('pandas.core.sparse.series', 'SparseSeries'), + ('pandas.core.arrays.sparse.series', 'SparseSeries'), # 12588, extensions moving ('pandas._sparse', 'BlockIndex'): @@ -88,11 +96,11 @@ def load_reduce(self): # 15998 top-level dirs moving ('pandas.sparse.array', 'SparseArray'): - ('pandas.core.sparse.array', 'SparseArray'), + ('pandas.core.arrays.sparse.array', 'SparseArray'), ('pandas.sparse.series', 'SparseSeries'): - ('pandas.core.sparse.series', 'SparseSeries'), + ('pandas.core.arrays.sparse.series', 'SparseSeries'), ('pandas.sparse.frame', 'SparseDataFrame'): - ('pandas.core.sparse.frame', 'SparseDataFrame'), + ('pandas.core.arrays.sparse.frame', 'SparseDataFrame'), ('pandas.indexes.base', '_new_Index'): ('pandas.core.indexes.base', '_new_Index'), ('pandas.indexes.base', 'Index'): @@ -112,7 +120,7 @@ def load_reduce(self): # 19269, arrays moving ('pandas.core.categorical', 'Categorical'): - ('pandas.core.arrays', 'Categorical'), + ('pandas.core.arrays.categorical', 'Categorical'), # 19939, add timedeltaindex, float64index compat from 15998 move ('pandas.tseries.tdi', 'TimedeltaIndex'): @@ -130,8 +138,13 @@ class Unpickler(pkl._Unpickler): def find_class(self, module, name): # override superclass + print(module) + print(name) key = (module, name) module, name = _class_locations_map.get(key, key) + print(module) + print(name) + print("---") 
return super(Unpickler, self).find_class(module, name) else: diff --git a/pandas/core/api.py b/pandas/core/api.py index 32df317a602a9..13d3ef5a20c42 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -6,7 +6,7 @@ from pandas.core.algorithms import factorize, unique, value_counts from pandas.core.dtypes.missing import isna, isnull, notna, notnull -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core.groupby import Grouper from pandas.io.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 29f258bf1b29e..e69de29bb2d1d 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,10 +0,0 @@ -from .base import (ExtensionArray, # noqa - ExtensionOpsMixin, - ExtensionScalarOpsMixin) -from .categorical import Categorical # noqa -from .datetimes import DatetimeArrayMixin # noqa -from .interval import IntervalArray # noqa -from .period import PeriodArrayMixin # noqa -from .timedeltas import TimedeltaArrayMixin # noqa -from .integer import ( # noqa - IntegerArray, integer_array) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 37fc451ba2a2b..39533b62c96a4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -715,7 +715,7 @@ def __rsub__(self, other): # we need to wrap in DatetimeArray/Index and flip the operation if not isinstance(other, DatetimeLikeArrayMixin): # Avoid down-casting DatetimeIndex - from pandas.core.arrays import DatetimeArrayMixin + from pandas.core.arrays.datetimes import DatetimeArrayMixin other = DatetimeArrayMixin(other) return other - self elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4c75927135b22..448394f6b0204 100644 --- 
a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -34,7 +34,7 @@ from pandas.tseries.frequencies import to_offset, get_period_alias from pandas.tseries.offsets import Tick, generate_range -from pandas.core.arrays import datetimelike as dtl +import pandas.core.arrays.datetimelike as dtl _midnight = time(0, 0) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 9917045f2f7d2..dfe9cd7110e13 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -18,7 +18,8 @@ is_integer_dtype, is_object_dtype, is_list_like) -from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays.base import (ExtensionArray, + ExtensionOpsMixin) from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.missing import isna, notna diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 134999f05364f..769053ca28045 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -23,7 +23,8 @@ from pandas.util._decorators import Appender from pandas.util._doctools import _WritableDoc -from . 
import ExtensionArray, Categorical +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.categorical import Categorical _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _interval_shared_docs = {} diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d32ff76c0819b..9c261da48fd2d 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -27,7 +27,7 @@ from pandas.tseries import frequencies from pandas.tseries.offsets import Tick, DateOffset -from pandas.core.arrays import datetimelike as dtl +import pandas.core.arrays.datetimelike as dtl from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin diff --git a/pandas/core/arrays/sparse/api.py b/pandas/core/arrays/sparse/api.py new file mode 100644 index 0000000000000..2176213813e2e --- /dev/null +++ b/pandas/core/arrays/sparse/api.py @@ -0,0 +1,6 @@ +# pylint: disable=W0611 +# flake8: noqa +from pandas.core.arrays.sparse.array import SparseArray +from pandas.core.arrays.sparse.series import SparseSeries +from pandas.core.arrays.sparse.frame import SparseDataFrame +from pandas.core.arrays.sparse.dtype import SparseDtype diff --git a/pandas/core/sparse/array.py b/pandas/core/arrays/sparse/array.py similarity index 99% rename from pandas/core/sparse/array.py rename to pandas/core/arrays/sparse/array.py index 15b5118db2230..e7361df49345f 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -45,7 +45,7 @@ import pandas.core.algorithms as algos import pandas.io.formats.printing as printing -from pandas.core.sparse.dtype import SparseDtype +from pandas.core.arrays.sparse.dtype import SparseDtype _sparray_doc_kwargs = dict(klass='SparseArray') diff --git a/pandas/core/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py similarity index 100% rename from pandas/core/sparse/dtype.py rename to pandas/core/arrays/sparse/dtype.py diff --git a/pandas/core/sparse/frame.py b/pandas/core/arrays/sparse/frame.py similarity 
index 99% rename from pandas/core/sparse/frame.py rename to pandas/core/arrays/sparse/frame.py index 36b6ea089f459..a4f05542df5b6 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/arrays/sparse/frame.py @@ -22,8 +22,8 @@ from pandas.core.internals import (BlockManager, create_block_manager_from_arrays) import pandas.core.generic as generic -from pandas.core.sparse.series import SparseSeries, SparseArray -from pandas.core.sparse.dtype import SparseDtype +from pandas.core.arrays.sparse.series import SparseSeries, SparseArray +from pandas.core.arrays.sparse.dtype import SparseDtype from pandas._libs.sparse import BlockIndex, get_blocks from pandas.util._decorators import Appender import pandas.core.ops as ops diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py similarity index 99% rename from pandas/core/sparse/scipy_sparse.py rename to pandas/core/arrays/sparse/scipy_sparse.py index 748a52f484893..9e0404918aec0 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -1,6 +1,5 @@ """ Interaction with scipy.sparse matrices. - Currently only includes SparseSeries.to_coo helpers. 
""" from pandas.core.index import MultiIndex, Index diff --git a/pandas/core/sparse/series.py b/pandas/core/arrays/sparse/series.py similarity index 99% rename from pandas/core/sparse/series.py rename to pandas/core/arrays/sparse/series.py index eebf26bbb9708..290f8c6de70ad 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/arrays/sparse/series.py @@ -24,13 +24,13 @@ import pandas._libs.index as libindex from pandas.util._decorators import Appender, Substitution -from pandas.core.sparse.array import ( +from pandas.core.arrays.sparse.array import ( SparseArray, ) from pandas._libs.sparse import BlockIndex, IntIndex import pandas._libs.sparse as splib -from pandas.core.sparse.scipy_sparse import ( +from pandas.core.arrays.sparse.scipy_sparse import ( _sparse_series_to_coo, _coo_to_sparse_series) @@ -205,7 +205,7 @@ def _constructor(self): @property def _constructor_expanddim(self): - from pandas.core.sparse.api import SparseDataFrame + from pandas.core.arrays.sparse.api import SparseDataFrame return SparseDataFrame @property diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4904a90ab7b2b..bc50fea39d062 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -230,7 +230,7 @@ def _add_delta(self, delta): def _add_datelike(self, other): # adding a timedeltaindex to a datetimelike - from pandas.core.arrays import DatetimeArrayMixin + from pandas.core.arrays.datetimes import DatetimeArrayMixin if isinstance(other, (DatetimeArrayMixin, np.ndarray)): # if other is an ndarray, we assume it is datetime64-dtype # defer to implementation in DatetimeIndex diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 530a3ecb5f378..e8f4cfb8cfac9 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -4,5 +4,5 @@ warnings.warn("'pandas.core' is private. 
Use 'pandas.Categorical'", FutureWarning, stacklevel=2) -from pandas.core.arrays import Categorical # noqa +from pandas.core.arrays.categorical import Categorical # noqa from pandas.core.dtypes.dtypes import CategoricalDtype # noqa diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 7a4e7022f7819..6e55ced943d0c 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1,5 +1,4 @@ """ common type operations """ - import numpy as np from pandas.compat import (string_types, text_type, binary_type, PY3, PY36) @@ -12,7 +11,6 @@ PeriodDtype, IntervalDtype, PandasExtensionDtype, ExtensionDtype, _pandas_registry) -from pandas.core.sparse.dtype import SparseDtype from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, @@ -22,7 +20,7 @@ is_dict_like, is_scalar, is_string_like, is_list_like, is_number, is_file_like, is_re, is_re_compilable, is_sequence, is_nested_list_like, is_named_tuple, is_array_like, is_decimal, is_complex, is_interval) - +from pandas.core.arrays.sparse.dtype import SparseDtype _POSSIBLY_CAST_DTYPES = {np.dtype(t).name for t in ['O', 'int8', 'uint8', 'int16', 'uint16', @@ -181,7 +179,7 @@ def is_sparse(arr): >>> is_sparse(bsr_matrix([1, 2, 3])) False """ - from pandas.core.sparse.dtype import SparseDtype + from pandas.core.arrays.sparse.dtype import SparseDtype dtype = getattr(arr, 'dtype', arr) return isinstance(dtype, SparseDtype) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ac824708245d2..48a1312b5156d 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -76,7 +76,7 @@ def _get_series_result_type(result, objs=None): if isinstance(result, dict): # concat Series with axis 1 if all(is_sparse(c) for c in compat.itervalues(result)): - from pandas.core.sparse.api import SparseDataFrame + from pandas.core.arrays.sparse.api import 
SparseDataFrame return SparseDataFrame else: from pandas.core.frame import DataFrame @@ -84,7 +84,7 @@ def _get_series_result_type(result, objs=None): # otherwise it is a SingleBlockManager (axis = 0) if result._block.is_sparse: - from pandas.core.sparse.api import SparseSeries + from pandas.core.arrays.sparse.api import SparseSeries return SparseSeries else: return objs[0]._constructor @@ -100,7 +100,7 @@ def _get_frame_result_type(result, objs): if (result.blocks and ( all(is_sparse(b) for b in result.blocks) or all(isinstance(obj, ABCSparseDataFrame) for obj in objs))): - from pandas.core.sparse.api import SparseDataFrame + from pandas.core.arrays.sparse.api import SparseDataFrame return SparseDataFrame else: return next(obj for obj in objs if not isinstance(obj, @@ -123,7 +123,7 @@ def _get_sliced_frame_result_type(data, obj): Series or SparseSeries """ if is_sparse(data): - from pandas.core.sparse.api import SparseSeries + from pandas.core.arrays.sparse.api import SparseSeries return SparseSeries return obj._constructor_sliced @@ -523,7 +523,7 @@ def _concat_index_asobject(to_concat, name=None): PeriodIndex are converted to object dtype before concatenation """ from pandas import Index - from pandas.core.arrays import ExtensionArray + from pandas.core.arrays.base import ExtensionArray klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray) @@ -556,7 +556,7 @@ def _concat_sparse(to_concat, axis=0, typs=None): a single array, preserving the combined dtypes """ - from pandas.core.sparse.array import SparseArray + from pandas.core.arrays.sparse.array import SparseArray fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)] diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index f07fb3cd80eab..dde5c84a13344 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -776,7 +776,7 @@ def construct_array_type(cls): ------- type """ - from pandas.core.arrays import IntervalArray 
+ from pandas.core.arrays.interval import IntervalArray return IntervalArray @classmethod diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e9be7a3e9afb8..5a47e3d94a136 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -80,7 +80,8 @@ from pandas.core import nanops from pandas.core import ops from pandas.core.accessor import CachedAccessor -from pandas.core.arrays import Categorical, ExtensionArray +from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays.base import ExtensionArray from pandas.core.config import get_option from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, ensure_index, @@ -1761,9 +1762,9 @@ def to_sparse(self, fill_value=None, kind='block'): 1 1.0 NaN 2 NaN 1.0 >>> type(sdf) - + """ - from pandas.core.sparse.frame import SparseDataFrame + from pandas.core.arrays.sparse.frame import SparseDataFrame return SparseDataFrame(self._series, index=self.index, columns=self.columns, default_kind=kind, default_fill_value=fill_value) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 1c8fe0e6cadad..c95208198f85a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -12,7 +12,8 @@ from pandas.compat import zip, callable from pandas.core.dtypes.generic import ABCSeries -from pandas.core.arrays import ExtensionArray, Categorical +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.categorical import Categorical from pandas.core.index import ( Index, MultiIndex, CategoricalIndex) from pandas.core.dtypes.common import ( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 51c84d6e28cb4..42ec8dff9e548 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -14,7 +14,7 @@ from pandas import compat from pandas.core.accessor import CachedAccessor -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.base import 
ExtensionArray from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, ABCMultiIndex, diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 70140d2d9a432..1bbd5b9ac10ac 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -30,7 +30,7 @@ import pandas.core.dtypes.concat as _concat from pandas.core.arrays.datetimes import DatetimeArrayMixin, _to_m8 -from pandas.core.arrays import datetimelike as dtl +import pandas.core.arrays.datetimelike as dtl from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.numeric import Int64Index diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3cccb65503378..49bf1c18721d2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1858,7 +1858,7 @@ def _get_labels_for_sorting(self): for sorting, where we need to disambiguate that -1 is not a valid valid """ - from pandas.core.arrays import Categorical + from pandas.core.arrays.categorical import Categorical def cats(label): return np.arange(np.array(label).max() + 1 if len(label) else 0, diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f452a57e82725..9a3cd0bb8f68c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -31,7 +31,7 @@ from pandas._libs.tslibs import resolution, period from pandas.core.algorithms import unique1d -from pandas.core.arrays import datetimelike as dtl +import pandas.core.arrays.datetimelike as dtl from pandas.core.arrays.period import PeriodArrayMixin, dt64arr_to_periodarr from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 56b6dc7051d9f..430d90cf29aae 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -16,7 +16,7 @@ from pandas.core.arrays.timedeltas import ( 
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) -from pandas.core.arrays import datetimelike as dtl +import pandas.core.arrays.datetimelike as dtl from pandas.core.indexes.base import Index from pandas.core.indexes.numeric import Int64Index diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 214fcb097f736..e66c8fe4de3da 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -64,7 +64,7 @@ import pandas.core.missing as missing from pandas.core.base import PandasObject -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3667d7c5e39dc..60d4e7bc080bc 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -29,7 +29,7 @@ from pandas.core.base import PandasObject import pandas.core.algorithms as algos -from pandas.core.sparse.array import _maybe_to_sparse +from pandas.core.arrays.sparse.array import _maybe_to_sparse from pandas.core.index import Index, MultiIndex, ensure_index from pandas.core.indexing import maybe_convert_indices diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 640b2812d3e85..93690252a9993 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2066,7 +2066,7 @@ def _cast_sparse_series_op(left, right, opname): left : SparseArray right : SparseArray """ - from pandas.core.sparse.api import SparseDtype + from pandas.core.arrays.sparse.api import SparseDtype opname = opname.strip('_') @@ -2116,7 +2116,7 @@ def _sparse_series_op(left, right, op, name): new_index = left.index new_name = get_op_result_name(left, right) - from pandas.core.sparse.array import _sparse_array_op + from pandas.core.arrays.sparse.array import _sparse_array_op lvalues, rvalues = _cast_sparse_series_op(left.values, 
right.values, name) result = _sparse_array_op(lvalues, rvalues, op, name) return left._constructor(result, index=new_index, name=new_name) @@ -2130,7 +2130,7 @@ def _arith_method_SPARSE_ARRAY(cls, op, special): op_name = _get_op_name(op, special) def wrapper(self, other): - from pandas.core.sparse.array import ( + from pandas.core.arrays.sparse.array import ( SparseArray, _sparse_array_op, _wrap_result, _get_fill) if isinstance(other, np.ndarray): if len(self) != len(other): diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 26221143c0cdf..fbd02d7b1aed1 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -4,7 +4,7 @@ from pandas.core.dtypes.common import is_list_like from pandas import compat -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core.dtypes.generic import ABCMultiIndex diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 7bee1ba0e2eb2..f0b8dc1c14a80 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -18,11 +18,11 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.sparse.api import SparseDataFrame, SparseSeries -from pandas.core.sparse.array import SparseArray +from pandas.core.arrays.sparse.api import SparseDataFrame, SparseSeries +from pandas.core.arrays.sparse.array import SparseArray from pandas._libs.sparse import IntIndex -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core.arrays.categorical import _factorize_from_iterable from pandas.core.sorting import (get_group_index, get_compressed_ids, compress_group_index, decons_obs_group_ids) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4f6bca93d377b..7397c26c4cc21 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,7 +13,7 @@ import numpy.ma as ma from pandas.core.accessor import 
CachedAccessor -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.base import ExtensionArray from pandas.core.dtypes.common import ( is_categorical_dtype, is_string_like, @@ -344,7 +344,7 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, "future version. Please use the pd.Series(..) " "constructor instead.", FutureWarning, stacklevel=2) if isinstance(arr, ABCSparseArray): - from pandas.core.sparse.series import SparseSeries + from pandas.core.arrays.sparse.series import SparseSeries cls = SparseSeries return cls(arr, index=index, name=name, dtype=dtype, copy=copy, fastpath=fastpath) @@ -1383,8 +1383,8 @@ def to_sparse(self, kind='block', fill_value=None): sp : SparseSeries """ # TODO: deprecate - from pandas.core.sparse.series import SparseSeries - from pandas.core.sparse.array import SparseArray + from pandas.core.arrays.sparse.series import SparseSeries + from pandas.core.arrays.sparse.array import SparseArray values = SparseArray(self, kind=kind, fill_value=fill_value) return SparseSeries( diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5aa9ea658482b..6ac67afa952bb 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -187,7 +187,7 @@ def indexer_from_factorized(labels, shape, compress=True): def lexsort_indexer(keys, orders=None, na_position='last'): - from pandas.core.arrays import Categorical + from pandas.core.arrays.categorical import Categorical labels = [] shape = [] diff --git a/pandas/core/sparse/__init__.py b/pandas/core/sparse/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py deleted file mode 100644 index 0fb0396e34669..0000000000000 --- a/pandas/core/sparse/api.py +++ /dev/null @@ -1,6 +0,0 @@ -# pylint: disable=W0611 -# flake8: noqa -from pandas.core.sparse.array import SparseArray -from pandas.core.sparse.series import SparseSeries -from pandas.core.sparse.frame import 
SparseDataFrame -from pandas.core.sparse.dtype import SparseDtype diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index e41885d525653..6a2cfd4d4a7b3 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -2,7 +2,6 @@ data hash pandas / numpy objects """ import itertools - import numpy as np from pandas._libs import hashing, tslibs from pandas.core.dtypes.generic import ( diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 638b76c780852..b01ffb0242d13 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -66,11 +66,11 @@ needs_i8_conversion, pandas_dtype) from pandas.core import internals -from pandas.core.arrays import IntervalArray +from pandas.core.arrays.interval import IntervalArray from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager, make_block, _safe_reshape -from pandas.core.sparse.api import SparseSeries, SparseDataFrame -from pandas.core.sparse.array import BlockIndex, IntIndex +from pandas.core.arrays.sparse.api import SparseSeries, SparseDataFrame +from pandas.core.arrays.sparse.array import BlockIndex, IntIndex from pandas.io.common import get_filepath_or_buffer, _stringify_path from pandas.io.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 1edc6f6e14442..75d54fcad73c5 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -28,7 +28,7 @@ ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core import algorithms import pandas.core.common as com from pandas.io.date_converters import generic_parser diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index de193db846c50..55d6fe2399380 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -50,7 +50,7 @@ from pandas.core.internals import 
(BlockManager, make_block, _block2d_to_blocknd, _factor_indexer, _block_shape) -from pandas.core.sparse.array import BlockIndex, IntIndex +from pandas.core.arrays.sparse.array import BlockIndex, IntIndex from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 68b2182c2ff07..b53814e9ad605 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -26,7 +26,7 @@ from pandas import compat, to_timedelta, to_datetime, isna, DatetimeIndex from pandas.compat import (lrange, lmap, lzip, text_type, string_types, range, zip, BytesIO) -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core.base import StringMixin from pandas.core.dtypes.common import (is_categorical_dtype, ensure_object, is_datetime64_dtype) diff --git a/pandas/tests/sparse/frame/conftest.py b/pandas/tests/arrays/sparse/frame/conftest.py similarity index 100% rename from pandas/tests/sparse/frame/conftest.py rename to pandas/tests/arrays/sparse/frame/conftest.py diff --git a/pandas/tests/sparse/frame/test_apply.py b/pandas/tests/arrays/sparse/frame/test_apply.py similarity index 97% rename from pandas/tests/sparse/frame/test_apply.py rename to pandas/tests/arrays/sparse/frame/test_apply.py index 2d7a537f0fb3b..47a164e3c0945 100644 --- a/pandas/tests/sparse/frame/test_apply.py +++ b/pandas/tests/arrays/sparse/frame/test_apply.py @@ -1,7 +1,7 @@ import pytest import numpy as np from pandas import SparseDataFrame, DataFrame, Series, bdate_range -from pandas.core.sparse.api import SparseDtype +from pandas.core.arrays.sparse.api import SparseDtype from pandas.core import nanops from pandas.util import testing as tm diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/arrays/sparse/frame/test_frame.py similarity index 99% rename from pandas/tests/sparse/frame/test_frame.py rename to pandas/tests/arrays/sparse/frame/test_frame.py 
index 03143488c3874..d967904ddf9b5 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/arrays/sparse/frame/test_frame.py @@ -14,10 +14,10 @@ from pandas.util import testing as tm from pandas.compat import lrange from pandas import compat -from pandas.core.sparse import frame as spf +from pandas.core.arrays.sparse import frame as spf from pandas._libs.sparse import BlockIndex, IntIndex -from pandas.core.sparse.api import ( +from pandas.core.arrays.sparse.api import ( SparseSeries, SparseDataFrame, SparseArray, SparseDtype ) from pandas.tests.frame.test_api import SharedWithSparse diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/arrays/sparse/frame/test_to_from_scipy.py similarity index 99% rename from pandas/tests/sparse/frame/test_to_from_scipy.py rename to pandas/tests/arrays/sparse/frame/test_to_from_scipy.py index 1a10ff83d3097..7ca520338baa6 100644 --- a/pandas/tests/sparse/frame/test_to_from_scipy.py +++ b/pandas/tests/arrays/sparse/frame/test_to_from_scipy.py @@ -2,7 +2,7 @@ import numpy as np from pandas.util import testing as tm from pandas import SparseDataFrame, SparseSeries -from pandas.core.sparse.api import SparseDtype +from pandas.core.arrays.sparse.api import SparseDtype from distutils.version import LooseVersion from pandas.core.dtypes.common import ( is_bool_dtype, diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/arrays/sparse/series/test_series.py similarity index 99% rename from pandas/tests/sparse/series/test_series.py rename to pandas/tests/arrays/sparse/series/test_series.py index a1ec8314841e3..2d07288bf304f 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/arrays/sparse/series/test_series.py @@ -18,11 +18,11 @@ from pandas.compat import range, PY36 from pandas.core.reshape.util import cartesian_product -from pandas.core.sparse.api import SparseDtype -import pandas.core.sparse.frame as spf +from pandas.core.arrays.sparse.api import SparseDtype +import 
pandas.core.arrays.sparse.frame as spf from pandas._libs.sparse import BlockIndex, IntIndex -from pandas.core.sparse.api import SparseSeries +from pandas.core.arrays.sparse.api import SparseSeries from pandas.tests.series.test_api import SharedWithSparse diff --git a/pandas/tests/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py similarity index 99% rename from pandas/tests/sparse/test_arithmetics.py rename to pandas/tests/arrays/sparse/test_arithmetics.py index 388411f909bac..fdbac8b79ab16 100644 --- a/pandas/tests/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -4,7 +4,7 @@ import pytest import pandas as pd import pandas.util.testing as tm -from pandas.core.sparse.api import SparseDtype +from pandas.core.arrays.sparse.api import SparseDtype class TestSparseArrayArithmetics(object): diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py similarity index 99% rename from pandas/tests/sparse/test_array.py rename to pandas/tests/arrays/sparse/test_array.py index 0257d996228df..bbd8fcbc294d1 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from pandas.core.sparse.api import SparseArray, SparseSeries, SparseDtype +from pandas.core.arrays.sparse.api import SparseArray, SparseSeries, SparseDtype from pandas._libs.sparse import IntIndex from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/arrays/sparse/test_combine_concat.py similarity index 99% rename from pandas/tests/sparse/test_combine_concat.py rename to pandas/tests/arrays/sparse/test_combine_concat.py index 92483f1e7511e..66a7a89732b60 100644 --- a/pandas/tests/sparse/test_combine_concat.py +++ b/pandas/tests/arrays/sparse/test_combine_concat.py @@ -282,7 +282,7 @@ def test_concat_different_columns(self): 
tm.assert_sp_frame_equal(res, exp, check_kind=False) def test_concat_bug(self): - from pandas.core.sparse.api import SparseDtype + from pandas.core.arrays.sparse.api import SparseDtype x = pd.SparseDataFrame({"A": pd.SparseArray([np.nan, np.nan], fill_value=0)}) y = pd.SparseDataFrame({"B": []}) diff --git a/pandas/tests/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py similarity index 98% rename from pandas/tests/sparse/test_dtype.py rename to pandas/tests/arrays/sparse/test_dtype.py index 0dcfc3ae79b0f..c3a1a28f93ba7 100644 --- a/pandas/tests/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.sparse.api import SparseDtype +from pandas.core.arrays.sparse.api import SparseDtype @pytest.mark.parametrize("dtype, fill_value", [ diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/arrays/sparse/test_format.py similarity index 100% rename from pandas/tests/sparse/test_format.py rename to pandas/tests/arrays/sparse/test_format.py diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/arrays/sparse/test_groupby.py similarity index 100% rename from pandas/tests/sparse/test_groupby.py rename to pandas/tests/arrays/sparse/test_groupby.py diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/arrays/sparse/test_indexing.py similarity index 99% rename from pandas/tests/sparse/test_indexing.py rename to pandas/tests/arrays/sparse/test_indexing.py index 7c7e450c966bf..cc910e20cb385 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/arrays/sparse/test_indexing.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import pandas.util.testing as tm -from pandas.core.sparse.api import SparseDtype +from pandas.core.arrays.sparse.api import SparseDtype class TestSparseSeriesIndexing(object): diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 23ee8d217bd59..72c32d1c759ae 100644 
--- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -7,7 +7,7 @@ from pandas.api.types import is_integer, is_float, is_float_dtype, is_scalar from pandas.core.dtypes.generic import ABCIndexClass -from pandas.core.arrays import ( +from pandas.core.arrays.integer import ( integer_array, IntegerArray) from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, diff --git a/pandas/tests/arrays/test_interval.py b/pandas/tests/arrays/test_interval.py index bcf4cea795978..f8499979061f8 100644 --- a/pandas/tests/arrays/test_interval.py +++ b/pandas/tests/arrays/test_interval.py @@ -3,7 +3,7 @@ import numpy as np from pandas import Index, IntervalIndex, date_range, timedelta_range -from pandas.core.arrays import IntervalArray +from pandas.core.arrays.interval import IntervalArray import pandas.util.testing as tm diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index b5353e34a2311..c454887242904 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -6,7 +6,7 @@ from pandas.core.dtypes.dtypes import (DatetimeTZDtype, PeriodDtype, CategoricalDtype, IntervalDtype) -from pandas.core.sparse.api import SparseDtype +from pandas.core.arrays.sparse.api import SparseDtype import pandas.core.dtypes.common as com import pandas.util.testing as tm diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index c53c2e5059cde..22544d3268141 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -20,7 +20,7 @@ _coerce_to_dtype, is_bool_dtype, ) -from pandas.core.sparse.api import SparseDtype +from pandas.core.arrays.sparse.api import SparseDtype import pandas.util.testing as tm diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 53a598559393c..823a72ac59638 100644 --- a/pandas/tests/extension/decimal/array.py +++ 
b/pandas/tests/extension/decimal/array.py @@ -6,8 +6,8 @@ import numpy as np import pandas as pd -from pandas.core.arrays import (ExtensionArray, - ExtensionScalarOpsMixin) +from pandas.core.arrays.base import (ExtensionArray, + ExtensionScalarOpsMixin) from pandas.core.dtypes.base import ExtensionDtype diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 976511941042d..cf5389904a425 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -21,7 +21,7 @@ from pandas import compat from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.base import ExtensionArray class JSONDtype(ExtensionDtype): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index b6223ea96d7dd..33776eb4262fe 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.base import ExtensionArray from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes import dtypes diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 89c36bbe7b325..91007b1314834 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -20,7 +20,7 @@ from pandas.tests.extension import base from pandas.core.dtypes.common import is_extension_array_dtype -from pandas.core.arrays import integer_array +from pandas.core.arrays.integer import integer_array from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 183ebea927b10..3e8013ee60340 100644 --- 
a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -17,7 +17,7 @@ import numpy as np from pandas import Interval -from pandas.core.arrays import IntervalArray +from pandas.core.arrays.interval import IntervalArray from pandas.core.dtypes.dtypes import IntervalDtype from pandas.tests.extension import base import pandas.util.testing as tm diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 11bf1cb6e9f05..3cacd46e797eb 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -2,7 +2,7 @@ import pandas as pd import numpy as np -from pandas.core.sparse.dtype import SparseDtype +from pandas.core.arrays.sparse.dtype import SparseDtype from pandas import SparseArray from pandas.errors import PerformanceWarning from pandas.tests.extension import base diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 3fe1c84174acb..dfb1902897f78 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -12,7 +12,8 @@ from pandas import (DataFrame, Series, Timestamp, date_range, compat, option_context, Categorical) -from pandas.core.arrays import IntervalArray, integer_array +from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays.integer import integer_array from pandas.compat import StringIO import pandas as pd diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 2afaeea3755d0..b836e8fa8cf7b 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -12,7 +12,7 @@ from pandas.compat import u from pandas import _np_version_under1p14 -from pandas.core.arrays import integer_array +from pandas.core.arrays.integer import integer_array from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype from pandas.tests.frame.common import TestData from pandas.util.testing 
import (assert_series_equal, diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index fe2d14458c197..ec0c25f53afcd 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2057,7 +2057,7 @@ def test_loc_duplicates(self): tm.assert_frame_equal(df, expected) def test_iloc_sparse_propegate_fill_value(self): - from pandas.core.sparse.api import SparseDataFrame + from pandas.core.arrays.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) assert len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index 208d498180692..2a3e49cffaa2c 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -8,7 +8,7 @@ Interval, IntervalIndex, Index, Int64Index, Float64Index, Categorical, CategoricalIndex, date_range, timedelta_range, period_range, notna) from pandas.compat import lzip -from pandas.core.arrays import IntervalArray +from pandas.core.arrays.interval import IntervalArray from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.dtypes.dtypes import IntervalDtype import pandas.core.common as com diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0f524ca0aaac5..6037e7613fe2c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -18,6 +18,7 @@ import pandas as pd from pandas.core.indexing import (_non_reducing_slice, _maybe_numeric_slice, validate_indices) +from pandas.core.arrays.integer import integer_array from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm from pandas.compat import PY2 @@ -1084,10 +1085,10 @@ def test_validate_indices_empty(): def test_extension_array_cross_section(): # A cross-section of a homogeneous EA 
should be an EA df = pd.DataFrame({ - "A": pd.core.arrays.integer_array([1, 2]), - "B": pd.core.arrays.integer_array([3, 4]) + "A": integer_array([1, 2]), + "B": integer_array([3, 4]) }, index=['a', 'b']) - expected = pd.Series(pd.core.arrays.integer_array([1, 3]), + expected = pd.Series(integer_array([1, 3]), index=['A', 'B'], name='a') result = df.loc['a'] tm.assert_series_equal(result, expected) @@ -1098,7 +1099,7 @@ def test_extension_array_cross_section(): def test_extension_array_cross_section_converts(): df = pd.DataFrame({ - "A": pd.core.arrays.integer_array([1, 2]), + "A": integer_array([1, 2]), "B": np.array([1, 2]), }, index=['a', 'b']) result = df.loc['a'] diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index d8b3d9588f2f1..40c54dc59c7e3 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -5,7 +5,7 @@ from collections import OrderedDict from pandas import DataFrame, Series -from pandas.core.sparse.api import SparseDtype, SparseArray +from pandas.core.arrays.sparse.api import SparseDtype, SparseArray import pandas as pd from numpy import nan diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index f27600d830a93..846b5c2dbb31b 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -223,13 +223,15 @@ def test_concat_empty_series_dtypes(self): result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='float64')]) # TODO: release-note: concat sparse dtype - assert result.dtype == pd.core.sparse.dtype.SparseDtype(np.float64) + expected = pd.core.arrays.sparse.dtype.SparseDtype(np.float64) + assert result.dtype == expected assert result.ftype == 'float64:sparse' result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='object')]) # TODO: release-note: concat sparse dtype - assert result.dtype == pd.core.sparse.dtype.SparseDtype('object') + expected 
= pd.core.arrays.sparse.dtype.SparseDtype('object') + assert result.dtype == expected assert result.ftype == 'object:sparse' def test_combine_first_dt64(self): diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index d539dfa456740..de13566902950 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -2,7 +2,7 @@ # pylint: disable-msg=E1101,W0612 import numpy as np import pandas as pd -from pandas.core.sparse.dtype import SparseDtype +from pandas.core.arrays.sparse.dtype import SparseDtype import pandas.util.testing as tm diff --git a/pandas/tests/sparse/__init__.py b/pandas/tests/sparse/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/sparse/common.py b/pandas/tests/sparse/common.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/sparse/frame/__init__.py b/pandas/tests/sparse/frame/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/sparse/frame/test_analytics.py b/pandas/tests/sparse/frame/test_analytics.py deleted file mode 100644 index 54e3ddbf2f1cf..0000000000000 --- a/pandas/tests/sparse/frame/test_analytics.py +++ /dev/null @@ -1,40 +0,0 @@ -import pytest -import numpy as np -from pandas import SparseDataFrame, DataFrame, SparseSeries -from pandas.util import testing as tm - - -@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', - strict=True) -def test_quantile(): - # GH 17386 - data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]] - q = 0.1 - - sparse_df = SparseDataFrame(data) - result = sparse_df.quantile(q) - - dense_df = DataFrame(data) - dense_expected = dense_df.quantile(q) - sparse_expected = SparseSeries(dense_expected) - - tm.assert_series_equal(result, dense_expected) - tm.assert_sp_series_equal(result, sparse_expected) - - -@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', - strict=True) -def 
test_quantile_multi(): - # GH 17386 - data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]] - q = [0.1, 0.5] - - sparse_df = SparseDataFrame(data) - result = sparse_df.quantile(q) - - dense_df = DataFrame(data) - dense_expected = dense_df.quantile(q) - sparse_expected = SparseDataFrame(dense_expected) - - tm.assert_frame_equal(result, dense_expected) - tm.assert_sp_frame_equal(result, sparse_expected) diff --git a/pandas/tests/sparse/frame/test_indexing.py b/pandas/tests/sparse/frame/test_indexing.py deleted file mode 100644 index 607eb2da6ded0..0000000000000 --- a/pandas/tests/sparse/frame/test_indexing.py +++ /dev/null @@ -1,113 +0,0 @@ -import pytest -import numpy as np -from pandas import SparseDataFrame, DataFrame -from pandas.util import testing as tm - - -pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)") - - -@pytest.mark.parametrize('data', [ - [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]], - [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]], - [ - [1.0, 1.0 + 1.0j], - [2.0 + 2.0j, 2.0], - [3.0, 3.0 + 3.0j], - [4.0 + 4.0j, 4.0], - [np.nan, np.nan] - ] -]) -@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', - strict=True) -def test_where_with_numeric_data(data): - # GH 17386 - lower_bound = 1.5 - - sparse = SparseDataFrame(data) - result = sparse.where(sparse > lower_bound) - - dense = DataFrame(data) - dense_expected = dense.where(dense > lower_bound) - sparse_expected = SparseDataFrame(dense_expected) - - tm.assert_frame_equal(result, dense_expected) - tm.assert_sp_frame_equal(result, sparse_expected) - - -@pytest.mark.parametrize('data', [ - [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]], - [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]], - [ - [1.0, 1.0 + 1.0j], - [2.0 + 2.0j, 2.0], - [3.0, 3.0 + 3.0j], - [4.0 + 4.0j, 4.0], - [np.nan, np.nan] - ] -]) -@pytest.mark.parametrize('other', [ - True, - -100, - 0.1, - 100.0 + 100.0j -]) -@pytest.mark.xfail(reason='Wrong SparseBlock 
initialization (GH#17386)', - strict=True) -def test_where_with_numeric_data_and_other(data, other): - # GH 17386 - lower_bound = 1.5 - - sparse = SparseDataFrame(data) - result = sparse.where(sparse > lower_bound, other) - - dense = DataFrame(data) - dense_expected = dense.where(dense > lower_bound, other) - sparse_expected = SparseDataFrame(dense_expected, - default_fill_value=other) - - tm.assert_frame_equal(result, dense_expected) - tm.assert_sp_frame_equal(result, sparse_expected) - - -@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', - strict=True) -def test_where_with_bool_data(): - # GH 17386 - data = [[False, False], [True, True], [False, False]] - cond = True - - sparse = SparseDataFrame(data) - result = sparse.where(sparse == cond) - - dense = DataFrame(data) - dense_expected = dense.where(dense == cond) - sparse_expected = SparseDataFrame(dense_expected) - - tm.assert_frame_equal(result, dense_expected) - tm.assert_sp_frame_equal(result, sparse_expected) - - -@pytest.mark.parametrize('other', [ - True, - 0, - 0.1, - 100.0 + 100.0j -]) -@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', - strict=True) -def test_where_with_bool_data_and_other(other): - # GH 17386 - data = [[False, False], [True, True], [False, False]] - cond = True - - sparse = SparseDataFrame(data) - result = sparse.where(sparse == cond, other) - - dense = DataFrame(data) - dense_expected = dense.where(dense == cond, other) - sparse_expected = SparseDataFrame(dense_expected, - default_fill_value=other) - - tm.assert_frame_equal(result, dense_expected) - tm.assert_sp_frame_equal(result, sparse_expected) diff --git a/pandas/tests/sparse/frame/test_to_csv.py b/pandas/tests/sparse/frame/test_to_csv.py deleted file mode 100644 index b0243dfde8d3f..0000000000000 --- a/pandas/tests/sparse/frame/test_to_csv.py +++ /dev/null @@ -1,20 +0,0 @@ -import numpy as np -import pytest -from pandas import SparseDataFrame, read_csv -from pandas.util import 
testing as tm - - -class TestSparseDataFrameToCsv(object): - fill_values = [np.nan, 0, None, 1] - - @pytest.mark.parametrize('fill_value', fill_values) - def test_to_csv_sparse_dataframe(self, fill_value): - # GH19384 - sdf = SparseDataFrame({'a': type(self).fill_values}, - default_fill_value=fill_value) - - with tm.ensure_clean('sparse_df.csv') as path: - sdf.to_csv(path, index=False) - df = read_csv(path, skip_blank_lines=False) - - tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf) diff --git a/pandas/tests/sparse/series/__init__.py b/pandas/tests/sparse/series/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/sparse/series/test_indexing.py b/pandas/tests/sparse/series/test_indexing.py deleted file mode 100644 index 998285d933492..0000000000000 --- a/pandas/tests/sparse/series/test_indexing.py +++ /dev/null @@ -1,113 +0,0 @@ -import pytest -import numpy as np -from pandas import SparseSeries, Series -from pandas.util import testing as tm - - -pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)") - - -@pytest.mark.parametrize('data', [ - [1, 1, 2, 2, 3, 3, 4, 4, 0, 0], - [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan], - [ - 1.0, 1.0 + 1.0j, - 2.0 + 2.0j, 2.0, - 3.0, 3.0 + 3.0j, - 4.0 + 4.0j, 4.0, - np.nan, np.nan - ] -]) -@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', - strict=True) -def test_where_with_numeric_data(data): - # GH 17386 - lower_bound = 1.5 - - sparse = SparseSeries(data) - result = sparse.where(sparse > lower_bound) - - dense = Series(data) - dense_expected = dense.where(dense > lower_bound) - sparse_expected = SparseSeries(dense_expected) - - tm.assert_series_equal(result, dense_expected) - tm.assert_sp_series_equal(result, sparse_expected) - - -@pytest.mark.parametrize('data', [ - [1, 1, 2, 2, 3, 3, 4, 4, 0, 0], - [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan], - [ - 1.0, 1.0 + 1.0j, - 2.0 + 2.0j, 2.0, - 3.0, 3.0 + 
3.0j, - 4.0 + 4.0j, 4.0, - np.nan, np.nan - ] -]) -@pytest.mark.parametrize('other', [ - True, - -100, - 0.1, - 100.0 + 100.0j -]) -@pytest.mark.skip(reason='Wrong SparseBlock initialization ' - '(Segfault) ' - '(GH 17386)') -def test_where_with_numeric_data_and_other(data, other): - # GH 17386 - lower_bound = 1.5 - - sparse = SparseSeries(data) - result = sparse.where(sparse > lower_bound, other) - - dense = Series(data) - dense_expected = dense.where(dense > lower_bound, other) - sparse_expected = SparseSeries(dense_expected, fill_value=other) - - tm.assert_series_equal(result, dense_expected) - tm.assert_sp_series_equal(result, sparse_expected) - - -@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', - strict=True) -def test_where_with_bool_data(): - # GH 17386 - data = [False, False, True, True, False, False] - cond = True - - sparse = SparseSeries(data) - result = sparse.where(sparse == cond) - - dense = Series(data) - dense_expected = dense.where(dense == cond) - sparse_expected = SparseSeries(dense_expected) - - tm.assert_series_equal(result, dense_expected) - tm.assert_sp_series_equal(result, sparse_expected) - - -@pytest.mark.parametrize('other', [ - True, - 0, - 0.1, - 100.0 + 100.0j -]) -@pytest.mark.skip(reason='Wrong SparseBlock initialization ' - '(Segfault) ' - '(GH 17386)') -def test_where_with_bool_data_and_other(other): - # GH 17386 - data = [False, False, True, True, False, False] - cond = True - - sparse = SparseSeries(data) - result = sparse.where(sparse == cond, other) - - dense = Series(data) - dense_expected = dense.where(dense == cond, other) - sparse_expected = SparseSeries(dense_expected, fill_value=other) - - tm.assert_series_equal(result, dense_expected) - tm.assert_sp_series_equal(result, sparse_expected) diff --git a/pandas/tests/sparse/test_libsparse.py b/pandas/tests/sparse/test_libsparse.py deleted file mode 100644 index 3b90d93cee7a4..0000000000000 --- a/pandas/tests/sparse/test_libsparse.py +++ /dev/null @@ 
-1,604 +0,0 @@ -from pandas import Series - -import pytest -import numpy as np -import operator -import pandas.util.testing as tm -import pandas.util._test_decorators as td - -from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index -import pandas._libs.sparse as splib - -TEST_LENGTH = 20 - -plain_case = dict(xloc=[0, 7, 15], xlen=[3, 5, 5], yloc=[2, 9, 14], - ylen=[2, 3, 5], intersect_loc=[2, 9, 15], - intersect_len=[1, 3, 4]) -delete_blocks = dict(xloc=[0, 5], xlen=[4, 4], yloc=[1], ylen=[4], - intersect_loc=[1], intersect_len=[3]) -split_blocks = dict(xloc=[0], xlen=[10], yloc=[0, 5], ylen=[3, 7], - intersect_loc=[0, 5], intersect_len=[3, 5]) -skip_block = dict(xloc=[10], xlen=[5], yloc=[0, 12], ylen=[5, 3], - intersect_loc=[12], intersect_len=[3]) - -no_intersect = dict(xloc=[0, 10], xlen=[4, 6], yloc=[5, 17], ylen=[4, 2], - intersect_loc=[], intersect_len=[]) - - -def check_cases(_check_case): - def _check_case_dict(case): - _check_case(case['xloc'], case['xlen'], case['yloc'], case['ylen'], - case['intersect_loc'], case['intersect_len']) - - _check_case_dict(plain_case) - _check_case_dict(delete_blocks) - _check_case_dict(split_blocks) - _check_case_dict(skip_block) - _check_case_dict(no_intersect) - - # one or both is empty - _check_case([0], [5], [], [], [], []) - _check_case([], [], [], [], [], []) - - -class TestSparseIndexUnion(object): - - def test_index_make_union(self): - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - bresult = xindex.make_union(yindex) - assert (isinstance(bresult, BlockIndex)) - tm.assert_numpy_array_equal(bresult.blocs, - np.array(eloc, dtype=np.int32)) - tm.assert_numpy_array_equal(bresult.blengths, - np.array(elen, dtype=np.int32)) - - ixindex = xindex.to_int_index() - iyindex = yindex.to_int_index() - iresult = ixindex.make_union(iyindex) - assert (isinstance(iresult, IntIndex)) - 
tm.assert_numpy_array_equal(iresult.indices, - bresult.to_int_index().indices) - - """ - x: ---- - y: ---- - r: -------- - """ - xloc = [0] - xlen = [5] - yloc = [5] - ylen = [4] - eloc = [0] - elen = [9] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ----- ----- - y: ----- -- - """ - xloc = [0, 10] - xlen = [5, 5] - yloc = [2, 17] - ylen = [5, 2] - eloc = [0, 10, 17] - elen = [7, 5, 2] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ------ - y: ------- - r: ---------- - """ - xloc = [1] - xlen = [5] - yloc = [3] - ylen = [5] - eloc = [1] - elen = [7] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ------ ----- - y: ------- - r: ------------- - """ - xloc = [2, 10] - xlen = [4, 4] - yloc = [4] - ylen = [8] - eloc = [2] - elen = [12] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: --- ----- - y: ------- - r: ------------- - """ - xloc = [0, 5] - xlen = [3, 5] - yloc = [0] - ylen = [7] - eloc = [0] - elen = [10] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ------ ----- - y: ------- --- - r: ------------- - """ - xloc = [2, 10] - xlen = [4, 4] - yloc = [4, 13] - ylen = [8, 4] - eloc = [2] - elen = [15] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ---------------------- - y: ---- ---- --- - r: ---------------------- - """ - xloc = [2] - xlen = [15] - yloc = [4, 9, 14] - ylen = [3, 2, 2] - eloc = [2] - elen = [15] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - """ - x: ---- --- - y: --- --- - """ - xloc = [0, 10] - xlen = [3, 3] - yloc = [5, 15] - ylen = [2, 2] - eloc = [0, 5, 10, 15] - elen = [3, 2, 3, 2] - _check_case(xloc, xlen, yloc, ylen, eloc, elen) - - def test_intindex_make_union(self): - a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32)) - b = IntIndex(5, np.array([0, 2], dtype=np.int32)) - res = a.make_union(b) - exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32)) - assert res.equals(exp) - - a = IntIndex(5, np.array([], dtype=np.int32)) - b = IntIndex(5, np.array([0, 
2], dtype=np.int32)) - res = a.make_union(b) - exp = IntIndex(5, np.array([0, 2], np.int32)) - assert res.equals(exp) - - a = IntIndex(5, np.array([], dtype=np.int32)) - b = IntIndex(5, np.array([], dtype=np.int32)) - res = a.make_union(b) - exp = IntIndex(5, np.array([], np.int32)) - assert res.equals(exp) - - a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) - b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) - res = a.make_union(b) - exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32)) - assert res.equals(exp) - - a = IntIndex(5, np.array([0, 1], dtype=np.int32)) - b = IntIndex(4, np.array([0, 1], dtype=np.int32)) - with pytest.raises(ValueError): - a.make_union(b) - - -class TestSparseIndexIntersect(object): - - @td.skip_if_windows - def test_intersect(self): - def _check_correct(a, b, expected): - result = a.intersect(b) - assert (result.equals(expected)) - - def _check_length_exc(a, longer): - pytest.raises(Exception, a.intersect, longer) - - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - expected = BlockIndex(TEST_LENGTH, eloc, elen) - longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) - - _check_correct(xindex, yindex, expected) - _check_correct(xindex.to_int_index(), yindex.to_int_index(), - expected.to_int_index()) - - _check_length_exc(xindex, longer_index) - _check_length_exc(xindex.to_int_index(), - longer_index.to_int_index()) - - check_cases(_check_case) - - def test_intersect_empty(self): - xindex = IntIndex(4, np.array([], dtype=np.int32)) - yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) - assert xindex.intersect(yindex).equals(xindex) - assert yindex.intersect(xindex).equals(xindex) - - xindex = xindex.to_block_index() - yindex = yindex.to_block_index() - assert xindex.intersect(yindex).equals(xindex) - assert yindex.intersect(xindex).equals(xindex) - - def test_intersect_identical(self): - cases = [IntIndex(5, 
np.array([1, 2], dtype=np.int32)), - IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), - IntIndex(0, np.array([], dtype=np.int32)), - IntIndex(5, np.array([], dtype=np.int32))] - - for case in cases: - assert case.intersect(case).equals(case) - case = case.to_block_index() - assert case.intersect(case).equals(case) - - -class TestSparseIndexCommon(object): - - def test_int_internal(self): - idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') - assert isinstance(idx, IntIndex) - assert idx.npoints == 2 - tm.assert_numpy_array_equal(idx.indices, - np.array([2, 3], dtype=np.int32)) - - idx = _make_index(4, np.array([], dtype=np.int32), kind='integer') - assert isinstance(idx, IntIndex) - assert idx.npoints == 0 - tm.assert_numpy_array_equal(idx.indices, - np.array([], dtype=np.int32)) - - idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind='integer') - assert isinstance(idx, IntIndex) - assert idx.npoints == 4 - tm.assert_numpy_array_equal(idx.indices, - np.array([0, 1, 2, 3], dtype=np.int32)) - - def test_block_internal(self): - idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 2 - tm.assert_numpy_array_equal(idx.blocs, - np.array([2], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([2], dtype=np.int32)) - - idx = _make_index(4, np.array([], dtype=np.int32), kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 0 - tm.assert_numpy_array_equal(idx.blocs, - np.array([], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([], dtype=np.int32)) - - idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 4 - tm.assert_numpy_array_equal(idx.blocs, - np.array([0], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([4], dtype=np.int32)) - - idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), 
- kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 3 - tm.assert_numpy_array_equal(idx.blocs, - np.array([0, 2], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([1, 2], dtype=np.int32)) - - def test_lookup(self): - for kind in ['integer', 'block']: - idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) - assert idx.lookup(-1) == -1 - assert idx.lookup(0) == -1 - assert idx.lookup(1) == -1 - assert idx.lookup(2) == 0 - assert idx.lookup(3) == 1 - assert idx.lookup(4) == -1 - - idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) - - for i in range(-1, 5): - assert idx.lookup(i) == -1 - - idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind=kind) - assert idx.lookup(-1) == -1 - assert idx.lookup(0) == 0 - assert idx.lookup(1) == 1 - assert idx.lookup(2) == 2 - assert idx.lookup(3) == 3 - assert idx.lookup(4) == -1 - - idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), - kind=kind) - assert idx.lookup(-1) == -1 - assert idx.lookup(0) == 0 - assert idx.lookup(1) == -1 - assert idx.lookup(2) == 1 - assert idx.lookup(3) == 2 - assert idx.lookup(4) == -1 - - def test_lookup_array(self): - for kind in ['integer', 'block']: - idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) - - res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) - exp = np.array([-1, -1, 0], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) - exp = np.array([-1, 0, -1, 1], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) - res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) - exp = np.array([-1, -1, -1, -1], dtype=np.int32) - - idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind=kind) - res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) - exp = np.array([-1, 0, 2], dtype=np.int32) - 
tm.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) - exp = np.array([-1, 2, 1, 3], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), - kind=kind) - res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) - exp = np.array([1, -1, 2, 0], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) - exp = np.array([-1, -1, 1, -1], dtype=np.int32) - tm.assert_numpy_array_equal(res, exp) - - def test_lookup_basics(self): - def _check(index): - assert (index.lookup(0) == -1) - assert (index.lookup(5) == 0) - assert (index.lookup(7) == 2) - assert (index.lookup(8) == -1) - assert (index.lookup(9) == -1) - assert (index.lookup(10) == -1) - assert (index.lookup(11) == -1) - assert (index.lookup(12) == 3) - assert (index.lookup(17) == 8) - assert (index.lookup(18) == -1) - - bindex = BlockIndex(20, [5, 12], [3, 6]) - iindex = bindex.to_int_index() - - _check(bindex) - _check(iindex) - - # corner cases - - -class TestBlockIndex(object): - - def test_block_internal(self): - idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 2 - tm.assert_numpy_array_equal(idx.blocs, - np.array([2], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([2], dtype=np.int32)) - - idx = _make_index(4, np.array([], dtype=np.int32), kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 0 - tm.assert_numpy_array_equal(idx.blocs, - np.array([], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([], dtype=np.int32)) - - idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 4 - tm.assert_numpy_array_equal(idx.blocs, - np.array([0], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - 
np.array([4], dtype=np.int32)) - - idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind='block') - assert isinstance(idx, BlockIndex) - assert idx.npoints == 3 - tm.assert_numpy_array_equal(idx.blocs, - np.array([0, 2], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([1, 2], dtype=np.int32)) - - def test_make_block_boundary(self): - for i in [5, 10, 100, 101]: - idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32), - kind='block') - - exp = np.arange(0, i, 2, dtype=np.int32) - tm.assert_numpy_array_equal(idx.blocs, exp) - tm.assert_numpy_array_equal(idx.blengths, - np.ones(len(exp), dtype=np.int32)) - - def test_equals(self): - index = BlockIndex(10, [0, 4], [2, 5]) - - assert index.equals(index) - assert not index.equals(BlockIndex(10, [0, 4], [2, 6])) - - def test_check_integrity(self): - locs = [] - lengths = [] - - # 0-length OK - # TODO: index variables are not used...is that right? - index = BlockIndex(0, locs, lengths) # noqa - - # also OK even though empty - index = BlockIndex(1, locs, lengths) # noqa - - # block extend beyond end - pytest.raises(Exception, BlockIndex, 10, [5], [10]) - - # block overlap - pytest.raises(Exception, BlockIndex, 10, [2, 5], [5, 3]) - - def test_to_int_index(self): - locs = [0, 10] - lengths = [4, 6] - exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15] - - block = BlockIndex(20, locs, lengths) - dense = block.to_int_index() - - tm.assert_numpy_array_equal(dense.indices, - np.array(exp_inds, dtype=np.int32)) - - def test_to_block_index(self): - index = BlockIndex(10, [0, 5], [4, 5]) - assert index.to_block_index() is index - - -class TestIntIndex(object): - - def test_check_integrity(self): - - # Too many indices than specified in self.length - msg = "Too many indices" - - with tm.assert_raises_regex(ValueError, msg): - IntIndex(length=1, indices=[1, 2, 3]) - - # No index can be negative. 
- msg = "No index can be less than zero" - - with tm.assert_raises_regex(ValueError, msg): - IntIndex(length=5, indices=[1, -2, 3]) - - # No index can be negative. - msg = "No index can be less than zero" - - with tm.assert_raises_regex(ValueError, msg): - IntIndex(length=5, indices=[1, -2, 3]) - - # All indices must be less than the length. - msg = "All indices must be less than the length" - - with tm.assert_raises_regex(ValueError, msg): - IntIndex(length=5, indices=[1, 2, 5]) - - with tm.assert_raises_regex(ValueError, msg): - IntIndex(length=5, indices=[1, 2, 6]) - - # Indices must be strictly ascending. - msg = "Indices must be strictly increasing" - - with tm.assert_raises_regex(ValueError, msg): - IntIndex(length=5, indices=[1, 3, 2]) - - with tm.assert_raises_regex(ValueError, msg): - IntIndex(length=5, indices=[1, 3, 3]) - - def test_int_internal(self): - idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') - assert isinstance(idx, IntIndex) - assert idx.npoints == 2 - tm.assert_numpy_array_equal(idx.indices, - np.array([2, 3], dtype=np.int32)) - - idx = _make_index(4, np.array([], dtype=np.int32), kind='integer') - assert isinstance(idx, IntIndex) - assert idx.npoints == 0 - tm.assert_numpy_array_equal(idx.indices, - np.array([], dtype=np.int32)) - - idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind='integer') - assert isinstance(idx, IntIndex) - assert idx.npoints == 4 - tm.assert_numpy_array_equal(idx.indices, - np.array([0, 1, 2, 3], dtype=np.int32)) - - def test_equals(self): - index = IntIndex(10, [0, 1, 2, 3, 4]) - assert index.equals(index) - assert not index.equals(IntIndex(10, [0, 1, 2, 3])) - - def test_to_block_index(self): - - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - - # see if survive the round trip - xbindex = xindex.to_int_index().to_block_index() - ybindex = yindex.to_int_index().to_block_index() 
- assert isinstance(xbindex, BlockIndex) - assert xbindex.equals(xindex) - assert ybindex.equals(yindex) - - check_cases(_check_case) - - def test_to_int_index(self): - index = IntIndex(10, [2, 3, 4, 5, 6]) - assert index.to_int_index() is index - - -class TestSparseOperators(object): - - def _op_tests(self, sparse_op, python_op): - def _check_case(xloc, xlen, yloc, ylen, eloc, elen): - xindex = BlockIndex(TEST_LENGTH, xloc, xlen) - yindex = BlockIndex(TEST_LENGTH, yloc, ylen) - - xdindex = xindex.to_int_index() - ydindex = yindex.to_int_index() - - x = np.arange(xindex.npoints) * 10. + 1 - y = np.arange(yindex.npoints) * 100. + 1 - - xfill = 0 - yfill = 2 - - result_block_vals, rb_index, bfill = sparse_op(x, xindex, xfill, y, - yindex, yfill) - result_int_vals, ri_index, ifill = sparse_op(x, xdindex, xfill, y, - ydindex, yfill) - - assert rb_index.to_int_index().equals(ri_index) - tm.assert_numpy_array_equal(result_block_vals, result_int_vals) - assert bfill == ifill - - # check versus Series... 
- xseries = Series(x, xdindex.indices) - xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill) - - yseries = Series(y, ydindex.indices) - yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill) - - series_result = python_op(xseries, yseries) - series_result = series_result.reindex(ri_index.indices) - - tm.assert_numpy_array_equal(result_block_vals, - series_result.values) - tm.assert_numpy_array_equal(result_int_vals, series_result.values) - - check_cases(_check_case) - - @pytest.mark.parametrize('opname', - ['add', 'sub', 'mul', 'truediv', 'floordiv']) - def test_op(self, opname): - sparse_op = getattr(splib, 'sparse_%s_float64' % opname) - python_op = getattr(operator, opname) - self._op_tests(sparse_op, python_op) diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py deleted file mode 100644 index e7eba63e4e0b3..0000000000000 --- a/pandas/tests/sparse/test_pivot.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np -import pandas as pd -import pandas.util.testing as tm - - -class TestPivotTable(object): - - def setup_method(self, method): - self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8), - 'E': [np.nan, np.nan, 1, 2, - np.nan, 1, np.nan, np.nan]}) - self.sparse = self.dense.to_sparse() - - def test_pivot_table(self): - res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', - values='C') - res_dense = pd.pivot_table(self.dense, index='A', columns='B', - values='C') - tm.assert_frame_equal(res_sparse, res_dense) - - res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', - values='E') - res_dense = pd.pivot_table(self.dense, index='A', columns='B', - values='E') - tm.assert_frame_equal(res_sparse, res_dense) - - res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', - values='E', aggfunc='mean') - res_dense = 
pd.pivot_table(self.dense, index='A', columns='B', - values='E', aggfunc='mean') - tm.assert_frame_equal(res_sparse, res_dense) - - # ToDo: sum doesn't handle nan properly - # res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', - # values='E', aggfunc='sum') - # res_dense = pd.pivot_table(self.dense, index='A', columns='B', - # values='E', aggfunc='sum') - # tm.assert_frame_equal(res_sparse, res_dense) - - def test_pivot_table_multi(self): - res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', - values=['D', 'E']) - res_dense = pd.pivot_table(self.dense, index='A', columns='B', - values=['D', 'E']) - tm.assert_frame_equal(res_sparse, res_dense) diff --git a/pandas/tests/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py deleted file mode 100644 index b492c47375bcf..0000000000000 --- a/pandas/tests/sparse/test_reshape.py +++ /dev/null @@ -1,38 +0,0 @@ -import pytest -import numpy as np - -import pandas as pd -import pandas.util.testing as tm - - -@pytest.fixture -def sparse_df(): - return pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye - - -@pytest.fixture -def multi_index3(): - return pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) - - -def test_sparse_frame_stack(sparse_df, multi_index3): - ss = sparse_df.stack() - expected = pd.SparseSeries(np.ones(3), index=multi_index3) - tm.assert_sp_series_equal(ss, expected) - - -def test_sparse_frame_unstack(sparse_df): - mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) - sparse_df.index = mi - arr = np.array([[1, np.nan, np.nan], - [np.nan, 1, np.nan], - [np.nan, np.nan, 1]]) - unstacked_df = pd.DataFrame(arr, index=mi).unstack() - unstacked_sdf = sparse_df.unstack() - - tm.assert_numpy_array_equal(unstacked_df.values, unstacked_sdf.values) - - -def test_sparse_series_unstack(sparse_df, multi_index3): - frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() - tm.assert_sp_frame_equal(frame, sparse_df) diff --git a/pandas/tests/test_base.py 
b/pandas/tests/test_base.py index bbc5bd96bad55..113c7fe77c071 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1199,8 +1199,8 @@ def test_iter_box(self): 'datetime64[ns, US/Central]'), (pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), - (pd.IntervalIndex.from_breaks([0, 1, 2]), pd.core.arrays.IntervalArray, - 'interval'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), + pd.core.arrays.interval.IntervalArray, 'interval'), ]) def test_values_consistent(array, expected_type, dtype): l_values = pd.Series(array)._values diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a89de74875ee5..eb519aeb9526c 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,7 +19,8 @@ import numpy as np import pandas as pd -from pandas.core.arrays import ExtensionArray, IntervalArray +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.interval import IntervalArray from pandas.core.dtypes.missing import array_equivalent from pandas.core.dtypes.common import ( is_datetimelike_v_numeric, From 96cd3d568f7898f9ae412d3db569364350f735ea Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 14 Oct 2018 11:01:06 -0700 Subject: [PATCH 02/12] CLN-23123 Remove print statements --- pandas/compat/pickle_compat.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 321ae6d541555..c2d29da1fdcd3 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -138,13 +138,8 @@ class Unpickler(pkl._Unpickler): def find_class(self, module, name): # override superclass - print(module) - print(name) key = (module, name) module, name = _class_locations_map.get(key, key) - print(module) - print(name) - print("---") return super(Unpickler, self).find_class(module, name) else: From c9bfeaeaf00f1dde54f7e1a3956e0ce2e6db9315 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 
14 Oct 2018 11:45:08 -0700 Subject: [PATCH 03/12] CLN-23123 Restore pandas.core.arrays.__init__.py --- pandas/_libs/parsers.pyx | 2 +- pandas/api/extensions/__init__.py | 2 +- pandas/core/api.py | 2 +- pandas/core/arrays/__init__.py | 10 ++++++++++ pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 6 +++--- pandas/core/arrays/integer.py | 2 +- pandas/core/arrays/interval.py | 4 ++-- pandas/core/arrays/period.py | 4 ++-- pandas/core/arrays/sparse/array.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/categorical.py | 2 +- pandas/core/dtypes/common.py | 9 +++++---- pandas/core/dtypes/concat.py | 2 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/frame.py | 4 ++-- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/grouper.py | 4 ++-- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/core/reshape/melt.py | 2 +- pandas/core/reshape/reshape.py | 2 +- pandas/core/series.py | 2 +- pandas/core/sorting.py | 2 +- pandas/io/packers.py | 2 +- pandas/io/parsers.py | 2 +- pandas/io/stata.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 6 +++--- pandas/tests/arrays/test_integer.py | 2 +- pandas/tests/arrays/test_interval.py | 2 +- pandas/tests/extension/decimal/array.py | 2 +- pandas/tests/extension/json/array.py | 2 +- pandas/tests/extension/test_common.py | 2 +- pandas/tests/extension/test_integer.py | 2 +- pandas/tests/extension/test_interval.py | 2 +- pandas/tests/frame/test_block_internals.py | 4 ++-- pandas/tests/frame/test_dtypes.py | 2 +- pandas/tests/indexes/interval/test_construction.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/series/test_api.py | 2 +- pandas/util/testing.py | 4 ++-- 45 files changed, 68 insertions(+), 57 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 
f177e3faf4957..e3df391c5c45d 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -51,7 +51,7 @@ from pandas.core.dtypes.common import ( is_bool_dtype, is_object_dtype, is_datetime64_dtype, pandas_dtype) -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.dtypes.concat import union_categoricals import pandas.io.common as icom diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 8a515661920f3..18c7c9c2979e3 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -3,7 +3,7 @@ register_index_accessor, register_series_accessor) from pandas.core.algorithms import take # noqa -from pandas.core.arrays.base import (ExtensionArray, # noqa +from pandas.core.arrays import (ExtensionArray, # noqa ExtensionScalarOpsMixin) from pandas.core.dtypes.dtypes import ( # noqa ExtensionDtype, register_extension_dtype diff --git a/pandas/core/api.py b/pandas/core/api.py index 13d3ef5a20c42..32df317a602a9 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -6,7 +6,7 @@ from pandas.core.algorithms import factorize, unique, value_counts from pandas.core.dtypes.missing import isna, isnull, notna, notnull -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.groupby import Grouper from pandas.io.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index e69de29bb2d1d..29f258bf1b29e 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -0,0 +1,10 @@ +from .base import (ExtensionArray, # noqa + ExtensionOpsMixin, + ExtensionScalarOpsMixin) +from .categorical import Categorical # noqa +from .datetimes import DatetimeArrayMixin # noqa +from .interval import IntervalArray # noqa +from .period import 
PeriodArrayMixin # noqa +from .timedeltas import TimedeltaArrayMixin # noqa +from .integer import ( # noqa + IntegerArray, integer_array) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 39533b62c96a4..37fc451ba2a2b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -715,7 +715,7 @@ def __rsub__(self, other): # we need to wrap in DatetimeArray/Index and flip the operation if not isinstance(other, DatetimeLikeArrayMixin): # Avoid down-casting DatetimeIndex - from pandas.core.arrays.datetimes import DatetimeArrayMixin + from pandas.core.arrays import DatetimeArrayMixin other = DatetimeArrayMixin(other) return other - self elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 448394f6b0204..228e12d08c1a3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -34,7 +34,7 @@ from pandas.tseries.frequencies import to_offset, get_period_alias from pandas.tseries.offsets import Tick, generate_range -import pandas.core.arrays.datetimelike as dtl +from pandas.core.arrays import datetimelike as dtl _midnight = time(0, 0) @@ -523,7 +523,7 @@ def _add_delta(self, delta): The result's name is set outside of _add_delta by the calling method (__add__ or __sub__) """ - from pandas.core.arrays.timedeltas import TimedeltaArrayMixin + from pandas.core.arrays import TimedeltaArrayMixin if isinstance(delta, (Tick, timedelta, np.timedelta64)): new_values = self._add_delta_td(delta) @@ -818,7 +818,7 @@ def to_period(self, freq=None): pandas.PeriodIndex: Immutable ndarray holding ordinal values pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object """ - from pandas.core.arrays.period import PeriodArrayMixin + from pandas.core.arrays import PeriodArrayMixin if self.tz is not None: warnings.warn("Converting to PeriodArray/Index representation " diff --git 
a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index dfe9cd7110e13..4a66c8b4aba7c 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -18,7 +18,7 @@ is_integer_dtype, is_object_dtype, is_list_like) -from pandas.core.arrays.base import (ExtensionArray, +from pandas.core.arrays import (ExtensionArray, ExtensionOpsMixin) from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.dtypes import register_extension_dtype diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 769053ca28045..9972a04eec1f3 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -23,8 +23,8 @@ from pandas.util._decorators import Appender from pandas.util._doctools import _WritableDoc -from pandas.core.arrays.base import ExtensionArray -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import Categorical _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _interval_shared_docs = {} diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9c261da48fd2d..b678b55d96c06 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -27,7 +27,7 @@ from pandas.tseries import frequencies from pandas.tseries.offsets import Tick, DateOffset -import pandas.core.arrays.datetimelike as dtl +from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin @@ -301,7 +301,7 @@ def to_timestamp(self, freq=None, how='start'): ------- DatetimeArray/Index """ - from pandas.core.arrays.datetimes import DatetimeArrayMixin + from pandas.core.arrays import DatetimeArrayMixin how = libperiod._validate_end_alias(how) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index e7361df49345f..ac51d98686baa 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -16,7 
+16,7 @@ from pandas.errors import PerformanceWarning from pandas.compat.numpy import function as nv -from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com from pandas.core.dtypes.generic import ( ABCSparseSeries, ABCSeries, ABCIndexClass diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index bc50fea39d062..4904a90ab7b2b 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -230,7 +230,7 @@ def _add_delta(self, delta): def _add_datelike(self, other): # adding a timedeltaindex to a datetimelike - from pandas.core.arrays.datetimes import DatetimeArrayMixin + from pandas.core.arrays import DatetimeArrayMixin if isinstance(other, (DatetimeArrayMixin, np.ndarray)): # if other is an ndarray, we assume it is datetime64-dtype # defer to implementation in DatetimeIndex diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index e8f4cfb8cfac9..530a3ecb5f378 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -4,5 +4,5 @@ warnings.warn("'pandas.core' is private. 
Use 'pandas.Categorical'", FutureWarning, stacklevel=2) -from pandas.core.arrays.categorical import Categorical # noqa +from pandas.core.arrays import Categorical # noqa from pandas.core.dtypes.dtypes import CategoricalDtype # noqa diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 6e55ced943d0c..4e62ea121e098 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -20,7 +20,6 @@ is_dict_like, is_scalar, is_string_like, is_list_like, is_number, is_file_like, is_re, is_re_compilable, is_sequence, is_nested_list_like, is_named_tuple, is_array_like, is_decimal, is_complex, is_interval) -from pandas.core.arrays.sparse.dtype import SparseDtype _POSSIBLY_CAST_DTYPES = {np.dtype(t).name for t in ['O', 'int8', 'uint8', 'int16', 'uint16', @@ -1926,10 +1925,12 @@ def _get_dtype_type(arr_or_dtype): elif is_interval_dtype(arr_or_dtype): return Interval return _get_dtype_type(np.dtype(arr_or_dtype)) - elif isinstance(arr_or_dtype, (ABCSparseSeries, ABCSparseArray, + else: + from pandas.core.arrays.sparse.dtype import SparseDtype + if isinstance(arr_or_dtype, (ABCSparseSeries, ABCSparseArray, SparseDtype)): - dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) - return dtype.type + dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + return dtype.type try: return arr_or_dtype.dtype.type except AttributeError: diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 48a1312b5156d..118ffc9a4b31f 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -523,7 +523,7 @@ def _concat_index_asobject(to_concat, name=None): PeriodIndex are converted to object dtype before concatenation """ from pandas import Index - from pandas.core.arrays.base import ExtensionArray + from pandas.core.arrays import ExtensionArray klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 
dde5c84a13344..f07fb3cd80eab 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -776,7 +776,7 @@ def construct_array_type(cls): ------- type """ - from pandas.core.arrays.interval import IntervalArray + from pandas.core.arrays import IntervalArray return IntervalArray @classmethod diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5a47e3d94a136..bfe5e16d24820 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -80,8 +80,8 @@ from pandas.core import nanops from pandas.core import ops from pandas.core.accessor import CachedAccessor -from pandas.core.arrays.categorical import Categorical -from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays import Categorical +from pandas.core.arrays import ExtensionArray from pandas.core.config import get_option from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, ensure_index, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 957f3be8cf6ae..63bf67854e5cd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -44,7 +44,7 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.base import SpecificationError, DataError from pandas.core.index import Index, MultiIndex, CategoricalIndex -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.internals import BlockManager, make_block from pandas.compat.numpy import _np_version_under1p13 diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c95208198f85a..6c68e21e9a6d1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -12,8 +12,8 @@ from pandas.compat import zip, callable from pandas.core.dtypes.generic import ABCSeries -from pandas.core.arrays.base import ExtensionArray -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import 
ExtensionArray +from pandas.core.arrays import Categorical from pandas.core.index import ( Index, MultiIndex, CategoricalIndex) from pandas.core.dtypes.common import ( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 42ec8dff9e548..51c84d6e28cb4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -14,7 +14,7 @@ from pandas import compat from pandas.core.accessor import CachedAccessor -from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, ABCMultiIndex, diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1bbd5b9ac10ac..70140d2d9a432 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -30,7 +30,7 @@ import pandas.core.dtypes.concat as _concat from pandas.core.arrays.datetimes import DatetimeArrayMixin, _to_m8 -import pandas.core.arrays.datetimelike as dtl +from pandas.core.arrays import datetimelike as dtl from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.numeric import Int64Index diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 49bf1c18721d2..3cccb65503378 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1858,7 +1858,7 @@ def _get_labels_for_sorting(self): for sorting, where we need to disambiguate that -1 is not a valid valid """ - from pandas.core.arrays.categorical import Categorical + from pandas.core.arrays import Categorical def cats(label): return np.arange(np.array(label).max() + 1 if len(label) else 0, diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 9a3cd0bb8f68c..f452a57e82725 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -31,7 +31,7 @@ from pandas._libs.tslibs import resolution, period from pandas.core.algorithms import unique1d -import 
pandas.core.arrays.datetimelike as dtl +from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.period import PeriodArrayMixin, dt64arr_to_periodarr from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 430d90cf29aae..56b6dc7051d9f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -16,7 +16,7 @@ from pandas.core.arrays.timedeltas import ( TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) -import pandas.core.arrays.datetimelike as dtl +from pandas.core.arrays import datetimelike as dtl from pandas.core.indexes.base import Index from pandas.core.indexes.numeric import Int64Index diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e66c8fe4de3da..214fcb097f736 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -64,7 +64,7 @@ import pandas.core.missing as missing from pandas.core.base import PandasObject -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index fbd02d7b1aed1..26221143c0cdf 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -4,7 +4,7 @@ from pandas.core.dtypes.common import is_list_like from pandas import compat -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.dtypes.generic import ABCMultiIndex diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index f0b8dc1c14a80..09804e9783e4d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -22,7 +22,7 @@ from pandas.core.arrays.sparse.array import SparseArray from 
pandas._libs.sparse import IntIndex -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.arrays.categorical import _factorize_from_iterable from pandas.core.sorting import (get_group_index, get_compressed_ids, compress_group_index, decons_obs_group_ids) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7397c26c4cc21..1799e83d8edf7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,7 +13,7 @@ import numpy.ma as ma from pandas.core.accessor import CachedAccessor -from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.common import ( is_categorical_dtype, is_string_like, diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 6ac67afa952bb..5aa9ea658482b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -187,7 +187,7 @@ def indexer_from_factorized(labels, shape, compress=True): def lexsort_indexer(keys, orders=None, na_position='last'): - from pandas.core.arrays.categorical import Categorical + from pandas.core.arrays import Categorical labels = [] shape = [] diff --git a/pandas/io/packers.py b/pandas/io/packers.py index b01ffb0242d13..4ab8c7d3b74a4 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -66,7 +66,7 @@ needs_i8_conversion, pandas_dtype) from pandas.core import internals -from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays import IntervalArray from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager, make_block, _safe_reshape from pandas.core.arrays.sparse.api import SparseSeries, SparseDataFrame diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 75d54fcad73c5..1edc6f6e14442 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -28,7 +28,7 @@ ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame -from 
pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core import algorithms import pandas.core.common as com from pandas.io.date_converters import generic_parser diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b53814e9ad605..68b2182c2ff07 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -26,7 +26,7 @@ from pandas import compat, to_timedelta, to_datetime, isna, DatetimeIndex from pandas.compat import (lrange, lmap, lzip, text_type, string_types, range, zip, BytesIO) -from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays import Categorical from pandas.core.base import StringMixin from pandas.core.dtypes.common import (is_categorical_dtype, ensure_object, is_datetime64_dtype) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 6bb4241451b3f..0417895135b48 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -5,9 +5,9 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.arrays.datetimes import DatetimeArrayMixin -from pandas.core.arrays.timedeltas import TimedeltaArrayMixin -from pandas.core.arrays.period import PeriodArrayMixin +from pandas.core.arrays import DatetimeArrayMixin +from pandas.core.arrays import TimedeltaArrayMixin +from pandas.core.arrays import PeriodArrayMixin # TODO: more freq variants diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 72c32d1c759ae..23ee8d217bd59 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -7,7 +7,7 @@ from pandas.api.types import is_integer, is_float, is_float_dtype, is_scalar from pandas.core.dtypes.generic import ABCIndexClass -from pandas.core.arrays.integer import ( +from pandas.core.arrays import ( integer_array, IntegerArray) from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, 
diff --git a/pandas/tests/arrays/test_interval.py b/pandas/tests/arrays/test_interval.py index f8499979061f8..bcf4cea795978 100644 --- a/pandas/tests/arrays/test_interval.py +++ b/pandas/tests/arrays/test_interval.py @@ -3,7 +3,7 @@ import numpy as np from pandas import Index, IntervalIndex, date_range, timedelta_range -from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays import IntervalArray import pandas.util.testing as tm diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 823a72ac59638..aae7c8e4992c3 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from pandas.core.arrays.base import (ExtensionArray, +from pandas.core.arrays import (ExtensionArray, ExtensionScalarOpsMixin) from pandas.core.dtypes.base import ExtensionDtype diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index cf5389904a425..976511941042d 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -21,7 +21,7 @@ from pandas import compat from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays import ExtensionArray class JSONDtype(ExtensionDtype): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 33776eb4262fe..b6223ea96d7dd 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes import dtypes diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 91007b1314834..89c36bbe7b325 100644 --- 
a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -20,7 +20,7 @@ from pandas.tests.extension import base from pandas.core.dtypes.common import is_extension_array_dtype -from pandas.core.arrays.integer import integer_array +from pandas.core.arrays import integer_array from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 3e8013ee60340..183ebea927b10 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -17,7 +17,7 @@ import numpy as np from pandas import Interval -from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays import IntervalArray from pandas.core.dtypes.dtypes import IntervalDtype from pandas.tests.extension import base import pandas.util.testing as tm diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index dfb1902897f78..2e1fefc938676 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -12,8 +12,8 @@ from pandas import (DataFrame, Series, Timestamp, date_range, compat, option_context, Categorical) -from pandas.core.arrays.interval import IntervalArray -from pandas.core.arrays.integer import integer_array +from pandas.core.arrays import IntervalArray +from pandas.core.arrays import integer_array from pandas.compat import StringIO import pandas as pd diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index b836e8fa8cf7b..2afaeea3755d0 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -12,7 +12,7 @@ from pandas.compat import u from pandas import _np_version_under1p14 -from pandas.core.arrays.integer import integer_array +from pandas.core.arrays import integer_array from 
pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype from pandas.tests.frame.common import TestData from pandas.util.testing import (assert_series_equal, diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index 2a3e49cffaa2c..208d498180692 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -8,7 +8,7 @@ Interval, IntervalIndex, Index, Int64Index, Float64Index, Categorical, CategoricalIndex, date_range, timedelta_range, period_range, notna) from pandas.compat import lzip -from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays import IntervalArray from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.dtypes.dtypes import IntervalDtype import pandas.core.common as com diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 6037e7613fe2c..c86cfbf9c8e3c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -18,7 +18,7 @@ import pandas as pd from pandas.core.indexing import (_non_reducing_slice, _maybe_numeric_slice, validate_indices) -from pandas.core.arrays.integer import integer_array +from pandas.core.arrays import integer_array from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm from pandas.compat import PY2 diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 3b82242626c20..1a5c40ba3c398 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -518,7 +518,7 @@ def test_cat_accessor(self): def test_cat_accessor_api(self): # GH 9322 - from pandas.core.arrays.categorical import CategoricalAccessor + from pandas.core.arrays import CategoricalAccessor assert Series.cat is CategoricalAccessor s = Series(list('aabbcde')).astype('category') assert isinstance(s.cat, CategoricalAccessor) diff 
--git a/pandas/util/testing.py b/pandas/util/testing.py index eb519aeb9526c..a3d0ef171f8f6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,8 +19,8 @@ import numpy as np import pandas as pd -from pandas.core.arrays.base import ExtensionArray -from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import IntervalArray from pandas.core.dtypes.missing import array_equivalent from pandas.core.dtypes.common import ( is_datetimelike_v_numeric, From f63ac12eab415e4ddee8d21af3c855e8053f1f8b Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 14 Oct 2018 14:10:25 -0700 Subject: [PATCH 04/12] CLN-23123 Fix test_api.py --- pandas/tests/series/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 1a5c40ba3c398..3b82242626c20 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -518,7 +518,7 @@ def test_cat_accessor(self): def test_cat_accessor_api(self): # GH 9322 - from pandas.core.arrays import CategoricalAccessor + from pandas.core.arrays.categorical import CategoricalAccessor assert Series.cat is CategoricalAccessor s = Series(list('aabbcde')).astype('category') assert isinstance(s.cat, CategoricalAccessor) From 36285c136d172f326890ed1c999e586214f9377d Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 14 Oct 2018 14:50:38 -0700 Subject: [PATCH 05/12] CLN-23123 Clean up imports and fix linting --- pandas/core/arrays/integer.py | 2 +- pandas/core/arrays/interval.py | 3 +-- pandas/core/arrays/sparse/scipy_sparse.py | 1 + pandas/core/dtypes/common.py | 5 +++-- pandas/core/frame.py | 3 +-- pandas/core/groupby/grouper.py | 3 +-- pandas/tests/arrays/sparse/test_array.py | 4 +++- pandas/tests/extension/decimal/array.py | 2 +- pandas/tests/frame/test_block_internals.py | 3 +-- pandas/util/testing.py | 3 +-- 10 files changed, 14 insertions(+), 15 
deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4a66c8b4aba7c..d3aacfc334579 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -19,7 +19,7 @@ is_object_dtype, is_list_like) from pandas.core.arrays import (ExtensionArray, - ExtensionOpsMixin) + ExtensionOpsMixin) from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.missing import isna, notna diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 9972a04eec1f3..64c195cac091a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -23,8 +23,7 @@ from pandas.util._decorators import Appender from pandas.util._doctools import _WritableDoc -from pandas.core.arrays import ExtensionArray -from pandas.core.arrays import Categorical +from pandas.core.arrays import ExtensionArray, Categorical _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _interval_shared_docs = {} diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index 9e0404918aec0..748a52f484893 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -1,5 +1,6 @@ """ Interaction with scipy.sparse matrices. + Currently only includes SparseSeries.to_coo helpers. 
""" from pandas.core.index import MultiIndex, Index diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4e62ea121e098..85bd54f1e45fa 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1927,8 +1927,9 @@ def _get_dtype_type(arr_or_dtype): return _get_dtype_type(np.dtype(arr_or_dtype)) else: from pandas.core.arrays.sparse.dtype import SparseDtype - if isinstance(arr_or_dtype, (ABCSparseSeries, ABCSparseArray, - SparseDtype)): + if isinstance(arr_or_dtype, (ABCSparseSeries, + ABCSparseArray, + SparseDtype)): dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) return dtype.type try: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bfe5e16d24820..7fbd8cc8d80f2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -80,8 +80,7 @@ from pandas.core import nanops from pandas.core import ops from pandas.core.accessor import CachedAccessor -from pandas.core.arrays import Categorical -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import Categorical, ExtensionArray from pandas.core.config import get_option from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, ensure_index, diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 6c68e21e9a6d1..1c8fe0e6cadad 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -12,8 +12,7 @@ from pandas.compat import zip, callable from pandas.core.dtypes.generic import ABCSeries -from pandas.core.arrays import ExtensionArray -from pandas.core.arrays import Categorical +from pandas.core.arrays import ExtensionArray, Categorical from pandas.core.index import ( Index, MultiIndex, CategoricalIndex) from pandas.core.dtypes.common import ( diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index bbd8fcbc294d1..7f79ade235a8b 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ 
b/pandas/tests/arrays/sparse/test_array.py @@ -9,7 +9,9 @@ import numpy as np import pandas as pd -from pandas.core.arrays.sparse.api import SparseArray, SparseSeries, SparseDtype +from pandas.core.arrays.sparse.api import (SparseArray, + SparseSeries, + SparseDtype) from pandas._libs.sparse import IntIndex from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index aae7c8e4992c3..53a598559393c 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -7,7 +7,7 @@ import pandas as pd from pandas.core.arrays import (ExtensionArray, - ExtensionScalarOpsMixin) + ExtensionScalarOpsMixin) from pandas.core.dtypes.base import ExtensionDtype diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 2e1fefc938676..3fe1c84174acb 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -12,8 +12,7 @@ from pandas import (DataFrame, Series, Timestamp, date_range, compat, option_context, Categorical) -from pandas.core.arrays import IntervalArray -from pandas.core.arrays import integer_array +from pandas.core.arrays import IntervalArray, integer_array from pandas.compat import StringIO import pandas as pd diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a3d0ef171f8f6..a89de74875ee5 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,8 +19,7 @@ import numpy as np import pandas as pd -from pandas.core.arrays import ExtensionArray -from pandas.core.arrays import IntervalArray +from pandas.core.arrays import ExtensionArray, IntervalArray from pandas.core.dtypes.missing import array_equivalent from pandas.core.dtypes.common import ( is_datetimelike_v_numeric, From ab60797709a46ec6dee2f7b91ba453de4874ddae Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Sun, 14 Oct 2018 21:20:26 
-0700 Subject: [PATCH 06/12] Add __init__.py to SparseArray folder --- pandas/core/arrays/sparse/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/core/arrays/sparse/__init__.py diff --git a/pandas/core/arrays/sparse/__init__.py b/pandas/core/arrays/sparse/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From e8808e03f37579bc9fc5581ffa0a70f15a4bd861 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Mon, 15 Oct 2018 00:06:28 -0700 Subject: [PATCH 07/12] CLN-23123 Add __init__.py to sparse tests --- pandas/tests/arrays/sparse/__init__.py | 0 pandas/tests/arrays/sparse/frame/__init__.py | 0 pandas/tests/arrays/sparse/series/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/arrays/sparse/__init__.py create mode 100644 pandas/tests/arrays/sparse/frame/__init__.py create mode 100644 pandas/tests/arrays/sparse/series/__init__.py diff --git a/pandas/tests/arrays/sparse/__init__.py b/pandas/tests/arrays/sparse/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/sparse/frame/__init__.py b/pandas/tests/arrays/sparse/frame/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/sparse/series/__init__.py b/pandas/tests/arrays/sparse/series/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 54ed5bc6506edb2688304122d39ca20b626d58dd Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Mon, 15 Oct 2018 00:14:19 -0700 Subject: [PATCH 08/12] CLN-23123 Add missing sparse tests --- pandas/tests/arrays/sparse/common.py | 0 .../arrays/sparse/frame/test_analytics.py | 40 ++ .../arrays/sparse/frame/test_indexing.py | 113 ++++ .../tests/arrays/sparse/frame/test_to_csv.py | 20 + .../arrays/sparse/series/test_indexing.py | 113 ++++ pandas/tests/arrays/sparse/test_libsparse.py | 604 ++++++++++++++++++ pandas/tests/arrays/sparse/test_pivot.py | 50 ++ 
pandas/tests/arrays/sparse/test_reshape.py | 38 ++ 8 files changed, 978 insertions(+) create mode 100644 pandas/tests/arrays/sparse/common.py create mode 100644 pandas/tests/arrays/sparse/frame/test_analytics.py create mode 100644 pandas/tests/arrays/sparse/frame/test_indexing.py create mode 100644 pandas/tests/arrays/sparse/frame/test_to_csv.py create mode 100644 pandas/tests/arrays/sparse/series/test_indexing.py create mode 100644 pandas/tests/arrays/sparse/test_libsparse.py create mode 100644 pandas/tests/arrays/sparse/test_pivot.py create mode 100644 pandas/tests/arrays/sparse/test_reshape.py diff --git a/pandas/tests/arrays/sparse/common.py b/pandas/tests/arrays/sparse/common.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/sparse/frame/test_analytics.py b/pandas/tests/arrays/sparse/frame/test_analytics.py new file mode 100644 index 0000000000000..54e3ddbf2f1cf --- /dev/null +++ b/pandas/tests/arrays/sparse/frame/test_analytics.py @@ -0,0 +1,40 @@ +import pytest +import numpy as np +from pandas import SparseDataFrame, DataFrame, SparseSeries +from pandas.util import testing as tm + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_quantile(): + # GH 17386 + data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]] + q = 0.1 + + sparse_df = SparseDataFrame(data) + result = sparse_df.quantile(q) + + dense_df = DataFrame(data) + dense_expected = dense_df.quantile(q) + sparse_expected = SparseSeries(dense_expected) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_quantile_multi(): + # GH 17386 + data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]] + q = [0.1, 0.5] + + sparse_df = SparseDataFrame(data) + result = sparse_df.quantile(q) + + dense_df = DataFrame(data) + dense_expected = dense_df.quantile(q) + sparse_expected 
= SparseDataFrame(dense_expected) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) diff --git a/pandas/tests/arrays/sparse/frame/test_indexing.py b/pandas/tests/arrays/sparse/frame/test_indexing.py new file mode 100644 index 0000000000000..607eb2da6ded0 --- /dev/null +++ b/pandas/tests/arrays/sparse/frame/test_indexing.py @@ -0,0 +1,113 @@ +import pytest +import numpy as np +from pandas import SparseDataFrame, DataFrame +from pandas.util import testing as tm + + +pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)") + + +@pytest.mark.parametrize('data', [ + [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]], + [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]], + [ + [1.0, 1.0 + 1.0j], + [2.0 + 2.0j, 2.0], + [3.0, 3.0 + 3.0j], + [4.0 + 4.0j, 4.0], + [np.nan, np.nan] + ] +]) +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_where_with_numeric_data(data): + # GH 17386 + lower_bound = 1.5 + + sparse = SparseDataFrame(data) + result = sparse.where(sparse > lower_bound) + + dense = DataFrame(data) + dense_expected = dense.where(dense > lower_bound) + sparse_expected = SparseDataFrame(dense_expected) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) + + +@pytest.mark.parametrize('data', [ + [[1, 1], [2, 2], [3, 3], [4, 4], [0, 0]], + [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [np.nan, np.nan]], + [ + [1.0, 1.0 + 1.0j], + [2.0 + 2.0j, 2.0], + [3.0, 3.0 + 3.0j], + [4.0 + 4.0j, 4.0], + [np.nan, np.nan] + ] +]) +@pytest.mark.parametrize('other', [ + True, + -100, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_where_with_numeric_data_and_other(data, other): + # GH 17386 + lower_bound = 1.5 + + sparse = SparseDataFrame(data) + result = sparse.where(sparse > lower_bound, other) + + dense = DataFrame(data) + 
dense_expected = dense.where(dense > lower_bound, other) + sparse_expected = SparseDataFrame(dense_expected, + default_fill_value=other) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_where_with_bool_data(): + # GH 17386 + data = [[False, False], [True, True], [False, False]] + cond = True + + sparse = SparseDataFrame(data) + result = sparse.where(sparse == cond) + + dense = DataFrame(data) + dense_expected = dense.where(dense == cond) + sparse_expected = SparseDataFrame(dense_expected) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) + + +@pytest.mark.parametrize('other', [ + True, + 0, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_where_with_bool_data_and_other(other): + # GH 17386 + data = [[False, False], [True, True], [False, False]] + cond = True + + sparse = SparseDataFrame(data) + result = sparse.where(sparse == cond, other) + + dense = DataFrame(data) + dense_expected = dense.where(dense == cond, other) + sparse_expected = SparseDataFrame(dense_expected, + default_fill_value=other) + + tm.assert_frame_equal(result, dense_expected) + tm.assert_sp_frame_equal(result, sparse_expected) diff --git a/pandas/tests/arrays/sparse/frame/test_to_csv.py b/pandas/tests/arrays/sparse/frame/test_to_csv.py new file mode 100644 index 0000000000000..b0243dfde8d3f --- /dev/null +++ b/pandas/tests/arrays/sparse/frame/test_to_csv.py @@ -0,0 +1,20 @@ +import numpy as np +import pytest +from pandas import SparseDataFrame, read_csv +from pandas.util import testing as tm + + +class TestSparseDataFrameToCsv(object): + fill_values = [np.nan, 0, None, 1] + + @pytest.mark.parametrize('fill_value', fill_values) + def test_to_csv_sparse_dataframe(self, fill_value): + # GH19384 + sdf = 
SparseDataFrame({'a': type(self).fill_values}, + default_fill_value=fill_value) + + with tm.ensure_clean('sparse_df.csv') as path: + sdf.to_csv(path, index=False) + df = read_csv(path, skip_blank_lines=False) + + tm.assert_sp_frame_equal(df.to_sparse(fill_value=fill_value), sdf) diff --git a/pandas/tests/arrays/sparse/series/test_indexing.py b/pandas/tests/arrays/sparse/series/test_indexing.py new file mode 100644 index 0000000000000..998285d933492 --- /dev/null +++ b/pandas/tests/arrays/sparse/series/test_indexing.py @@ -0,0 +1,113 @@ +import pytest +import numpy as np +from pandas import SparseSeries, Series +from pandas.util import testing as tm + + +pytestmark = pytest.mark.skip("Wrong SparseBlock initialization (GH 17386)") + + +@pytest.mark.parametrize('data', [ + [1, 1, 2, 2, 3, 3, 4, 4, 0, 0], + [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan], + [ + 1.0, 1.0 + 1.0j, + 2.0 + 2.0j, 2.0, + 3.0, 3.0 + 3.0j, + 4.0 + 4.0j, 4.0, + np.nan, np.nan + ] +]) +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_where_with_numeric_data(data): + # GH 17386 + lower_bound = 1.5 + + sparse = SparseSeries(data) + result = sparse.where(sparse > lower_bound) + + dense = Series(data) + dense_expected = dense.where(dense > lower_bound) + sparse_expected = SparseSeries(dense_expected) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.parametrize('data', [ + [1, 1, 2, 2, 3, 3, 4, 4, 0, 0], + [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, np.nan, np.nan], + [ + 1.0, 1.0 + 1.0j, + 2.0 + 2.0j, 2.0, + 3.0, 3.0 + 3.0j, + 4.0 + 4.0j, 4.0, + np.nan, np.nan + ] +]) +@pytest.mark.parametrize('other', [ + True, + -100, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.skip(reason='Wrong SparseBlock initialization ' + '(Segfault) ' + '(GH 17386)') +def test_where_with_numeric_data_and_other(data, other): + # GH 17386 + lower_bound = 1.5 + + sparse = SparseSeries(data) + result = 
sparse.where(sparse > lower_bound, other) + + dense = Series(data) + dense_expected = dense.where(dense > lower_bound, other) + sparse_expected = SparseSeries(dense_expected, fill_value=other) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)', + strict=True) +def test_where_with_bool_data(): + # GH 17386 + data = [False, False, True, True, False, False] + cond = True + + sparse = SparseSeries(data) + result = sparse.where(sparse == cond) + + dense = Series(data) + dense_expected = dense.where(dense == cond) + sparse_expected = SparseSeries(dense_expected) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) + + +@pytest.mark.parametrize('other', [ + True, + 0, + 0.1, + 100.0 + 100.0j +]) +@pytest.mark.skip(reason='Wrong SparseBlock initialization ' + '(Segfault) ' + '(GH 17386)') +def test_where_with_bool_data_and_other(other): + # GH 17386 + data = [False, False, True, True, False, False] + cond = True + + sparse = SparseSeries(data) + result = sparse.where(sparse == cond, other) + + dense = Series(data) + dense_expected = dense.where(dense == cond, other) + sparse_expected = SparseSeries(dense_expected, fill_value=other) + + tm.assert_series_equal(result, dense_expected) + tm.assert_sp_series_equal(result, sparse_expected) diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py new file mode 100644 index 0000000000000..3b90d93cee7a4 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -0,0 +1,604 @@ +from pandas import Series + +import pytest +import numpy as np +import operator +import pandas.util.testing as tm +import pandas.util._test_decorators as td + +from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index +import pandas._libs.sparse as splib + +TEST_LENGTH = 20 + +plain_case = dict(xloc=[0, 7, 15], 
xlen=[3, 5, 5], yloc=[2, 9, 14], + ylen=[2, 3, 5], intersect_loc=[2, 9, 15], + intersect_len=[1, 3, 4]) +delete_blocks = dict(xloc=[0, 5], xlen=[4, 4], yloc=[1], ylen=[4], + intersect_loc=[1], intersect_len=[3]) +split_blocks = dict(xloc=[0], xlen=[10], yloc=[0, 5], ylen=[3, 7], + intersect_loc=[0, 5], intersect_len=[3, 5]) +skip_block = dict(xloc=[10], xlen=[5], yloc=[0, 12], ylen=[5, 3], + intersect_loc=[12], intersect_len=[3]) + +no_intersect = dict(xloc=[0, 10], xlen=[4, 6], yloc=[5, 17], ylen=[4, 2], + intersect_loc=[], intersect_len=[]) + + +def check_cases(_check_case): + def _check_case_dict(case): + _check_case(case['xloc'], case['xlen'], case['yloc'], case['ylen'], + case['intersect_loc'], case['intersect_len']) + + _check_case_dict(plain_case) + _check_case_dict(delete_blocks) + _check_case_dict(split_blocks) + _check_case_dict(skip_block) + _check_case_dict(no_intersect) + + # one or both is empty + _check_case([0], [5], [], [], [], []) + _check_case([], [], [], [], [], []) + + +class TestSparseIndexUnion(object): + + def test_index_make_union(self): + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + bresult = xindex.make_union(yindex) + assert (isinstance(bresult, BlockIndex)) + tm.assert_numpy_array_equal(bresult.blocs, + np.array(eloc, dtype=np.int32)) + tm.assert_numpy_array_equal(bresult.blengths, + np.array(elen, dtype=np.int32)) + + ixindex = xindex.to_int_index() + iyindex = yindex.to_int_index() + iresult = ixindex.make_union(iyindex) + assert (isinstance(iresult, IntIndex)) + tm.assert_numpy_array_equal(iresult.indices, + bresult.to_int_index().indices) + + """ + x: ---- + y: ---- + r: -------- + """ + xloc = [0] + xlen = [5] + yloc = [5] + ylen = [4] + eloc = [0] + elen = [9] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ----- ----- + y: ----- -- + """ + xloc = [0, 10] + xlen = [5, 5] + yloc = [2, 17] + ylen = [5, 2] + eloc = [0, 
10, 17] + elen = [7, 5, 2] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ + y: ------- + r: ---------- + """ + xloc = [1] + xlen = [5] + yloc = [3] + ylen = [5] + eloc = [1] + elen = [7] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ ----- + y: ------- + r: ------------- + """ + xloc = [2, 10] + xlen = [4, 4] + yloc = [4] + ylen = [8] + eloc = [2] + elen = [12] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: --- ----- + y: ------- + r: ------------- + """ + xloc = [0, 5] + xlen = [3, 5] + yloc = [0] + ylen = [7] + eloc = [0] + elen = [10] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ ----- + y: ------- --- + r: ------------- + """ + xloc = [2, 10] + xlen = [4, 4] + yloc = [4, 13] + ylen = [8, 4] + eloc = [2] + elen = [15] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ---------------------- + y: ---- ---- --- + r: ---------------------- + """ + xloc = [2] + xlen = [15] + yloc = [4, 9, 14] + ylen = [3, 2, 2] + eloc = [2] + elen = [15] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ---- --- + y: --- --- + """ + xloc = [0, 10] + xlen = [3, 3] + yloc = [5, 15] + ylen = [2, 2] + eloc = [0, 5, 10, 15] + elen = [3, 2, 3, 2] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + + def test_intindex_make_union(self): + a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + b = 
IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1], dtype=np.int32)) + b = IntIndex(4, np.array([0, 1], dtype=np.int32)) + with pytest.raises(ValueError): + a.make_union(b) + + +class TestSparseIndexIntersect(object): + + @td.skip_if_windows + def test_intersect(self): + def _check_correct(a, b, expected): + result = a.intersect(b) + assert (result.equals(expected)) + + def _check_length_exc(a, longer): + pytest.raises(Exception, a.intersect, longer) + + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + expected = BlockIndex(TEST_LENGTH, eloc, elen) + longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) + + _check_correct(xindex, yindex, expected) + _check_correct(xindex.to_int_index(), yindex.to_int_index(), + expected.to_int_index()) + + _check_length_exc(xindex, longer_index) + _check_length_exc(xindex.to_int_index(), + longer_index.to_int_index()) + + check_cases(_check_case) + + def test_intersect_empty(self): + xindex = IntIndex(4, np.array([], dtype=np.int32)) + yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + xindex = xindex.to_block_index() + yindex = yindex.to_block_index() + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + def test_intersect_identical(self): + cases = [IntIndex(5, np.array([1, 2], dtype=np.int32)), + IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), + IntIndex(0, np.array([], dtype=np.int32)), + IntIndex(5, np.array([], dtype=np.int32))] + + for case in cases: + assert case.intersect(case).equals(case) + case = case.to_block_index() + assert case.intersect(case).equals(case) + + +class TestSparseIndexCommon(object): + + def 
test_int_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, + np.array([2, 3], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind='integer') + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, + np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind='integer') + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, + np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_block_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.blocs, + np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([2], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, + np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.blocs, + np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([4], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), + kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 + tm.assert_numpy_array_equal(idx.blocs, + np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([1, 2], dtype=np.int32)) + + def test_lookup(self): + for kind in ['integer', 'block']: + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) 
+ assert idx.lookup(-1) == -1 + assert idx.lookup(0) == -1 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 0 + assert idx.lookup(3) == 1 + assert idx.lookup(4) == -1 + + idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) + + for i in range(-1, 5): + assert idx.lookup(i) == -1 + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == 1 + assert idx.lookup(2) == 2 + assert idx.lookup(3) == 3 + assert idx.lookup(4) == -1 + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), + kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 1 + assert idx.lookup(3) == 2 + assert idx.lookup(4) == -1 + + def test_lookup_array(self): + for kind in ['integer', 'block']: + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, -1, 0], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 0, -1, 1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) + exp = np.array([-1, -1, -1, -1], dtype=np.int32) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, 0, 2], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 2, 1, 3], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), + kind=kind) + res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) + exp = np.array([1, -1, 2, 0], dtype=np.int32) + 
tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) + exp = np.array([-1, -1, 1, -1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + def test_lookup_basics(self): + def _check(index): + assert (index.lookup(0) == -1) + assert (index.lookup(5) == 0) + assert (index.lookup(7) == 2) + assert (index.lookup(8) == -1) + assert (index.lookup(9) == -1) + assert (index.lookup(10) == -1) + assert (index.lookup(11) == -1) + assert (index.lookup(12) == 3) + assert (index.lookup(17) == 8) + assert (index.lookup(18) == -1) + + bindex = BlockIndex(20, [5, 12], [3, 6]) + iindex = bindex.to_int_index() + + _check(bindex) + _check(iindex) + + # corner cases + + +class TestBlockIndex(object): + + def test_block_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.blocs, + np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([2], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, + np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.blocs, + np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([4], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind='block') + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 + tm.assert_numpy_array_equal(idx.blocs, + np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([1, 2], dtype=np.int32)) + + def test_make_block_boundary(self): + for i in [5, 10, 
100, 101]: + idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32), + kind='block') + + exp = np.arange(0, i, 2, dtype=np.int32) + tm.assert_numpy_array_equal(idx.blocs, exp) + tm.assert_numpy_array_equal(idx.blengths, + np.ones(len(exp), dtype=np.int32)) + + def test_equals(self): + index = BlockIndex(10, [0, 4], [2, 5]) + + assert index.equals(index) + assert not index.equals(BlockIndex(10, [0, 4], [2, 6])) + + def test_check_integrity(self): + locs = [] + lengths = [] + + # 0-length OK + # TODO: index variables are not used...is that right? + index = BlockIndex(0, locs, lengths) # noqa + + # also OK even though empty + index = BlockIndex(1, locs, lengths) # noqa + + # block extend beyond end + pytest.raises(Exception, BlockIndex, 10, [5], [10]) + + # block overlap + pytest.raises(Exception, BlockIndex, 10, [2, 5], [5, 3]) + + def test_to_int_index(self): + locs = [0, 10] + lengths = [4, 6] + exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15] + + block = BlockIndex(20, locs, lengths) + dense = block.to_int_index() + + tm.assert_numpy_array_equal(dense.indices, + np.array(exp_inds, dtype=np.int32)) + + def test_to_block_index(self): + index = BlockIndex(10, [0, 5], [4, 5]) + assert index.to_block_index() is index + + +class TestIntIndex(object): + + def test_check_integrity(self): + + # Too many indices than specified in self.length + msg = "Too many indices" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=1, indices=[1, 2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # All indices must be less than the length. 
+ msg = "All indices must be less than the length" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 5]) + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 2, 6]) + + # Indices must be strictly ascending. + msg = "Indices must be strictly increasing" + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 3, 2]) + + with tm.assert_raises_regex(ValueError, msg): + IntIndex(length=5, indices=[1, 3, 3]) + + def test_int_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, + np.array([2, 3], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind='integer') + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, + np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind='integer') + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, + np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_equals(self): + index = IntIndex(10, [0, 1, 2, 3, 4]) + assert index.equals(index) + assert not index.equals(IntIndex(10, [0, 1, 2, 3])) + + def test_to_block_index(self): + + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + # see if survive the round trip + xbindex = xindex.to_int_index().to_block_index() + ybindex = yindex.to_int_index().to_block_index() + assert isinstance(xbindex, BlockIndex) + assert xbindex.equals(xindex) + assert ybindex.equals(yindex) + + check_cases(_check_case) + + def test_to_int_index(self): + index = IntIndex(10, [2, 3, 4, 5, 6]) + assert index.to_int_index() is index + + +class TestSparseOperators(object): + + def _op_tests(self, sparse_op, python_op): + def 
_check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + xdindex = xindex.to_int_index() + ydindex = yindex.to_int_index() + + x = np.arange(xindex.npoints) * 10. + 1 + y = np.arange(yindex.npoints) * 100. + 1 + + xfill = 0 + yfill = 2 + + result_block_vals, rb_index, bfill = sparse_op(x, xindex, xfill, y, + yindex, yfill) + result_int_vals, ri_index, ifill = sparse_op(x, xdindex, xfill, y, + ydindex, yfill) + + assert rb_index.to_int_index().equals(ri_index) + tm.assert_numpy_array_equal(result_block_vals, result_int_vals) + assert bfill == ifill + + # check versus Series... + xseries = Series(x, xdindex.indices) + xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill) + + yseries = Series(y, ydindex.indices) + yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill) + + series_result = python_op(xseries, yseries) + series_result = series_result.reindex(ri_index.indices) + + tm.assert_numpy_array_equal(result_block_vals, + series_result.values) + tm.assert_numpy_array_equal(result_int_vals, series_result.values) + + check_cases(_check_case) + + @pytest.mark.parametrize('opname', + ['add', 'sub', 'mul', 'truediv', 'floordiv']) + def test_op(self, opname): + sparse_op = getattr(splib, 'sparse_%s_float64' % opname) + python_op = getattr(operator, opname) + self._op_tests(sparse_op, python_op) diff --git a/pandas/tests/arrays/sparse/test_pivot.py b/pandas/tests/arrays/sparse/test_pivot.py new file mode 100644 index 0000000000000..e7eba63e4e0b3 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_pivot.py @@ -0,0 +1,50 @@ +import numpy as np +import pandas as pd +import pandas.util.testing as tm + + +class TestPivotTable(object): + + def setup_method(self, method): + self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': 
np.random.randn(8), + 'E': [np.nan, np.nan, 1, 2, + np.nan, 1, np.nan, np.nan]}) + self.sparse = self.dense.to_sparse() + + def test_pivot_table(self): + res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', + values='C') + res_dense = pd.pivot_table(self.dense, index='A', columns='B', + values='C') + tm.assert_frame_equal(res_sparse, res_dense) + + res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', + values='E') + res_dense = pd.pivot_table(self.dense, index='A', columns='B', + values='E') + tm.assert_frame_equal(res_sparse, res_dense) + + res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', + values='E', aggfunc='mean') + res_dense = pd.pivot_table(self.dense, index='A', columns='B', + values='E', aggfunc='mean') + tm.assert_frame_equal(res_sparse, res_dense) + + # ToDo: sum doesn't handle nan properly + # res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', + # values='E', aggfunc='sum') + # res_dense = pd.pivot_table(self.dense, index='A', columns='B', + # values='E', aggfunc='sum') + # tm.assert_frame_equal(res_sparse, res_dense) + + def test_pivot_table_multi(self): + res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', + values=['D', 'E']) + res_dense = pd.pivot_table(self.dense, index='A', columns='B', + values=['D', 'E']) + tm.assert_frame_equal(res_sparse, res_dense) diff --git a/pandas/tests/arrays/sparse/test_reshape.py b/pandas/tests/arrays/sparse/test_reshape.py new file mode 100644 index 0000000000000..b492c47375bcf --- /dev/null +++ b/pandas/tests/arrays/sparse/test_reshape.py @@ -0,0 +1,38 @@ +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +@pytest.fixture +def sparse_df(): + return pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye + + +@pytest.fixture +def multi_index3(): + return pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + +def test_sparse_frame_stack(sparse_df, multi_index3): + ss = sparse_df.stack() + expected = 
pd.SparseSeries(np.ones(3), index=multi_index3) + tm.assert_sp_series_equal(ss, expected) + + +def test_sparse_frame_unstack(sparse_df): + mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) + sparse_df.index = mi + arr = np.array([[1, np.nan, np.nan], + [np.nan, 1, np.nan], + [np.nan, np.nan, 1]]) + unstacked_df = pd.DataFrame(arr, index=mi).unstack() + unstacked_sdf = sparse_df.unstack() + + tm.assert_numpy_array_equal(unstacked_df.values, unstacked_sdf.values) + + +def test_sparse_series_unstack(sparse_df, multi_index3): + frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() + tm.assert_sp_frame_equal(frame, sparse_df) From b261f85b82d79e9cc871c1de7abaf1fe924f7df3 Mon Sep 17 00:00:00 2001 From: JustinZhengBC Date: Mon, 15 Oct 2018 00:27:56 -0700 Subject: [PATCH 09/12] CLN-23123 Modify re-added tests to use correct imports --- pandas/tests/arrays/sparse/test_libsparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py index 3b90d93cee7a4..adb05196295bd 100644 --- a/pandas/tests/arrays/sparse/test_libsparse.py +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.core.sparse.array import IntIndex, BlockIndex, _make_index +from pandas.core.arrays.sparse.array import IntIndex, BlockIndex, _make_index import pandas._libs.sparse as splib TEST_LENGTH = 20 From 1d0b50b295a472191c74fe341bb4b6fdd5cf6ffb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Oct 2018 13:28:30 -0500 Subject: [PATCH 10/12] move frame, series back --- pandas/__init__.py | 2 +- pandas/compat/pickle_compat.py | 16 +- pandas/core/arrays/__init__.py | 1 + .../arrays/{sparse/array.py => sparse.py} | 250 +++++++++++++++++- pandas/core/arrays/sparse/api.py | 6 - pandas/core/arrays/sparse/dtype.py | 249 ----------------- pandas/core/dtypes/common.py | 4 +- 
pandas/core/dtypes/concat.py | 10 +- pandas/core/frame.py | 4 +- pandas/core/internals/managers.py | 2 +- pandas/core/ops.py | 4 +- pandas/core/reshape/reshape.py | 4 +- pandas/core/series.py | 6 +- pandas/core/{arrays => }/sparse/__init__.py | 0 pandas/core/sparse/api.py | 5 + pandas/core/{arrays => }/sparse/frame.py | 4 +- .../core/{arrays => }/sparse/scipy_sparse.py | 0 pandas/core/{arrays => }/sparse/series.py | 6 +- pandas/io/packers.py | 4 +- pandas/io/pytables.py | 2 +- .../tests/arrays/sparse/test_arithmetics.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 6 +- pandas/tests/arrays/sparse/test_dtype.py | 2 +- pandas/tests/arrays/sparse/test_libsparse.py | 2 +- pandas/tests/dtypes/test_common.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- pandas/tests/extension/test_sparse.py | 3 +- pandas/tests/frame/test_indexing.py | 2 +- pandas/tests/reshape/test_reshape.py | 2 +- pandas/tests/series/test_subclass.py | 2 +- .../sparse/frame => sparse}/__init__.py | 0 pandas/tests/{arrays => }/sparse/common.py | 0 .../series => sparse/frame}/__init__.py | 0 .../{arrays => }/sparse/frame/conftest.py | 0 .../sparse/frame/test_analytics.py | 0 .../{arrays => }/sparse/frame/test_apply.py | 2 +- .../{arrays => }/sparse/frame/test_frame.py | 4 +- .../sparse/frame/test_indexing.py | 0 .../{arrays => }/sparse/frame/test_to_csv.py | 0 .../sparse/frame/test_to_from_scipy.py | 2 +- pandas/tests/sparse/series/__init__.py | 0 .../sparse/series/test_indexing.py | 0 .../{arrays => }/sparse/series/test_series.py | 5 +- .../sparse/test_combine_concat.py | 2 +- .../tests/{arrays => }/sparse/test_format.py | 0 .../tests/{arrays => }/sparse/test_groupby.py | 0 .../{arrays => }/sparse/test_indexing.py | 2 +- .../tests/{arrays => }/sparse/test_pivot.py | 0 .../tests/{arrays => }/sparse/test_reshape.py | 0 49 files changed, 306 insertions(+), 315 deletions(-) rename pandas/core/arrays/{sparse/array.py => sparse.py} (85%) delete mode 100644 pandas/core/arrays/sparse/api.py delete mode 
100644 pandas/core/arrays/sparse/dtype.py rename pandas/core/{arrays => }/sparse/__init__.py (100%) create mode 100644 pandas/core/sparse/api.py rename pandas/core/{arrays => }/sparse/frame.py (99%) rename pandas/core/{arrays => }/sparse/scipy_sparse.py (100%) rename pandas/core/{arrays => }/sparse/series.py (99%) rename pandas/tests/{arrays/sparse/frame => sparse}/__init__.py (100%) rename pandas/tests/{arrays => }/sparse/common.py (100%) rename pandas/tests/{arrays/sparse/series => sparse/frame}/__init__.py (100%) rename pandas/tests/{arrays => }/sparse/frame/conftest.py (100%) rename pandas/tests/{arrays => }/sparse/frame/test_analytics.py (100%) rename pandas/tests/{arrays => }/sparse/frame/test_apply.py (97%) rename pandas/tests/{arrays => }/sparse/frame/test_frame.py (99%) rename pandas/tests/{arrays => }/sparse/frame/test_indexing.py (100%) rename pandas/tests/{arrays => }/sparse/frame/test_to_csv.py (100%) rename pandas/tests/{arrays => }/sparse/frame/test_to_from_scipy.py (99%) create mode 100644 pandas/tests/sparse/series/__init__.py rename pandas/tests/{arrays => }/sparse/series/test_indexing.py (100%) rename pandas/tests/{arrays => }/sparse/series/test_series.py (99%) rename pandas/tests/{arrays => }/sparse/test_combine_concat.py (99%) rename pandas/tests/{arrays => }/sparse/test_format.py (100%) rename pandas/tests/{arrays => }/sparse/test_groupby.py (100%) rename pandas/tests/{arrays => }/sparse/test_indexing.py (99%) rename pandas/tests/{arrays => }/sparse/test_pivot.py (100%) rename pandas/tests/{arrays => }/sparse/test_reshape.py (100%) diff --git a/pandas/__init__.py b/pandas/__init__.py index 930b8844d9014..e446782d9665e 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -40,7 +40,7 @@ import pandas.core.config_init from pandas.core.api import * -from pandas.core.arrays.sparse.api import * +from pandas.core.sparse.api import * from pandas.tseries.api import * from pandas.core.computation.api import * from pandas.core.reshape.api import * 
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index c2d29da1fdcd3..61457bcef9dea 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -56,14 +56,8 @@ def load_reduce(self): # If classes are moved, provide compat here. _class_locations_map = { - - # 23123, fix sparse mapping ('pandas.core.sparse.array', 'SparseArray'): - ('pandas.core.arrays.sparse.series', 'SparseArray'), - ('pandas.core.sparse.series', 'SparseSeries'): - ('pandas.core.arrays.sparse.series', 'SparseSeries'), - ('pandas.core.sparse.frame', 'SparseDataFrame'): - ('pandas.core.arrays.sparse.frame', 'SparseDataFrame'), + ('pandas.core.arrays', 'SparseArray'), # 15477 ('pandas.core.base', 'FrozenNDArray'): @@ -75,7 +69,7 @@ def load_reduce(self): ('pandas.core.series', 'TimeSeries'): ('pandas.core.series', 'Series'), ('pandas.sparse.series', 'SparseTimeSeries'): - ('pandas.core.arrays.sparse.series', 'SparseSeries'), + ('pandas.core.sparse.series', 'SparseSeries'), # 12588, extensions moving ('pandas._sparse', 'BlockIndex'): @@ -96,11 +90,11 @@ def load_reduce(self): # 15998 top-level dirs moving ('pandas.sparse.array', 'SparseArray'): - ('pandas.core.arrays.sparse.array', 'SparseArray'), + ('pandas.core.arrays.sparse', 'SparseArray'), ('pandas.sparse.series', 'SparseSeries'): - ('pandas.core.arrays.sparse.series', 'SparseSeries'), + ('pandas.core.sparse.series', 'SparseSeries'), ('pandas.sparse.frame', 'SparseDataFrame'): - ('pandas.core.arrays.sparse.frame', 'SparseDataFrame'), + ('pandas.core.sparse.frame', 'SparseDataFrame'), ('pandas.indexes.base', '_new_Index'): ('pandas.core.indexes.base', '_new_Index'), ('pandas.indexes.base', 'Index'): diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 29f258bf1b29e..0537b79541641 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -8,3 +8,4 @@ from .timedeltas import TimedeltaArrayMixin # noqa from .integer import ( # noqa IntegerArray, 
integer_array) +from .sparse import SparseArray # noqa diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse.py similarity index 85% rename from pandas/core/arrays/sparse/array.py rename to pandas/core/arrays/sparse.py index ac51d98686baa..f5e54e4425444 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse.py @@ -4,6 +4,7 @@ from __future__ import division # pylint: disable=E1101,E1103,W0231 +import re import operator import numbers import numpy as np @@ -18,6 +19,8 @@ from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.generic import ( ABCSparseSeries, ABCSeries, ABCIndexClass ) @@ -45,7 +48,252 @@ import pandas.core.algorithms as algos import pandas.io.formats.printing as printing -from pandas.core.arrays.sparse.dtype import SparseDtype + +# ---------------------------------------------------------------------------- +# Dtype + +@register_extension_dtype +class SparseDtype(ExtensionDtype): + """ + Dtype for data stored in :class:`SparseArray`. + + This dtype implements the pandas ExtensionDtype interface. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 + The dtype of the underlying array storing the non-fill value values. + fill_value : scalar, optional. + The scalar value not stored in the SparseArray. By default, this + depends on `dtype`. + + ========== ========== + dtype na_value + ========== ========== + float ``np.nan`` + int ``0`` + bool ``False`` + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + ========== ========== + + The default value may be overridden by specifying a `fill_value`. + """ + # We include `_is_na_fill_value` in the metadata to avoid hash collisions + # between SparseDtype(float, 0.0) and SparseDtype(float, nan). 
+ # Without is_na_fill_value in the comparison, those would be equal since + # hash(nan) is (sometimes?) 0. + _metadata = ('_dtype', '_fill_value', '_is_na_fill_value') + + def __init__(self, dtype=np.float64, fill_value=None): + # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None + from pandas.core.dtypes.missing import na_value_for_dtype + from pandas.core.dtypes.common import ( + pandas_dtype, is_string_dtype, is_scalar + ) + + if isinstance(dtype, type(self)): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + dtype = pandas_dtype(dtype) + if is_string_dtype(dtype): + dtype = np.dtype('object') + + if fill_value is None: + fill_value = na_value_for_dtype(dtype) + + if not is_scalar(fill_value): + raise ValueError("fill_value must be a scalar. Got {} " + "instead".format(fill_value)) + self._dtype = dtype + self._fill_value = fill_value + + def __hash__(self): + # Python3 doesn't inherit __hash__ when a base class overrides + # __eq__, so we explicitly do it here. + return super(SparseDtype, self).__hash__() + + def __eq__(self, other): + # We have to override __eq__ to handle NA values in _metadata. + # The base class does simple == checks, which fail for NA. + if isinstance(other, compat.string_types): + try: + other = self.construct_from_string(other) + except TypeError: + return False + + if isinstance(other, type(self)): + subtype = self.subtype == other.subtype + if self._is_na_fill_value: + # this case is complicated by two things: + # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) + # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) + # i.e. we want to treat any floating-point NaN as equal, but + # not a floating-point NaN and a datetime NaT. 
+ fill_value = ( + other._is_na_fill_value and + isinstance(self.fill_value, type(other.fill_value)) or + isinstance(other.fill_value, type(self.fill_value)) + ) + else: + fill_value = self.fill_value == other.fill_value + + return subtype and fill_value + return False + + @property + def fill_value(self): + """ + The fill value of the array. + + Converting the SparseArray to a dense ndarray will fill the + array with this value. + + .. warning:: + + It's possible to end up with a SparseArray that has ``fill_value`` + values in ``sp_values``. This can occur, for example, when setting + ``SparseArray.fill_value`` directly. + """ + return self._fill_value + + @property + def _is_na_fill_value(self): + from pandas.core.dtypes.missing import isna + return isna(self.fill_value) + + @property + def _is_numeric(self): + from pandas.core.dtypes.common import is_object_dtype + return not is_object_dtype(self.subtype) + + @property + def _is_boolean(self): + from pandas.core.dtypes.common import is_bool_dtype + return is_bool_dtype(self.subtype) + + @property + def kind(self): + return self.subtype.kind + + @property + def type(self): + return self.subtype.type + + @property + def subtype(self): + return self._dtype + + @property + def name(self): + return 'Sparse[{}, {}]'.format(self.subtype.name, self.fill_value) + + def __repr__(self): + return self.name + + @classmethod + def construct_array_type(cls): + return SparseArray + + @classmethod + def construct_from_string(cls, string): + """ + Construct a SparseDtype from a string form. + + Parameters + ---------- + string : str + Can take the following forms. + + string dtype + ================ ============================ + 'int' SparseDtype[np.int64, 0] + 'Sparse' SparseDtype[np.float64, nan] + 'Sparse[int]' SparseDtype[np.int64, 0] + 'Sparse[int, 0]' SparseDtype[np.int64, 0] + ================ ============================ + + It is not possible to specify non-default fill values + with a string. 
An argument like ``'Sparse[int, 1]'`` + will raise a ``TypeError`` because the default fill value + for integers is 0. + + Returns + ------- + SparseDtype + """ + msg = "Could not construct SparseDtype from '{}'".format(string) + if string.startswith("Sparse"): + try: + sub_type, has_fill_value = cls._parse_subtype(string) + result = SparseDtype(sub_type) + except Exception: + raise TypeError(msg) + else: + msg = ("Could not construct SparseDtype from '{}'.\n\nIt " + "looks like the fill_value in the string is not " + "the default for the dtype. Non-default fill_values " + "are not supported. Use the 'SparseDtype()' " + "constructor instead.") + if has_fill_value and str(result) != string: + raise TypeError(msg.format(string)) + return result + else: + raise TypeError(msg) + + @staticmethod + def _parse_subtype(dtype): + """ + Parse a string to get the subtype + + Parameters + ---------- + dtype : str + A string like + + * Sparse[subtype] + * Sparse[subtype, fill_value] + + Returns + ------- + subtype : str + + Raises + ------ + ValueError + When the subtype cannot be extracted. 
+ """ + xpr = re.compile( + r"Sparse\[(?P<subtype>[^,]*)(, )?(?P<fill_value>.*?)?\]$" + ) + m = xpr.match(dtype) + has_fill_value = False + if m: + subtype = m.groupdict()['subtype'] + has_fill_value = m.groupdict()['fill_value'] or has_fill_value + elif dtype == "Sparse": + subtype = 'float64' + else: + raise ValueError("Cannot parse {}".format(dtype)) + return subtype, has_fill_value + + @classmethod + def is_dtype(cls, dtype): + dtype = getattr(dtype, 'dtype', dtype) + if (isinstance(dtype, compat.string_types) and + dtype.startswith("Sparse")): + sub_type, _ = cls._parse_subtype(dtype) + dtype = np.dtype(sub_type) + elif isinstance(dtype, cls): + return True + return isinstance(dtype, np.dtype) or dtype == 'Sparse' + +# ---------------------------------------------------------------------------- +# Array _sparray_doc_kwargs = dict(klass='SparseArray') diff --git a/pandas/core/arrays/sparse/api.py b/pandas/core/arrays/sparse/api.py deleted file mode 100644 index 2176213813e2e..0000000000000 --- a/pandas/core/arrays/sparse/api.py +++ /dev/null @@ -1,6 +0,0 @@ -# pylint: disable=W0611 -# flake8: noqa -from pandas.core.arrays.sparse.array import SparseArray -from pandas.core.arrays.sparse.series import SparseSeries -from pandas.core.arrays.sparse.frame import SparseDataFrame -from pandas.core.arrays.sparse.dtype import SparseDtype diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py deleted file mode 100644 index 7f99bf8b58847..0000000000000 --- a/pandas/core/arrays/sparse/dtype.py +++ /dev/null @@ -1,249 +0,0 @@ -import re - -import numpy as np - -from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.dtypes import register_extension_dtype -from pandas import compat - - -@register_extension_dtype -class SparseDtype(ExtensionDtype): - """ - Dtype for data stored in :class:`SparseArray`. - - This dtype implements the pandas ExtensionDtype interface. - - .. 
versionadded:: 0.24.0 - - Parameters - ---------- - dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 - The dtype of the underlying array storing the non-fill value values. - fill_value : scalar, optional. - The scalar value not stored in the SparseArray. By default, this - depends on `dtype`. - - ========== ========== - dtype na_value - ========== ========== - float ``np.nan`` - int ``0`` - bool ``False`` - datetime64 ``pd.NaT`` - timedelta64 ``pd.NaT`` - ========== ========== - - The default value may be overridden by specifying a `fill_value`. - """ - # We include `_is_na_fill_value` in the metadata to avoid hash collisions - # between SparseDtype(float, 0.0) and SparseDtype(float, nan). - # Without is_na_fill_value in the comparison, those would be equal since - # hash(nan) is (sometimes?) 0. - _metadata = ('_dtype', '_fill_value', '_is_na_fill_value') - - def __init__(self, dtype=np.float64, fill_value=None): - # type: (Union[str, np.dtype, 'ExtensionDtype', type], Any) -> None - from pandas.core.dtypes.missing import na_value_for_dtype - from pandas.core.dtypes.common import ( - pandas_dtype, is_string_dtype, is_scalar - ) - - if isinstance(dtype, type(self)): - if fill_value is None: - fill_value = dtype.fill_value - dtype = dtype.subtype - - dtype = pandas_dtype(dtype) - if is_string_dtype(dtype): - dtype = np.dtype('object') - - if fill_value is None: - fill_value = na_value_for_dtype(dtype) - - if not is_scalar(fill_value): - raise ValueError("fill_value must be a scalar. Got {} " - "instead".format(fill_value)) - self._dtype = dtype - self._fill_value = fill_value - - def __hash__(self): - # Python3 doesn't inherit __hash__ when a base class overrides - # __eq__, so we explicitly do it here. - return super(SparseDtype, self).__hash__() - - def __eq__(self, other): - # We have to override __eq__ to handle NA values in _metadata. - # The base class does simple == checks, which fail for NA. 
- if isinstance(other, compat.string_types): - try: - other = self.construct_from_string(other) - except TypeError: - return False - - if isinstance(other, type(self)): - subtype = self.subtype == other.subtype - if self._is_na_fill_value: - # this case is complicated by two things: - # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) - # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) - # i.e. we want to treat any floating-point NaN as equal, but - # not a floating-point NaN and a datetime NaT. - fill_value = ( - other._is_na_fill_value and - isinstance(self.fill_value, type(other.fill_value)) or - isinstance(other.fill_value, type(self.fill_value)) - ) - else: - fill_value = self.fill_value == other.fill_value - - return subtype and fill_value - return False - - @property - def fill_value(self): - """ - The fill value of the array. - - Converting the SparseArray to a dense ndarray will fill the - array with this value. - - .. warning:: - - It's possible to end up with a SparseArray that has ``fill_value`` - values in ``sp_values``. This can occur, for example, when setting - ``SparseArray.fill_value`` directly. 
- """ - return self._fill_value - - @property - def _is_na_fill_value(self): - from pandas.core.dtypes.missing import isna - return isna(self.fill_value) - - @property - def _is_numeric(self): - from pandas.core.dtypes.common import is_object_dtype - return not is_object_dtype(self.subtype) - - @property - def _is_boolean(self): - from pandas.core.dtypes.common import is_bool_dtype - return is_bool_dtype(self.subtype) - - @property - def kind(self): - return self.subtype.kind - - @property - def type(self): - return self.subtype.type - - @property - def subtype(self): - return self._dtype - - @property - def name(self): - return 'Sparse[{}, {}]'.format(self.subtype.name, self.fill_value) - - def __repr__(self): - return self.name - - @classmethod - def construct_array_type(cls): - from .array import SparseArray - return SparseArray - - @classmethod - def construct_from_string(cls, string): - """ - Construct a SparseDtype from a string form. - - Parameters - ---------- - string : str - Can take the following forms. - - string dtype - ================ ============================ - 'int' SparseDtype[np.int64, 0] - 'Sparse' SparseDtype[np.float64, nan] - 'Sparse[int]' SparseDtype[np.int64, 0] - 'Sparse[int, 0]' SparseDtype[np.int64, 0] - ================ ============================ - - It is not possible to specify non-default fill values - with a string. An argument like ``'Sparse[int, 1]'`` - will raise a ``TypeError`` because the default fill value - for integers is 0. - - Returns - ------- - SparseDtype - """ - msg = "Could not construct SparseDtype from '{}'".format(string) - if string.startswith("Sparse"): - try: - sub_type, has_fill_value = cls._parse_subtype(string) - result = SparseDtype(sub_type) - except Exception: - raise TypeError(msg) - else: - msg = ("Could not construct SparseDtype from '{}'.\n\nIt " - "looks like the fill_value in the string is not " - "the default for the dtype. Non-default fill_values " - "are not supported. 
Use the 'SparseDtype()' " - "constructor instead.") - if has_fill_value and str(result) != string: - raise TypeError(msg.format(string)) - return result - else: - raise TypeError(msg) - - @staticmethod - def _parse_subtype(dtype): - """ - Parse a string to get the subtype - - Parameters - ---------- - dtype : str - A string like - - * Sparse[subtype] - * Sparse[subtype, fill_value] - - Returns - ------- - subtype : str - - Raises - ------ - ValueError - When the subtype cannot be extracted. - """ - xpr = re.compile( - r"Sparse\[(?P[^,]*)(, )?(?P.*?)?\]$" - ) - m = xpr.match(dtype) - has_fill_value = False - if m: - subtype = m.groupdict()['subtype'] - has_fill_value = m.groupdict()['fill_value'] or has_fill_value - elif dtype == "Sparse": - subtype = 'float64' - else: - raise ValueError("Cannot parse {}".format(dtype)) - return subtype, has_fill_value - - @classmethod - def is_dtype(cls, dtype): - dtype = getattr(dtype, 'dtype', dtype) - if (isinstance(dtype, compat.string_types) and - dtype.startswith("Sparse")): - sub_type, _ = cls._parse_subtype(dtype) - dtype = np.dtype(sub_type) - elif isinstance(dtype, cls): - return True - return isinstance(dtype, np.dtype) or dtype == 'Sparse' diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 85bd54f1e45fa..22da546355df6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -178,7 +178,7 @@ def is_sparse(arr): >>> is_sparse(bsr_matrix([1, 2, 3])) False """ - from pandas.core.arrays.sparse.dtype import SparseDtype + from pandas.core.arrays.sparse import SparseDtype dtype = getattr(arr, 'dtype', arr) return isinstance(dtype, SparseDtype) @@ -1926,7 +1926,7 @@ def _get_dtype_type(arr_or_dtype): return Interval return _get_dtype_type(np.dtype(arr_or_dtype)) else: - from pandas.core.arrays.sparse.dtype import SparseDtype + from pandas.core.arrays.sparse import SparseDtype if isinstance(arr_or_dtype, (ABCSparseSeries, ABCSparseArray, SparseDtype)): diff --git 
a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 118ffc9a4b31f..91fbaf736aae8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -76,7 +76,7 @@ def _get_series_result_type(result, objs=None): if isinstance(result, dict): # concat Series with axis 1 if all(is_sparse(c) for c in compat.itervalues(result)): - from pandas.core.arrays.sparse.api import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame else: from pandas.core.frame import DataFrame @@ -84,7 +84,7 @@ def _get_series_result_type(result, objs=None): # otherwise it is a SingleBlockManager (axis = 0) if result._block.is_sparse: - from pandas.core.arrays.sparse.api import SparseSeries + from pandas.core.sparse.api import SparseSeries return SparseSeries else: return objs[0]._constructor @@ -100,7 +100,7 @@ def _get_frame_result_type(result, objs): if (result.blocks and ( all(is_sparse(b) for b in result.blocks) or all(isinstance(obj, ABCSparseDataFrame) for obj in objs))): - from pandas.core.arrays.sparse.api import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame else: return next(obj for obj in objs if not isinstance(obj, @@ -123,7 +123,7 @@ def _get_sliced_frame_result_type(data, obj): Series or SparseSeries """ if is_sparse(data): - from pandas.core.arrays.sparse.api import SparseSeries + from pandas.core.sparse.api import SparseSeries return SparseSeries return obj._constructor_sliced @@ -556,7 +556,7 @@ def _concat_sparse(to_concat, axis=0, typs=None): a single array, preserving the combined dtypes """ - from pandas.core.arrays.sparse.array import SparseArray + from pandas.core.arrays import SparseArray fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7fbd8cc8d80f2..064a1b72eb4c8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1761,9 +1761,9 @@ def to_sparse(self, 
fill_value=None, kind='block'): 1 1.0 NaN 2 NaN 1.0 >>> type(sdf) - + """ - from pandas.core.arrays.sparse.frame import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame(self._series, index=self.index, columns=self.columns, default_kind=kind, default_fill_value=fill_value) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 60d4e7bc080bc..dd0bb1ab8bacb 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -29,7 +29,7 @@ from pandas.core.base import PandasObject import pandas.core.algorithms as algos -from pandas.core.arrays.sparse.array import _maybe_to_sparse +from pandas.core.arrays.sparse import _maybe_to_sparse from pandas.core.index import Index, MultiIndex, ensure_index from pandas.core.indexing import maybe_convert_indices diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 93690252a9993..8d1ed6486a456 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -2066,7 +2066,7 @@ def _cast_sparse_series_op(left, right, opname): left : SparseArray right : SparseArray """ - from pandas.core.arrays.sparse.api import SparseDtype + from pandas.core.sparse.api import SparseDtype opname = opname.strip('_') @@ -2116,7 +2116,7 @@ def _sparse_series_op(left, right, op, name): new_index = left.index new_name = get_op_result_name(left, right) - from pandas.core.arrays.sparse.array import _sparse_array_op + from pandas.core.arrays.sparse import _sparse_array_op lvalues, rvalues = _cast_sparse_series_op(left.values, right.values, name) result = _sparse_array_op(lvalues, rvalues, op, name) return left._constructor(result, index=new_index, name=new_name) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 09804e9783e4d..03b77f0e787f0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -18,8 +18,8 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame -from 
pandas.core.arrays.sparse.api import SparseDataFrame, SparseSeries -from pandas.core.arrays.sparse.array import SparseArray +from pandas.core.sparse.api import SparseDataFrame, SparseSeries +from pandas.core.arrays import SparseArray from pandas._libs.sparse import IntIndex from pandas.core.arrays import Categorical diff --git a/pandas/core/series.py b/pandas/core/series.py index 1799e83d8edf7..b4566ebd36d13 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -344,7 +344,7 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, "future version. Please use the pd.Series(..) " "constructor instead.", FutureWarning, stacklevel=2) if isinstance(arr, ABCSparseArray): - from pandas.core.arrays.sparse.series import SparseSeries + from pandas.core.sparse.series import SparseSeries cls = SparseSeries return cls(arr, index=index, name=name, dtype=dtype, copy=copy, fastpath=fastpath) @@ -1383,8 +1383,8 @@ def to_sparse(self, kind='block', fill_value=None): sp : SparseSeries """ # TODO: deprecate - from pandas.core.arrays.sparse.series import SparseSeries - from pandas.core.arrays.sparse.array import SparseArray + from pandas.core.sparse.series import SparseSeries + from pandas.core.arrays import SparseArray values = SparseArray(self, kind=kind, fill_value=fill_value) return SparseSeries( diff --git a/pandas/core/arrays/sparse/__init__.py b/pandas/core/sparse/__init__.py similarity index 100% rename from pandas/core/arrays/sparse/__init__.py rename to pandas/core/sparse/__init__.py diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py new file mode 100644 index 0000000000000..e3be241bcdd70 --- /dev/null +++ b/pandas/core/sparse/api.py @@ -0,0 +1,5 @@ +# pylint: disable=W0611 +# flake8: noqa +from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.sparse.series import SparseSeries +from pandas.core.sparse.frame import SparseDataFrame diff --git a/pandas/core/arrays/sparse/frame.py 
b/pandas/core/sparse/frame.py similarity index 99% rename from pandas/core/arrays/sparse/frame.py rename to pandas/core/sparse/frame.py index a4f05542df5b6..2ed275e3bbd2d 100644 --- a/pandas/core/arrays/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -22,8 +22,8 @@ from pandas.core.internals import (BlockManager, create_block_manager_from_arrays) import pandas.core.generic as generic -from pandas.core.arrays.sparse.series import SparseSeries, SparseArray -from pandas.core.arrays.sparse.dtype import SparseDtype +from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.sparse.series import SparseSeries from pandas._libs.sparse import BlockIndex, get_blocks from pandas.util._decorators import Appender import pandas.core.ops as ops diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py similarity index 100% rename from pandas/core/arrays/sparse/scipy_sparse.py rename to pandas/core/sparse/scipy_sparse.py diff --git a/pandas/core/arrays/sparse/series.py b/pandas/core/sparse/series.py similarity index 99% rename from pandas/core/arrays/sparse/series.py rename to pandas/core/sparse/series.py index 290f8c6de70ad..35ddd623878d0 100644 --- a/pandas/core/arrays/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -24,13 +24,13 @@ import pandas._libs.index as libindex from pandas.util._decorators import Appender, Substitution -from pandas.core.arrays.sparse.array import ( +from pandas.core.arrays import ( SparseArray, ) from pandas._libs.sparse import BlockIndex, IntIndex import pandas._libs.sparse as splib -from pandas.core.arrays.sparse.scipy_sparse import ( +from pandas.core.sparse.scipy_sparse import ( _sparse_series_to_coo, _coo_to_sparse_series) @@ -205,7 +205,7 @@ def _constructor(self): @property def _constructor_expanddim(self): - from pandas.core.arrays.sparse.api import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame return SparseDataFrame @property diff --git 
a/pandas/io/packers.py b/pandas/io/packers.py index 4ab8c7d3b74a4..135f9e89eaaef 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -69,8 +69,8 @@ from pandas.core.arrays import IntervalArray from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager, make_block, _safe_reshape -from pandas.core.arrays.sparse.api import SparseSeries, SparseDataFrame -from pandas.core.arrays.sparse.array import BlockIndex, IntIndex +from pandas.core.sparse.api import SparseSeries, SparseDataFrame +from pandas.core.arrays.sparse import BlockIndex, IntIndex from pandas.io.common import get_filepath_or_buffer, _stringify_path from pandas.io.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 55d6fe2399380..9cceff30c9e0e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -50,7 +50,7 @@ from pandas.core.internals import (BlockManager, make_block, _block2d_to_blocknd, _factor_indexer, _block_shape) -from pandas.core.arrays.sparse.array import BlockIndex, IntIndex +from pandas.core.arrays.sparse import BlockIndex, IntIndex from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index fdbac8b79ab16..388411f909bac 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -4,7 +4,7 @@ import pytest import pandas as pd import pandas.util.testing as tm -from pandas.core.arrays.sparse.api import SparseDtype +from pandas.core.sparse.api import SparseDtype class TestSparseArrayArithmetics(object): diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 7f79ade235a8b..2d25d033ac8c1 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -9,9 +9,9 @@ import numpy 
as np import pandas as pd -from pandas.core.arrays.sparse.api import (SparseArray, - SparseSeries, - SparseDtype) +from pandas.core.sparse.api import (SparseArray, + SparseSeries, + SparseDtype) from pandas._libs.sparse import IntIndex from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index c3a1a28f93ba7..0dcfc3ae79b0f 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.arrays.sparse.api import SparseDtype +from pandas.core.sparse.api import SparseDtype @pytest.mark.parametrize("dtype, fill_value", [ diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py index adb05196295bd..3d867cdda1d42 100644 --- a/pandas/tests/arrays/sparse/test_libsparse.py +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.core.arrays.sparse.array import IntIndex, BlockIndex, _make_index +from pandas.core.arrays.sparse import IntIndex, BlockIndex, _make_index import pandas._libs.sparse as splib TEST_LENGTH = 20 diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c454887242904..b5353e34a2311 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -6,7 +6,7 @@ from pandas.core.dtypes.dtypes import (DatetimeTZDtype, PeriodDtype, CategoricalDtype, IntervalDtype) -from pandas.core.arrays.sparse.api import SparseDtype +from pandas.core.sparse.api import SparseDtype import pandas.core.dtypes.common as com import pandas.util.testing as tm diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 22544d3268141..c53c2e5059cde 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ 
b/pandas/tests/dtypes/test_dtypes.py @@ -20,7 +20,7 @@ _coerce_to_dtype, is_bool_dtype, ) -from pandas.core.arrays.sparse.api import SparseDtype +from pandas.core.sparse.api import SparseDtype import pandas.util.testing as tm diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 3cacd46e797eb..ca0435141c2e2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -2,8 +2,7 @@ import pandas as pd import numpy as np -from pandas.core.arrays.sparse.dtype import SparseDtype -from pandas import SparseArray +from pandas import SparseArray, SparseDtype from pandas.errors import PerformanceWarning from pandas.tests.extension import base import pandas.util.testing as tm diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ec0c25f53afcd..fe2d14458c197 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2057,7 +2057,7 @@ def test_loc_duplicates(self): tm.assert_frame_equal(df, expected) def test_iloc_sparse_propegate_fill_value(self): - from pandas.core.arrays.sparse.api import SparseDataFrame + from pandas.core.sparse.api import SparseDataFrame df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999) assert len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 40c54dc59c7e3..d8b3d9588f2f1 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -5,7 +5,7 @@ from collections import OrderedDict from pandas import DataFrame, Series -from pandas.core.arrays.sparse.api import SparseDtype, SparseArray +from pandas.core.sparse.api import SparseDtype, SparseArray import pandas as pd from numpy import nan diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index de13566902950..70e44a9d2d40f 100644 --- a/pandas/tests/series/test_subclass.py +++ 
b/pandas/tests/series/test_subclass.py @@ -2,7 +2,7 @@ # pylint: disable-msg=E1101,W0612 import numpy as np import pandas as pd -from pandas.core.arrays.sparse.dtype import SparseDtype +from pandas import SparseDtype import pandas.util.testing as tm diff --git a/pandas/tests/arrays/sparse/frame/__init__.py b/pandas/tests/sparse/__init__.py similarity index 100% rename from pandas/tests/arrays/sparse/frame/__init__.py rename to pandas/tests/sparse/__init__.py diff --git a/pandas/tests/arrays/sparse/common.py b/pandas/tests/sparse/common.py similarity index 100% rename from pandas/tests/arrays/sparse/common.py rename to pandas/tests/sparse/common.py diff --git a/pandas/tests/arrays/sparse/series/__init__.py b/pandas/tests/sparse/frame/__init__.py similarity index 100% rename from pandas/tests/arrays/sparse/series/__init__.py rename to pandas/tests/sparse/frame/__init__.py diff --git a/pandas/tests/arrays/sparse/frame/conftest.py b/pandas/tests/sparse/frame/conftest.py similarity index 100% rename from pandas/tests/arrays/sparse/frame/conftest.py rename to pandas/tests/sparse/frame/conftest.py diff --git a/pandas/tests/arrays/sparse/frame/test_analytics.py b/pandas/tests/sparse/frame/test_analytics.py similarity index 100% rename from pandas/tests/arrays/sparse/frame/test_analytics.py rename to pandas/tests/sparse/frame/test_analytics.py diff --git a/pandas/tests/arrays/sparse/frame/test_apply.py b/pandas/tests/sparse/frame/test_apply.py similarity index 97% rename from pandas/tests/arrays/sparse/frame/test_apply.py rename to pandas/tests/sparse/frame/test_apply.py index 47a164e3c0945..2d7a537f0fb3b 100644 --- a/pandas/tests/arrays/sparse/frame/test_apply.py +++ b/pandas/tests/sparse/frame/test_apply.py @@ -1,7 +1,7 @@ import pytest import numpy as np from pandas import SparseDataFrame, DataFrame, Series, bdate_range -from pandas.core.arrays.sparse.api import SparseDtype +from pandas.core.sparse.api import SparseDtype from pandas.core import nanops from pandas.util 
import testing as tm diff --git a/pandas/tests/arrays/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py similarity index 99% rename from pandas/tests/arrays/sparse/frame/test_frame.py rename to pandas/tests/sparse/frame/test_frame.py index d967904ddf9b5..03143488c3874 100644 --- a/pandas/tests/arrays/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -14,10 +14,10 @@ from pandas.util import testing as tm from pandas.compat import lrange from pandas import compat -from pandas.core.arrays.sparse import frame as spf +from pandas.core.sparse import frame as spf from pandas._libs.sparse import BlockIndex, IntIndex -from pandas.core.arrays.sparse.api import ( +from pandas.core.sparse.api import ( SparseSeries, SparseDataFrame, SparseArray, SparseDtype ) from pandas.tests.frame.test_api import SharedWithSparse diff --git a/pandas/tests/arrays/sparse/frame/test_indexing.py b/pandas/tests/sparse/frame/test_indexing.py similarity index 100% rename from pandas/tests/arrays/sparse/frame/test_indexing.py rename to pandas/tests/sparse/frame/test_indexing.py diff --git a/pandas/tests/arrays/sparse/frame/test_to_csv.py b/pandas/tests/sparse/frame/test_to_csv.py similarity index 100% rename from pandas/tests/arrays/sparse/frame/test_to_csv.py rename to pandas/tests/sparse/frame/test_to_csv.py diff --git a/pandas/tests/arrays/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py similarity index 99% rename from pandas/tests/arrays/sparse/frame/test_to_from_scipy.py rename to pandas/tests/sparse/frame/test_to_from_scipy.py index 7ca520338baa6..1a10ff83d3097 100644 --- a/pandas/tests/arrays/sparse/frame/test_to_from_scipy.py +++ b/pandas/tests/sparse/frame/test_to_from_scipy.py @@ -2,7 +2,7 @@ import numpy as np from pandas.util import testing as tm from pandas import SparseDataFrame, SparseSeries -from pandas.core.arrays.sparse.api import SparseDtype +from pandas.core.sparse.api import SparseDtype from 
distutils.version import LooseVersion from pandas.core.dtypes.common import ( is_bool_dtype, diff --git a/pandas/tests/sparse/series/__init__.py b/pandas/tests/sparse/series/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/sparse/series/test_indexing.py b/pandas/tests/sparse/series/test_indexing.py similarity index 100% rename from pandas/tests/arrays/sparse/series/test_indexing.py rename to pandas/tests/sparse/series/test_indexing.py diff --git a/pandas/tests/arrays/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py similarity index 99% rename from pandas/tests/arrays/sparse/series/test_series.py rename to pandas/tests/sparse/series/test_series.py index 2d07288bf304f..7a8b5b5ad407b 100644 --- a/pandas/tests/arrays/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -18,11 +18,10 @@ from pandas.compat import range, PY36 from pandas.core.reshape.util import cartesian_product -from pandas.core.arrays.sparse.api import SparseDtype -import pandas.core.arrays.sparse.frame as spf +import pandas.core.sparse.frame as spf from pandas._libs.sparse import BlockIndex, IntIndex -from pandas.core.arrays.sparse.api import SparseSeries +from pandas import SparseSeries, SparseDtype from pandas.tests.series.test_api import SharedWithSparse diff --git a/pandas/tests/arrays/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py similarity index 99% rename from pandas/tests/arrays/sparse/test_combine_concat.py rename to pandas/tests/sparse/test_combine_concat.py index 66a7a89732b60..92483f1e7511e 100644 --- a/pandas/tests/arrays/sparse/test_combine_concat.py +++ b/pandas/tests/sparse/test_combine_concat.py @@ -282,7 +282,7 @@ def test_concat_different_columns(self): tm.assert_sp_frame_equal(res, exp, check_kind=False) def test_concat_bug(self): - from pandas.core.arrays.sparse.api import SparseDtype + from pandas.core.sparse.api import SparseDtype x = 
pd.SparseDataFrame({"A": pd.SparseArray([np.nan, np.nan], fill_value=0)}) y = pd.SparseDataFrame({"B": []}) diff --git a/pandas/tests/arrays/sparse/test_format.py b/pandas/tests/sparse/test_format.py similarity index 100% rename from pandas/tests/arrays/sparse/test_format.py rename to pandas/tests/sparse/test_format.py diff --git a/pandas/tests/arrays/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py similarity index 100% rename from pandas/tests/arrays/sparse/test_groupby.py rename to pandas/tests/sparse/test_groupby.py diff --git a/pandas/tests/arrays/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py similarity index 99% rename from pandas/tests/arrays/sparse/test_indexing.py rename to pandas/tests/sparse/test_indexing.py index cc910e20cb385..7c7e450c966bf 100644 --- a/pandas/tests/arrays/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import pandas.util.testing as tm -from pandas.core.arrays.sparse.api import SparseDtype +from pandas.core.sparse.api import SparseDtype class TestSparseSeriesIndexing(object): diff --git a/pandas/tests/arrays/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py similarity index 100% rename from pandas/tests/arrays/sparse/test_pivot.py rename to pandas/tests/sparse/test_pivot.py diff --git a/pandas/tests/arrays/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py similarity index 100% rename from pandas/tests/arrays/sparse/test_reshape.py rename to pandas/tests/sparse/test_reshape.py From fe53b50a8b29a6d61a64fbe19285fe85cc3e6e8e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Oct 2018 14:15:18 -0500 Subject: [PATCH 11/12] Revert extraneous changes --- pandas/api/extensions/__init__.py | 2 +- pandas/compat/numpy/function.py | 3 +-- pandas/compat/pickle_compat.py | 2 +- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/arrays/integer.py | 3 +-- pandas/core/arrays/interval.py | 2 +- pandas/core/arrays/period.py | 
2 +- pandas/core/groupby/generic.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 4 +--- pandas/tests/arrays/test_datetimelike.py | 6 +++--- pandas/tests/indexing/test_indexing.py | 9 ++++----- pandas/tests/test_base.py | 4 ++-- 12 files changed, 19 insertions(+), 24 deletions(-) diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 18c7c9c2979e3..51555c57b2288 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -4,7 +4,7 @@ register_series_accessor) from pandas.core.algorithms import take # noqa from pandas.core.arrays import (ExtensionArray, # noqa - ExtensionScalarOpsMixin) + ExtensionScalarOpsMixin) from pandas.core.dtypes.dtypes import ( # noqa ExtensionDtype, register_extension_dtype ) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index b54f7e31fe87b..d42be56963569 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -19,8 +19,7 @@ """ from numpy import ndarray -from pandas.util._validators import (validate_args, - validate_kwargs, +from pandas.util._validators import (validate_args, validate_kwargs, validate_args_and_kwargs) from pandas.errors import UnsupportedFunctionCall from pandas.core.dtypes.common import is_integer, is_bool diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 61457bcef9dea..59c162251c58f 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -114,7 +114,7 @@ def load_reduce(self): # 19269, arrays moving ('pandas.core.categorical', 'Categorical'): - ('pandas.core.arrays.categorical', 'Categorical'), + ('pandas.core.arrays', 'Categorical'), # 19939, add timedeltaindex, float64index compat from 15998 move ('pandas.tseries.tdi', 'TimedeltaIndex'): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 228e12d08c1a3..4c75927135b22 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py 
@@ -523,7 +523,7 @@ def _add_delta(self, delta): The result's name is set outside of _add_delta by the calling method (__add__ or __sub__) """ - from pandas.core.arrays import TimedeltaArrayMixin + from pandas.core.arrays.timedeltas import TimedeltaArrayMixin if isinstance(delta, (Tick, timedelta, np.timedelta64)): new_values = self._add_delta_td(delta) @@ -818,7 +818,7 @@ def to_period(self, freq=None): pandas.PeriodIndex: Immutable ndarray holding ordinal values pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object """ - from pandas.core.arrays import PeriodArrayMixin + from pandas.core.arrays.period import PeriodArrayMixin if self.tz is not None: warnings.warn("Converting to PeriodArray/Index representation " diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d3aacfc334579..9917045f2f7d2 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -18,8 +18,7 @@ is_integer_dtype, is_object_dtype, is_list_like) -from pandas.core.arrays import (ExtensionArray, - ExtensionOpsMixin) +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.missing import isna, notna diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 64c195cac091a..134999f05364f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -23,7 +23,7 @@ from pandas.util._decorators import Appender from pandas.util._doctools import _WritableDoc -from pandas.core.arrays import ExtensionArray, Categorical +from . 
import ExtensionArray, Categorical _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _interval_shared_docs = {} diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b678b55d96c06..d32ff76c0819b 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -301,7 +301,7 @@ def to_timestamp(self, freq=None, how='start'): ------- DatetimeArray/Index """ - from pandas.core.arrays import DatetimeArrayMixin + from pandas.core.arrays.datetimes import DatetimeArrayMixin how = libperiod._validate_end_alias(how) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 63bf67854e5cd..957f3be8cf6ae 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -44,7 +44,7 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.base import SpecificationError, DataError from pandas.core.index import Index, MultiIndex, CategoricalIndex -from pandas.core.arrays import Categorical +from pandas.core.arrays.categorical import Categorical from pandas.core.internals import BlockManager, make_block from pandas.compat.numpy import _np_version_under1p13 diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 2d25d033ac8c1..0257d996228df 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -9,9 +9,7 @@ import numpy as np import pandas as pd -from pandas.core.sparse.api import (SparseArray, - SparseSeries, - SparseDtype) +from pandas.core.sparse.api import SparseArray, SparseSeries, SparseDtype from pandas._libs.sparse import IntIndex from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 0417895135b48..6bb4241451b3f 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -5,9 +5,9 @@ import pandas as 
pd import pandas.util.testing as tm -from pandas.core.arrays import DatetimeArrayMixin -from pandas.core.arrays import TimedeltaArrayMixin -from pandas.core.arrays import PeriodArrayMixin +from pandas.core.arrays.datetimes import DatetimeArrayMixin +from pandas.core.arrays.timedeltas import TimedeltaArrayMixin +from pandas.core.arrays.period import PeriodArrayMixin # TODO: more freq variants diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index c86cfbf9c8e3c..0f524ca0aaac5 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -18,7 +18,6 @@ import pandas as pd from pandas.core.indexing import (_non_reducing_slice, _maybe_numeric_slice, validate_indices) -from pandas.core.arrays import integer_array from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm from pandas.compat import PY2 @@ -1085,10 +1084,10 @@ def test_validate_indices_empty(): def test_extension_array_cross_section(): # A cross-section of a homogeneous EA should be an EA df = pd.DataFrame({ - "A": integer_array([1, 2]), - "B": integer_array([3, 4]) + "A": pd.core.arrays.integer_array([1, 2]), + "B": pd.core.arrays.integer_array([3, 4]) }, index=['a', 'b']) - expected = pd.Series(integer_array([1, 3]), + expected = pd.Series(pd.core.arrays.integer_array([1, 3]), index=['A', 'B'], name='a') result = df.loc['a'] tm.assert_series_equal(result, expected) @@ -1099,7 +1098,7 @@ def test_extension_array_cross_section(): def test_extension_array_cross_section_converts(): df = pd.DataFrame({ - "A": integer_array([1, 2]), + "A": pd.core.arrays.integer_array([1, 2]), "B": np.array([1, 2]), }, index=['a', 'b']) result = df.loc['a'] diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 113c7fe77c071..bbc5bd96bad55 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1199,8 +1199,8 @@ def test_iter_box(self): 'datetime64[ns, US/Central]'), 
(pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), - (pd.IntervalIndex.from_breaks([0, 1, 2]), - pd.core.arrays.interval.IntervalArray, 'interval'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), pd.core.arrays.IntervalArray, + 'interval'), ]) def test_values_consistent(array, expected_type, dtype): l_values = pd.Series(array)._values From 120bc5e17092723d0dcd60fbf46e7b054e623333 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Oct 2018 15:16:03 -0500 Subject: [PATCH 12/12] fixup --- pandas/tests/series/test_combine_concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 846b5c2dbb31b..bf7247caa5d4a 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -223,14 +223,14 @@ def test_concat_empty_series_dtypes(self): result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='float64')]) # TODO: release-note: concat sparse dtype - expected = pd.core.arrays.sparse.dtype.SparseDtype(np.float64) + expected = pd.core.sparse.api.SparseDtype(np.float64) assert result.dtype == expected assert result.ftype == 'float64:sparse' result = pd.concat([Series(dtype='float64').to_sparse(), Series( dtype='object')]) # TODO: release-note: concat sparse dtype - expected = pd.core.arrays.sparse.dtype.SparseDtype('object') + expected = pd.core.sparse.api.SparseDtype('object') assert result.dtype == expected assert result.ftype == 'object:sparse'