diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE new file mode 100644 index 0000000000000..37ec93a14fdcd --- /dev/null +++ b/LICENSES/XARRAY_LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file 
distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
+ +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/doc/source/api.rst b/doc/source/api.rst index 02f729c89295b..88419df1880ec 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2498,6 +2498,20 @@ Scalar introspection api.types.is_re_compilable api.types.is_scalar +Extensions +---------- + +These are primarily intended for library authors looking to extend pandas +objects. + +.. currentmodule:: pandas + +.. autosummary:: + :toctree: generated/ + + api.extensions.register_dataframe_accessor + api.extensions.register_series_accessor + api.extensions.register_index_accessor .. This is to prevent warnings in the doc build. We don't want to encourage .. these methods. 
diff --git a/doc/source/developer.rst b/doc/source/developer.rst index b8bb2b2fcbe2f..5c3b114ce7299 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -140,3 +140,46 @@ As an example of fully-formed metadata: 'metadata': None} ], 'pandas_version': '0.20.0'} + +.. _developer.register-accessors: + +Registering Custom Accessors +---------------------------- + +Libraries can use the decorators +:func:`pandas.api.extensions.register_dataframe_accessor`, +:func:`pandas.api.extensions.register_series_accessor`, and +:func:`pandas.api.extensions.register_index_accessor`, to add additional "namespaces" to +pandas objects. All of these follow a similar convention: you decorate a class, providing the name of the attribute to add. The +class's `__init__` method gets the pandas object being accessed. For example: + +.. ipython:: python + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Now users can access your methods using the `geo` namespace: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This can be a convenient way to extend pandas objects without subclassing them. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page. diff --git a/doc/source/internals.rst b/doc/source/internals.rst index a321b4202296f..ee4df879d9478 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -100,6 +100,8 @@ Subclassing pandas Data Structures 2. Use *composition*. See `here `_. + 3. 
Extending by :ref:`registering an accessor <developer.register-accessors>` + This section describes how to subclass ``pandas`` data structures to meet more specific needs. There are 2 points which need attention: 1. Override constructor properties. diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6fe15133914da..952182cfdcf82 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -202,6 +202,13 @@ Other Enhancements - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- Added :func:`pandas.api.extensions.register_dataframe_accessor`, + :func:`pandas.api.extensions.register_series_accessor`, and + :func:`pandas.api.extensions.register_index_accessor` for libraries downstream of pandas + to register custom accessors like ``.cat`` on pandas objects. See + :ref:`Registering Custom Accessors <developer.register-accessors>` for more (:issue:`14781`). + + - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) .. _whatsnew_0230.api_breaking: diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py index fcbf42f6dabc4..afff059e7b601 100644 --- a/pandas/api/__init__.py +++ b/pandas/api/__init__.py @@ -1 +1,2 @@ """ public toolkit API """ +from . 
import types, extensions # noqa diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py new file mode 100644 index 0000000000000..64f5e8fb939a4 --- /dev/null +++ b/pandas/api/extensions/__init__.py @@ -0,0 +1,4 @@ +"""Public API for extending pandas objects.""" +from pandas.core.accessor import (register_dataframe_accessor, # noqa + register_index_accessor, + register_series_accessor) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 73e01fbf17205..96bf628c8d7ff 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -5,7 +5,9 @@ that can be mixed into or pinned onto other pandas classes. """ -from pandas.core.common import AbstractMethodError +import warnings + +from pandas.util._decorators import Appender class DirNamesMixin(object): @@ -37,38 +39,9 @@ def __dir__(self): return sorted(rv) -class AccessorProperty(object): - """Descriptor for implementing accessor properties like Series.str - """ - - def __init__(self, accessor_cls, construct_accessor=None): - self.accessor_cls = accessor_cls - self.construct_accessor = (construct_accessor or - accessor_cls._make_accessor) - self.__doc__ = accessor_cls.__doc__ - - def __get__(self, instance, owner=None): - if instance is None: - # this ensures that Series.str. 
is well defined - return self.accessor_cls - return self.construct_accessor(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") - - class PandasDelegate(object): """ an abstract base class for delegating methods/properties """ - @classmethod - def _make_accessor(cls, data): - raise AbstractMethodError("_make_accessor should be implemented" - "by subclass and return an instance" - "of `cls`.") - def _delegate_property_get(self, name, *args, **kwargs): raise TypeError("You cannot access the " "property {name}".format(name=name)) @@ -129,3 +102,138 @@ def f(self, *args, **kwargs): # don't overwrite existing methods/properties if overwrite or not hasattr(cls, name): setattr(cls, name, f) + + +# Ported with modifications from xarray +# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py +# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors +# 2. We use a UserWarning instead of a custom Warning + +class CachedAccessor(object): + """Custom property-like object (descriptor) for caching accessors. + + Parameters + ---------- + name : str + The namespace this will be accessed under, e.g. ``df.foo`` + accessor : cls + The class with the extension methods. The class' __init__ method + should expect one of a ``Series``, ``DataFrame`` or ``Index`` as + the single argument ``data`` + """ + def __init__(self, name, accessor): + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + # we're accessing the attribute of the class, i.e., Dataset.geo + return self._accessor + accessor_obj = self._accessor(obj) + # Replace the property with the accessor object. 
Inspired by: + # http://www.pydanny.com/cached-property.html + # We need to use object.__setattr__ because we overwrite __setattr__ on + # NDFrame + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj + + +def _register_accessor(name, cls): + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + 'registration of accessor {!r} under name {!r} for type ' + '{!r} is overriding a preexisting attribute with the same ' + 'name.'.format(accessor, name, cls), + UserWarning, + stacklevel=2) + setattr(cls, name, CachedAccessor(name, accessor)) + return accessor + return decorator + + +_doc = """Register a custom accessor on %(klass)s objects. + +Parameters +---------- +name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + +Notes +----- +When accessed, your accessor will be initialized with the pandas object +the user is interacting with. So the signature must be + +.. code-block:: python + + def __init__(self, pandas_object): + +For consistency with pandas methods, you should raise an ``AttributeError`` +if the data passed to your accessor has an incorrect dtype. + +>>> pd.Series(['a', 'b']).dt +Traceback (most recent call last): +... +AttributeError: Can only use .dt accessor with datetimelike values + +Examples +-------- + +In your library code:: + + import pandas as pd + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFarme + lon = self._obj.latitude + lat = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Back in an interactive IPython session: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 
'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +See also +-------- +%(others)s +""" + + +@Appender(_doc % dict(klass="DataFrame", + others=("register_series_accessor, " + "register_index_accessor"))) +def register_dataframe_accessor(name): + from pandas import DataFrame + return _register_accessor(name, DataFrame) + + +@Appender(_doc % dict(klass="Series", + others=("register_dataframe_accessor, " + "register_index_accessor"))) +def register_series_accessor(name): + from pandas import Series + return _register_accessor(name, Series) + + +@Appender(_doc % dict(klass="Index", + others=("register_dataframe_accessor, " + "register_series_accessor"))) +def register_index_accessor(name): + from pandas import Index + return _register_accessor(name, Index) diff --git a/pandas/core/base.py b/pandas/core/base.py index e90794c6c2e1a..4b3e74eae36b8 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -145,10 +145,14 @@ def _freeze(self): # prevent adding any attribute via s.xxx.new_attribute = ... def __setattr__(self, key, value): # _cache is used by a decorator - # dict lookup instead of getattr as getattr is false for getter - # which error - if getattr(self, "__frozen", False) and not \ - (key in type(self).__dict__ or key == "_cache"): + # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key) + # because + # 1.) getattr is false for attributes that raise errors + # 2.) cls.__dict__ doesn't traverse into base classes + if (getattr(self, "__frozen", False) and not + (key == "_cache" or + key in type(self).__dict__ or + getattr(self, key, None) is not None)): raise AttributeError("You cannot add any new attribute '{key}'". 
format(key=key)) object.__setattr__(self, key, value) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 92fcdc0f4625b..7b11e37a14b51 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2142,6 +2142,10 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): methods return new categorical data per default (but can be called with `inplace=True`). + Parameters + ---------- + data : Series or CategoricalIndex + Examples -------- >>> s.cat.categories @@ -2157,12 +2161,19 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): """ - def __init__(self, values, index, name): - self.categorical = values - self.index = index - self.name = name + def __init__(self, data): + self._validate(data) + self.categorical = data.values + self.index = data.index + self.name = data.name self._freeze() + @staticmethod + def _validate(data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a " + "'category' dtype") + def _delegate_property_get(self, name): return getattr(self.categorical, name) @@ -2181,14 +2192,6 @@ def _delegate_method(self, name, *args, **kwargs): if res is not None: return Series(res, index=self.index, name=self.name) - @classmethod - def _make_accessor(cls, data): - if not is_categorical_dtype(data.dtype): - raise AttributeError("Can only use .cat accessor with a " - "'category' dtype") - return CategoricalAccessor(data.values, data.index, - getattr(data, 'name', None),) - CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=["categories", diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4d907180da00a..2c05eefa5706e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,6 +23,7 @@ import numpy as np import numpy.ma as ma +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.cast import ( maybe_upcast, cast_scalar_to_array, @@ -92,7 +93,6 @@ from 
pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.core import accessor import pandas.core.common as com import pandas.core.nanops as nanops import pandas.core.ops as ops @@ -6003,8 +6003,7 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = accessor.AccessorProperty(gfx.FramePlotMethods, - gfx.FramePlotMethods) + plot = CachedAccessor("plot", gfx.FramePlotMethods) hist = gfx.hist_frame boxplot = gfx.boxplot_frame diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 116c7eb8c7958..d40230386216c 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -4,6 +4,7 @@ import numpy as np +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( is_period_arraylike, is_datetime_arraylike, is_integer_dtype, @@ -20,81 +21,44 @@ from pandas.core.algorithms import take_1d -def is_datetimelike(data): - """ - return a boolean if we can be successfully converted to a datetimelike - """ - try: - maybe_to_datetimelike(data) - return True - except (Exception): - pass - return False +class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): + def __init__(self, data, orig): + if not isinstance(data, ABCSeries): + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) -def maybe_to_datetimelike(data, copy=False): - """ - return a DelegatedClass of a Series that is datetimelike - (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods) - raise TypeError if this is not possible. 
+ self.values = data + self.orig = orig + self.name = getattr(data, 'name', None) + self.index = getattr(data, 'index', None) + self._freeze() - Parameters - ---------- - data : Series - copy : boolean, default False - copy the input data + def _get_values(self): + data = self.values + if is_datetime64_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) - Returns - ------- - DelegatedClass + elif is_datetime64tz_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) - """ - from pandas import Series + elif is_timedelta64_dtype(data.dtype): + return TimedeltaIndex(data, copy=False, name=self.name) + + else: + if is_period_arraylike(data): + return PeriodIndex(data, copy=False, name=self.name) + if is_datetime_arraylike(data): + return DatetimeIndex(data, copy=False, name=self.name) - if not isinstance(data, Series): raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) - index = data.index - name = data.name - orig = data if is_categorical_dtype(data) else None - if orig is not None: - data = orig.values.categories - - if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy), - index, name=name, orig=orig) - elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy), - index, data.name, orig=orig) - elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy), index, - name=name, orig=orig) - else: - if is_period_arraylike(data): - return PeriodProperties(PeriodIndex(data, copy=copy), index, - name=name, orig=orig) - if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy), index, - name=name, orig=orig) - - raise TypeError("cannot convert an object of type {0} to a " - "datetimelike index".format(type(data))) - - -class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): - - def __init__(self, values, index, name, 
orig=None): - self.values = values - self.index = index - self.name = name - self.orig = orig - self._freeze() - def _delegate_property_get(self, name): from pandas import Series + values = self._get_values() - result = getattr(self.values, name) + result = getattr(values, name) # maybe need to upcast (ints) if isinstance(result, np.ndarray): @@ -126,8 +90,9 @@ def _delegate_property_set(self, name, value, *args, **kwargs): def _delegate_method(self, name, *args, **kwargs): from pandas import Series + values = self._get_values() - method = getattr(self.values, name) + method = getattr(values, name) result = method(*args, **kwargs) if not is_list_like(result): @@ -158,11 +123,11 @@ class DatetimeProperties(Properties): """ def to_pydatetime(self): - return self.values.to_pydatetime() + return self._get_values().to_pydatetime() @property def freq(self): - return self.values.inferred_freq + return self._get_values().inferred_freq DatetimeProperties._add_delegate_accessors( @@ -189,7 +154,7 @@ class TimedeltaProperties(Properties): """ def to_pytimedelta(self): - return self.values.to_pytimedelta() + return self._get_values().to_pytimedelta() @property def components(self): @@ -202,11 +167,11 @@ def components(self): a DataFrame """ - return self.values.components.set_index(self.index) + return self._get_values().components.set_index(self.index) @property def freq(self): - return self.values.inferred_freq + return self._get_values().inferred_freq TimedeltaProperties._add_delegate_accessors( @@ -245,15 +210,38 @@ class PeriodProperties(Properties): class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): - # This class is never instantiated, and exists solely for the benefit of - # the Series.dt class property. For Series objects, .dt will always be one - # of the more specific classes above. 
- __doc__ = DatetimeProperties.__doc__ - @classmethod - def _make_accessor(cls, data): + def __new__(cls, data): + # CombinedDatetimelikeProperties isn't really instantiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. + from pandas import Series + + if not isinstance(data, Series): + raise TypeError("cannot convert an object of type {0} to a " + "datetimelike index".format(type(data))) + + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = Series(orig.values.categories, + name=orig.name, + copy=False) + try: - return maybe_to_datetimelike(data) + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(data, orig) + else: + if is_period_arraylike(data): + return PeriodProperties(data, orig) + if is_datetime_arraylike(data): + return DatetimeProperties(data, orig) except Exception: - raise AttributeError("Can only use .dt accessor with " - "datetimelike values") + pass # we raise an attribute error anyway + + raise AttributeError("Can only use .dt accessor with datetimelike " + "values") diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f634d809560ee..a5949c62ad913 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -12,6 +12,7 @@ from pandas.compat.numpy import function as nv from pandas import compat +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.generic import ( ABCSeries, ABCMultiIndex, @@ -55,8 +56,8 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core import strings, accessor from pandas.core.config import get_option +from pandas.core.strings 
import StringMethods # simplify @@ -172,9 +173,7 @@ class Index(IndexOpsMixin, PandasObject): _engine_type = libindex.ObjectEngine _accessors = frozenset(['str']) - - # String Methods - str = accessor.AccessorProperty(strings.StringMethods) + str = CachedAccessor("str", StringMethods) def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 71cded4f9c888..25c8bacd31940 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,6 +13,7 @@ import numpy as np import numpy.ma as ma +from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.common import ( is_categorical_dtype, is_bool, @@ -53,7 +54,6 @@ from pandas.core import generic, base from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor -import pandas.core.strings as strings from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex @@ -64,7 +64,6 @@ zip, u, OrderedDict, StringIO, range, get_range_parameters) from pandas.compat.numpy import function as nv -from pandas.core import accessor import pandas.core.ops as ops import pandas.core.algorithms as algorithms @@ -77,6 +76,7 @@ from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option +from pandas.core.strings import StringMethods import pandas.plotting._core as gfx @@ -3069,21 +3069,16 @@ def to_period(self, freq=None, copy=True): return self._constructor(new_values, index=new_index).__finalize__(self) - # ------------------------------------------------------------------------- - # Datetimelike delegation methods - dt = accessor.AccessorProperty(CombinedDatetimelikeProperties) - - # ------------------------------------------------------------------------- - # Categorical 
methods - cat = accessor.AccessorProperty(CategoricalAccessor) - - # String Methods - str = accessor.AccessorProperty(strings.StringMethods) + # ---------------------------------------------------------------------- + # Accessor Methods + # ---------------------------------------------------------------------- + str = CachedAccessor("str", StringMethods) + dt = CachedAccessor("dt", CombinedDatetimelikeProperties) + cat = CachedAccessor("cat", CategoricalAccessor) + plot = CachedAccessor("plot", gfx.SeriesPlotMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series - plot = accessor.AccessorProperty(gfx.SeriesPlotMethods, - gfx.SeriesPlotMethods) hist = gfx.hist_series diff --git a/pandas/core/strings.py b/pandas/core/strings.py index e0012c25e366d..278b220753196 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1370,12 +1370,44 @@ class StringMethods(NoNewAttributesMixin): """ def __init__(self, data): + self._validate(data) self._is_categorical = is_categorical_dtype(data) self._data = data.cat.categories if self._is_categorical else data # save orig to blow up categoricals to the right type self._orig = data self._freeze() + @staticmethod + def _validate(data): + from pandas.core.index import Index + + if (isinstance(data, ABCSeries) and + not ((is_categorical_dtype(data.dtype) and + is_object_dtype(data.values.categories)) or + (is_object_dtype(data.dtype)))): + # it's neither a string series not a categorical series with + # strings inside the categories. 
+ # this really should exclude all series with any non-string values + # (instead of test for object dtype), but that isn't practical for + # performance reasons until we have a str dtype (GH 9343) + raise AttributeError("Can only use .str accessor with string " + "values, which use np.object_ dtype in " + "pandas") + elif isinstance(data, Index): + # can't use ABCIndex to exclude non-str + + # see scc/inferrence.pyx which can contain string values + allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') + if data.inferred_type not in allowed_types: + message = ("Can only use .str accessor with string values " + "(i.e. inferred_type is 'string', 'unicode' or " + "'mixed')") + raise AttributeError(message) + if data.nlevels > 1: + message = ("Can only use .str accessor with Index, not " + "MultiIndex") + raise AttributeError(message) + def __getitem__(self, key): if isinstance(key, slice): return self.slice(start=key.start, stop=key.stop, step=key.step) @@ -1895,32 +1927,5 @@ def rindex(self, sub, start=0, end=None): @classmethod def _make_accessor(cls, data): - from pandas.core.index import Index - - if (isinstance(data, ABCSeries) and - not ((is_categorical_dtype(data.dtype) and - is_object_dtype(data.values.categories)) or - (is_object_dtype(data.dtype)))): - # it's neither a string series not a categorical series with - # strings inside the categories. 
- # this really should exclude all series with any non-string values - # (instead of test for object dtype), but that isn't practical for - # performance reasons until we have a str dtype (GH 9343) - raise AttributeError("Can only use .str accessor with string " - "values, which use np.object_ dtype in " - "pandas") - elif isinstance(data, Index): - # can't use ABCIndex to exclude non-str - - # see scc/inferrence.pyx which can contain string values - allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if data.inferred_type not in allowed_types: - message = ("Can only use .str accessor with string values " - "(i.e. inferred_type is 'string', 'unicode' or " - "'mixed')") - raise AttributeError(message) - if data.nlevels > 1: - message = ("Can only use .str accessor with Index, not " - "MultiIndex") - raise AttributeError(message) + cls._validate(data) return cls(data) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index b3d1ce31d66ae..d843126c60144 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -65,3 +65,7 @@ class MergeError(ValueError): Error raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. 
""" + + +class AccessorRegistrationWarning(Warning): + """Warning for attribute conflicts in accessor registration.""" diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 8962eb90be828..821c7858c7a5c 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -122,7 +122,7 @@ def test_api(self): class TestApi(Base): - allowed = ['types'] + allowed = ['types', 'extensions'] def test_api(self): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 8ae7feab451f9..73cc87855acbd 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -512,6 +512,7 @@ def test_cat_accessor(self): def test_cat_accessor_api(self): # GH 9322 from pandas.core.categorical import CategoricalAccessor + assert Series.cat is CategoricalAccessor s = Series(list('aabbcde')).astype('category') assert isinstance(s.cat, CategoricalAccessor) diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py new file mode 100644 index 0000000000000..fe0cf4c9b38af --- /dev/null +++ b/pandas/tests/test_register_accessor.py @@ -0,0 +1,87 @@ +import contextlib + +import pytest + +import pandas as pd +import pandas.util.testing as tm + + +@contextlib.contextmanager +def ensure_removed(obj, attr): + """Ensure that an attribute added to 'obj' during the test is + removed when we're done""" + try: + yield + finally: + try: + delattr(obj, attr) + except AttributeError: + pass + + +class MyAccessor(object): + + def __init__(self, obj): + self.obj = obj + self.item = 'item' + + @property + def prop(self): + return self.item + + def method(self): + return self.item + + +@pytest.mark.parametrize('obj, registrar', [ + (pd.Series, pd.api.extensions.register_series_accessor), + (pd.DataFrame, pd.api.extensions.register_dataframe_accessor), + (pd.Index, pd.api.extensions.register_index_accessor) +]) +def test_series_register(obj, registrar): + with ensure_removed(obj, 'mine'): + before = 
set(dir(obj)) + registrar('mine')(MyAccessor) + assert obj([]).mine.prop == 'item' + after = set(dir(obj)) + assert (before ^ after) == {'mine'} + + +def test_accessor_works(): + with ensure_removed(pd.Series, 'mine'): + pd.api.extensions.register_series_accessor('mine')(MyAccessor) + + s = pd.Series([1, 2]) + assert s.mine.obj is s + + assert s.mine.prop == 'item' + assert s.mine.method() == 'item' + + +def test_overwrite_warns(): + # Need to restore mean + mean = pd.Series.mean + try: + with tm.assert_produces_warning(UserWarning) as w: + pd.api.extensions.register_series_accessor('mean')(MyAccessor) + s = pd.Series([1, 2]) + assert s.mean.prop == 'item' + msg = str(w[0].message) + assert 'mean' in msg + assert 'MyAccessor' in msg + assert 'Series' in msg + finally: + pd.Series.mean = mean + + +def test_raises_attribute_error(): + + with ensure_removed(pd.Series, 'bad'): + + @pd.api.extensions.register_series_accessor("bad") + class Bad(object): + def __init__(self, data): + raise AttributeError("whoops") + + with tm.assert_raises_regex(AttributeError, "whoops"): + pd.Series([]).bad