Skip to content

Prevent adding new attributes to the accessors .str, .dt and .cat #11575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ Bug Fixes
- Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`)
- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
- Bug in using ``DataFrame.ix`` with a multi-index indexer(:issue:`11372`)
- Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such
a value was not possible, so error out on setting it. (:issue:`10673`)


- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)
Expand Down
61 changes: 25 additions & 36 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pandas.core.nanops as nanops
import pandas.lib as lib
from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
from pandas.core.strings import StringMethods
from pandas.core.common import AbstractMethodError

_shared_docs = dict()
Expand Down Expand Up @@ -111,6 +110,31 @@ def _reset_cache(self, key=None):
else:
self._cache.pop(key, None)

class NoNewAttributesMixin(object):
    """Mixin which prevents adding new attributes.

    Prevents additional attributes via ``xxx.attribute = "something"`` after
    a call to ``self._freeze()``. Mainly used to prevent the user from using
    wrong attributes on an accessor (``Series.cat/.str/.dt``).

    Attributes that already exist when the object is frozen (on the instance
    itself or on any class in its MRO) can still be re-assigned; only
    genuinely new names are rejected.

    If you really want to add a new attribute at a later time, you need to
    use ``object.__setattr__(self, key, value)``.
    """

    def _freeze(self):
        """Prevents setting additional attributes"""
        # Bypass our own __setattr__ so the marker can always be written.
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key, value):
        """Set ``key`` to ``value`` unless that would add a new attribute.

        Raises
        ------
        AttributeError
            If the object is frozen and ``key`` is neither ``_cache``, an
            existing instance attribute, nor defined on the class hierarchy.
        """
        if getattr(self, "__frozen", False):
            known = (
                # _cache is used by a decorator
                key == "_cache"
                # attributes set on the instance before it was frozen
                or key in getattr(self, "__dict__", ())
                # Look the name up in the class dicts instead of calling
                # getattr: a property whose getter raises would otherwise
                # look missing. Walk the whole MRO so that inherited class
                # attributes count as existing too.
                or any(key in vars(klass) for klass in type(self).__mro__)
            )
            if not known:
                raise AttributeError(
                    "You cannot add any new attribute '{key}'".format(key=key))
        object.__setattr__(self, key, value)


class PandasDelegate(PandasObject):
""" an abstract base class for delegating methods/properties """

Expand Down Expand Up @@ -517,41 +541,6 @@ def searchsorted(self, key, side='left'):
#### needs tests/doc-string
return self.values.searchsorted(key, side=side)

# string methods
# Build the ``StringMethods`` accessor for ``self``; raises AttributeError
# when ``self`` is a non-object-dtype Series, an Index whose inferred_type
# is not in the allowed list, or an Index with more than one level.
def _make_str_accessor(self):
# imported inside the method rather than at module level
# (NOTE(review): presumably to avoid a circular import - confirm)
from pandas.core.series import Series
from pandas.core.index import Index
if isinstance(self, Series) and not com.is_object_dtype(self.dtype):
# this really should exclude all series with any non-string values,
# but that isn't practical for performance reasons until we have a
# str dtype (GH 9343)
raise AttributeError("Can only use .str accessor with string "
"values, which use np.object_ dtype in "
"pandas")
elif isinstance(self, Index):
# see scc/inferrence.pyx which can contain string values
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
if self.inferred_type not in allowed_types:
message = ("Can only use .str accessor with string values "
"(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
raise AttributeError(message)
# nlevels > 1 is reported to the user as a MultiIndex
if self.nlevels > 1:
message = "Can only use .str accessor with Index, not MultiIndex"
raise AttributeError(message)
# every check passed: wrap ``self`` in the accessor object
return StringMethods(self)

str = AccessorProperty(StringMethods, _make_str_accessor)

# Always returns an empty set.
# NOTE(review): presumably the names to add to dir() output - confirm.
def _dir_additions(self):
return set()

# Returns set(['str']) when accessing ``self.str`` raises AttributeError,
# otherwise an empty set.
# NOTE(review): presumably the names to hide from dir() output - confirm.
def _dir_deletions(self):
try:
# probe the accessor; only the AttributeError outcome matters
getattr(self, 'str')
except AttributeError:
return set(['str'])
return set()

_shared_docs['drop_duplicates'] = (
"""Return %(klass)s with duplicate values removed

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pandas.compat import u

from pandas.core.algorithms import factorize
from pandas.core.base import PandasObject, PandasDelegate
from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
import pandas.core.common as com
from pandas.core.missing import interpolate_2d
from pandas.util.decorators import cache_readonly, deprecate_kwarg
Expand Down Expand Up @@ -1717,7 +1717,7 @@ def repeat(self, repeats):

##### The Series.cat accessor #####

class CategoricalAccessor(PandasDelegate):
class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin):
"""
Accessor object for categorical properties of the Series values.

Expand All @@ -1742,6 +1742,7 @@ class CategoricalAccessor(PandasDelegate):
def __init__(self, values, index):
self.categorical = values
self.index = index
self._freeze()

def _delegate_property_get(self, name):
return getattr(self.categorical, name)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
_values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype,
_ensure_object, _ensure_int64, is_bool_indexer,
is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype)
from pandas.core.strings import StringAccessorMixin
from pandas.core.config import get_option
from pandas.io.common import PerformanceWarning

Expand Down Expand Up @@ -64,7 +65,7 @@ def _new_Index(cls, d):
and breaks __new__ """
return cls.__new__(cls, **d)

class Index(IndexOpsMixin, PandasObject):
class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):

"""
Immutable ndarray implementing an ordered, sliceable set. The basic object
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from pandas.core import generic, base
from pandas.core.internals import SingleBlockManager
from pandas.core.categorical import Categorical, CategoricalAccessor
import pandas.core.strings as strings
from pandas.tseries.common import (maybe_to_datetimelike,
CombinedDatetimelikeProperties)
from pandas.tseries.index import DatetimeIndex
Expand Down Expand Up @@ -85,7 +86,7 @@ def wrapper(self):
# Series class


class Series(base.IndexOpsMixin, generic.NDFrame):
class Series(base.IndexOpsMixin, strings.StringAccessorMixin, generic.NDFrame,):

"""
One-dimensional ndarray with axis labels (including time series).
Expand Down
45 changes: 43 additions & 2 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import numpy as np

from pandas.compat import zip
from pandas.core.common import isnull, _values_from_object, is_bool_dtype, is_list_like
from pandas.core.common import (isnull, _values_from_object, is_bool_dtype, is_list_like,
is_categorical_dtype, is_object_dtype)
import pandas.compat as compat
from pandas.core.base import AccessorProperty, NoNewAttributesMixin
from pandas.util.decorators import Appender, deprecate_kwarg
import re
import pandas.lib as lib
Expand Down Expand Up @@ -1044,7 +1046,7 @@ def do_copy(target):
return do_copy


class StringMethods(object):
class StringMethods(NoNewAttributesMixin):

"""
Vectorized string functions for Series and Index. NAs stay NA unless
Expand All @@ -1059,6 +1061,7 @@ class StringMethods(object):

def __init__(self, series):
self.series = series
self._freeze()

def __getitem__(self, key):
if isinstance(key, slice):
Expand Down Expand Up @@ -1542,3 +1545,41 @@ def rindex(self, sub, start=0, end=None):
isdecimal = _noarg_wrapper(lambda x: compat.u_safe(x).isdecimal(),
docstring=_shared_docs['ismethods'] %
_shared_docs['isdecimal'])

class StringAccessorMixin(object):
    """Mixin that adds a ``.str`` accessor to the class."""

    # string methods
    def _make_str_accessor(self):
        """Check that ``self`` can hold strings and build its accessor.

        Returns a ``StringMethods`` wrapping ``self``.

        Raises AttributeError for a Series whose dtype is not object, for
        an Index whose ``inferred_type`` is not in the allowed list, and
        for an Index with more than one level.
        """
        from pandas.core.series import Series
        from pandas.core.index import Index

        if isinstance(self, Series) and not is_object_dtype(self.dtype):
            # Ideally any Series holding a non-string value would be
            # rejected, but that is too slow to check until there is a
            # dedicated str dtype (GH 9343).
            raise AttributeError("Can only use .str accessor with string "
                                 "values, which use np.object_ dtype in "
                                 "pandas")

        if isinstance(self, Index):
            # inferred types that can contain string values
            string_like = ('string', 'unicode', 'mixed', 'mixed-integer')
            if self.inferred_type not in string_like:
                raise AttributeError(
                    "Can only use .str accessor with string values "
                    "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
            if self.nlevels > 1:
                raise AttributeError(
                    "Can only use .str accessor with Index, not MultiIndex")

        return StringMethods(self)

    str = AccessorProperty(StringMethods, _make_str_accessor)

    def _dir_additions(self):
        """Return an empty set."""
        return set()

    def _dir_deletions(self):
        """Return ``set(['str'])`` if ``.str`` is unavailable, else empty."""
        try:
            getattr(self, 'str')
        except AttributeError:
            hidden = set(['str'])
        else:
            hidden = set()
        return hidden
21 changes: 20 additions & 1 deletion pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas.compat as compat
import pandas as pd
from pandas.compat import u, StringIO
from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate
from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate, NoNewAttributesMixin
import pandas.core.common as com
from pandas.tseries.base import DatetimeIndexOpsMixin
from pandas.util.testing import assertRaisesRegexp, assertIsInstance
Expand Down Expand Up @@ -825,6 +825,25 @@ def test_lookup_nan(self):
self.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs)))


class TestNoNewAttributesMixin(tm.TestCase):

    def test_mixin(self):
        class Frozen(NoNewAttributesMixin):
            pass

        obj = Frozen()

        # before freezing: no marker yet, attributes can be added freely
        self.assertFalse(hasattr(obj, "__frozen"))
        obj.a = "test"
        self.assertEqual(obj.a, "test")

        # after freezing: the marker is set and new attributes are refused
        obj._freeze()
        self.assertIs(getattr(obj, "__frozen"), True)

        def add_new_attribute():
            obj.b = "test"

        self.assertRaises(AttributeError, add_new_attribute)
        self.assertFalse(hasattr(obj, "b"))


if __name__ == '__main__':
import nose

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -3625,6 +3625,12 @@ def test_cat_accessor_api(self):
invalid.cat
self.assertFalse(hasattr(invalid, 'cat'))

def test_cat_accessor_no_new_attributes(self):
    # https://github.com/pydata/pandas/issues/10673
    # setting an unknown attribute on the .cat accessor must raise
    expected_msg = "You cannot add any new attribute"
    categorical_series = Series(list('aabbcde')).astype('category')
    with tm.assertRaisesRegexp(AttributeError, expected_msg):
        categorical_series.cat.xlabel = "a"

def test_pickle_v0_14_1(self):

# we have the name warning
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ def f():
s.dt.hour[0] = 5
self.assertRaises(com.SettingWithCopyError, f)

def test_dt_accessor_no_new_attributes(self):
    # https://github.com/pydata/pandas/issues/10673
    # setting an unknown attribute on the .dt accessor must raise
    expected_msg = "You cannot add any new attribute"
    daily = date_range('20130101', periods=5, freq='D')
    datetime_series = Series(daily)
    with tm.assertRaisesRegexp(AttributeError, expected_msg):
        datetime_series.dt.xlabel = "a"

def test_strftime(self):
# GH 10086
s = Series(date_range('20130101', periods=5))
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2034,6 +2034,12 @@ def test_index_str_accessor_visibility(self):
with self.assertRaisesRegexp(AttributeError, message):
idx.str

def test_str_accessor_no_new_attributes(self):
    # https://github.com/pydata/pandas/issues/10673
    # setting an unknown attribute on the .str accessor must raise
    expected_msg = "You cannot add any new attribute"
    string_series = Series(list('aabbcde'))
    with tm.assertRaisesRegexp(AttributeError, expected_msg):
        string_series.str.xlabel = "a"

def test_method_on_bytes(self):
lhs = Series(np.array(list('abc'), 'S1').astype(object))
rhs = Series(np.array(list('def'), 'S1').astype(object))
Expand Down
5 changes: 3 additions & 2 deletions pandas/tseries/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## datetimelike delegation ##

import numpy as np
from pandas.core.base import PandasDelegate
from pandas.core.base import PandasDelegate, NoNewAttributesMixin
from pandas.core import common as com
from pandas.tseries.index import DatetimeIndex
from pandas.tseries.period import PeriodIndex
Expand Down Expand Up @@ -59,12 +59,13 @@ def maybe_to_datetimelike(data, copy=False):

raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))

class Properties(PandasDelegate):
class Properties(PandasDelegate, NoNewAttributesMixin):

def __init__(self, values, index, name):
self.values = values
self.index = index
self.name = name
self._freeze()

def _delegate_property_get(self, name):
from pandas import Series
Expand Down