pandas-dev · jreback · Nov 14, 2015 · Nov 11, 2015
diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
@@ -76,6 +76,8 @@ Bug Fixes
 - Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`)
 - Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
 - Bug in using ``DataFrame.ix`` with a multi-index indexer(:issue:`11372`)
+- Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such
+  a value was not possible, so error out on setting it. (:issue:`10673`)
 
 
 - Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -7,7 +7,6 @@
 import pandas.core.nanops as nanops
 import pandas.lib as lib
 from pandas.util.decorators import Appender, cache_readonly, deprecate_kwarg
-from pandas.core.strings import StringMethods
 from pandas.core.common import AbstractMethodError
 
 _shared_docs = dict()
@@ -111,6 +110,31 @@ def _reset_cache(self, key=None):
         else:
             self._cache.pop(key, None)
 
+class NoNewAttributesMixin(object):
+    """Mixin which prevents adding new attributes.
+
+     Prevents additional attributes via xxx.attribute = "something" after a call to
+     `self._freeze()`. Mainly used to prevent the user from using wrong attributes
+     on an accessor (`Series.cat/.str/.dt`).
+
+     If you really want to add a new attribute at a later time, you need to use
+     `object.__setattr__(self, key, value)`.
+     """
+
+    def _freeze(self):
+        """Prevents setting additional attributes"""
+        object.__setattr__(self, "__frozen", True)
+
+
+    # prevent adding any attribute via s.xxx.new_attribute = ...
+    def __setattr__(self, key, value):
+        # _cache is used by a decorator
+        # use a dict lookup instead of getattr, as getattr fails for getters which raise an error
+        if getattr(self, "__frozen", False) and not (key in type(self).__dict__ or key == "_cache"):
+            raise AttributeError( "You cannot add any new attribute '{key}'".format(key=key))
+        object.__setattr__(self, key, value)
+
+
 class PandasDelegate(PandasObject):
     """ an abstract base class for delegating methods/properties """
 
@@ -517,41 +541,6 @@ def searchsorted(self, key, side='left'):
         #### needs tests/doc-string
         return self.values.searchsorted(key, side=side)
 
-    # string methods
-    def _make_str_accessor(self):
-        from pandas.core.series import Series
-        from pandas.core.index import Index
-        if isinstance(self, Series) and not com.is_object_dtype(self.dtype):
-            # this really should exclude all series with any non-string values,
-            # but that isn't practical for performance reasons until we have a
-            # str dtype (GH 9343)
-            raise AttributeError("Can only use .str accessor with string "
-                                 "values, which use np.object_ dtype in "
-                                 "pandas")
-        elif isinstance(self, Index):
-            # see scc/inferrence.pyx which can contain string values
-            allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
-            if self.inferred_type not in allowed_types:
-                message = ("Can only use .str accessor with string values "
-                           "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
-                raise AttributeError(message)
-            if self.nlevels > 1:
-                message = "Can only use .str accessor with Index, not MultiIndex"
-                raise AttributeError(message)
-        return StringMethods(self)
-
-    str = AccessorProperty(StringMethods, _make_str_accessor)
-
-    def _dir_additions(self):
-        return set()
-
-    def _dir_deletions(self):
-        try:
-            getattr(self, 'str')
-        except AttributeError:
-            return set(['str'])
-        return set()
-
     _shared_docs['drop_duplicates'] = (
         """Return %(klass)s with duplicate values removed
 

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -8,7 +8,7 @@
 from pandas.compat import u
 
 from pandas.core.algorithms import factorize
-from pandas.core.base import PandasObject, PandasDelegate
+from pandas.core.base import PandasObject, PandasDelegate, NoNewAttributesMixin
 import pandas.core.common as com
 from pandas.core.missing import interpolate_2d
 from pandas.util.decorators import cache_readonly, deprecate_kwarg
@@ -1717,7 +1717,7 @@ def repeat(self, repeats):
 
 ##### The Series.cat accessor #####
 
-class CategoricalAccessor(PandasDelegate):
+class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin):
     """
     Accessor object for categorical properties of the Series values.
 
@@ -1742,6 +1742,7 @@ class CategoricalAccessor(PandasDelegate):
     def __init__(self, values, index):
         self.categorical = values
         self.index = index
+        self._freeze()
 
     def _delegate_property_get(self, name):
         return getattr(self.categorical, name)

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -25,6 +25,7 @@
                                 _values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype,
                                 _ensure_object, _ensure_int64, is_bool_indexer,
                                 is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype)
+from pandas.core.strings import StringAccessorMixin
 from pandas.core.config import get_option
 from pandas.io.common import PerformanceWarning
 
@@ -64,7 +65,7 @@ def _new_Index(cls, d):
         and breaks __new__ """
     return cls.__new__(cls, **d)
 
-class Index(IndexOpsMixin, PandasObject):
+class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
 
     """
     Immutable ndarray implementing an ordered, sliceable set. The basic object

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -32,6 +32,7 @@
 from pandas.core import generic, base
 from pandas.core.internals import SingleBlockManager
 from pandas.core.categorical import Categorical, CategoricalAccessor
+import pandas.core.strings as strings
 from pandas.tseries.common import (maybe_to_datetimelike,
                                    CombinedDatetimelikeProperties)
 from pandas.tseries.index import DatetimeIndex
@@ -85,7 +86,7 @@ def wrapper(self):
 # Series class
 
 
-class Series(base.IndexOpsMixin, generic.NDFrame):
+class Series(base.IndexOpsMixin, strings.StringAccessorMixin, generic.NDFrame,):
 
     """
     One-dimensional ndarray with axis labels (including time series).

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -1,8 +1,10 @@
 import numpy as np
 
 from pandas.compat import zip
-from pandas.core.common import isnull, _values_from_object, is_bool_dtype, is_list_like
+from pandas.core.common import (isnull, _values_from_object, is_bool_dtype, is_list_like,
+                                is_categorical_dtype, is_object_dtype)
 import pandas.compat as compat
+from pandas.core.base import AccessorProperty, NoNewAttributesMixin
 from pandas.util.decorators import Appender, deprecate_kwarg
 import re
 import pandas.lib as lib
@@ -1044,7 +1046,7 @@ def do_copy(target):
     return do_copy
 
 
-class StringMethods(object):
+class StringMethods(NoNewAttributesMixin):
 
     """
     Vectorized string functions for Series and Index. NAs stay NA unless
@@ -1059,6 +1061,7 @@ class StringMethods(object):
 
     def __init__(self, series):
         self.series = series
+        self._freeze()
 
     def __getitem__(self, key):
         if isinstance(key, slice):
@@ -1542,3 +1545,41 @@ def rindex(self, sub, start=0, end=None):
     isdecimal = _noarg_wrapper(lambda x: compat.u_safe(x).isdecimal(),
                                docstring=_shared_docs['ismethods'] %
                                _shared_docs['isdecimal'])
+
+class StringAccessorMixin(object):
+    """ Mixin to add a `.str` accessor to the class."""
+
+    # string methods
+    def _make_str_accessor(self):
+        from pandas.core.series import Series
+        from pandas.core.index import Index
+        if isinstance(self, Series) and not is_object_dtype(self.dtype):
+            # this really should exclude all series with any non-string values,
+            # but that isn't practical for performance reasons until we have a
+            # str dtype (GH 9343)
+            raise AttributeError("Can only use .str accessor with string "
+                                 "values, which use np.object_ dtype in "
+                                 "pandas")
+        elif isinstance(self, Index):
+            # see src/inference.pyx which can contain string values
+            allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
+            if self.inferred_type not in allowed_types:
+                message = ("Can only use .str accessor with string values "
+                           "(i.e. inferred_type is 'string', 'unicode' or 'mixed')")
+                raise AttributeError(message)
+            if self.nlevels > 1:
+                message = "Can only use .str accessor with Index, not MultiIndex"
+                raise AttributeError(message)
+        return StringMethods(self)
+
+    str = AccessorProperty(StringMethods, _make_str_accessor)
+
+    def _dir_additions(self):
+        return set()
+
+    def _dir_deletions(self):
+        try:
+            getattr(self, 'str')
+        except AttributeError:
+            return set(['str'])
+        return set()
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
@@ -6,7 +6,7 @@
 import pandas.compat as compat
 import pandas as pd
 from pandas.compat import u, StringIO
-from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate
+from pandas.core.base import FrozenList, FrozenNDArray, PandasDelegate, NoNewAttributesMixin
 import pandas.core.common as com
 from pandas.tseries.base import DatetimeIndexOpsMixin
 from pandas.util.testing import assertRaisesRegexp, assertIsInstance
@@ -825,6 +825,25 @@ def test_lookup_nan(self):
         self.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs)))
 
 
+class TestNoNewAttributesMixin(tm.TestCase):
+
+    def test_mixin(self):
+        class T(NoNewAttributesMixin):
+            pass
+
+        t = T()
+        self.assertFalse(hasattr(t, "__frozen"))
+        t.a = "test"
+        self.assertEqual(t.a, "test")
+        t._freeze()
+        #self.assertTrue("__frozen" not in dir(t))
+        self.assertIs(getattr(t, "__frozen"), True)
+        def f():
+            t.b = "test"
+        self.assertRaises(AttributeError, f)
+        self.assertFalse(hasattr(t, "b"))
+
+
 if __name__ == '__main__':
     import nose
 

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -3625,6 +3625,12 @@ def test_cat_accessor_api(self):
             invalid.cat
         self.assertFalse(hasattr(invalid, 'cat'))
 
+    def test_cat_accessor_no_new_attributes(self):
+        # https://github.com/pydata/pandas/issues/10673
+        c = Series(list('aabbcde')).astype('category')
+        with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
+            c.cat.xlabel = "a"
+
     def test_pickle_v0_14_1(self):
 
         # we have the name warning

diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -247,6 +247,12 @@ def f():
                 s.dt.hour[0] = 5
             self.assertRaises(com.SettingWithCopyError, f)
 
+    def test_dt_accessor_no_new_attributes(self):
+        # https://github.com/pydata/pandas/issues/10673
+        s = Series(date_range('20130101',periods=5,freq='D'))
+        with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
+            s.dt.xlabel = "a"
+
     def test_strftime(self):
         # GH 10086
         s = Series(date_range('20130101', periods=5))

diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
@@ -2034,6 +2034,12 @@ def test_index_str_accessor_visibility(self):
         with self.assertRaisesRegexp(AttributeError, message):
             idx.str
 
+    def test_str_accessor_no_new_attributes(self):
+        # https://github.com/pydata/pandas/issues/10673
+        s = Series(list('aabbcde'))
+        with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"):
+            s.str.xlabel = "a"
+
     def test_method_on_bytes(self):
         lhs = Series(np.array(list('abc'), 'S1').astype(object))
         rhs = Series(np.array(list('def'), 'S1').astype(object))

diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py
@@ -1,7 +1,7 @@
 ## datetimelike delegation ##
 
 import numpy as np
-from pandas.core.base import PandasDelegate
+from pandas.core.base import PandasDelegate, NoNewAttributesMixin
 from pandas.core import common as com
 from pandas.tseries.index import DatetimeIndex
 from pandas.tseries.period import PeriodIndex
@@ -59,12 +59,13 @@ def maybe_to_datetimelike(data, copy=False):
 
     raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
 
-class Properties(PandasDelegate):
+class Properties(PandasDelegate, NoNewAttributesMixin):
 
     def __init__(self, values, index, name):
         self.values = values
         self.index = index
         self.name = name
+        self._freeze()
 
     def _delegate_property_get(self, name):
         from pandas import Series