Skip to content

BUG: Regression in v2.2.x Pandas DataFrame Raises a __deepcopy__ error when class placed in attrs #57859

Open
@achapkowski

Description

@achapkowski

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

import pandas as pd


import json as _ujson
from collections import OrderedDict
from collections.abc import MutableMapping, Mapping


###########################################################################
class InsensitiveDict(MutableMapping):
    """
    A case-insensitive ``dict`` like object used to update and alter JSON

    A varients of a case-less dictionary that allows for dot and bracket notation.
    """

    # ----------------------------------------------------------------------
    def __init__(self, data=None):
        self._store = OrderedDict()  #
        if data is None:
            data = {}

        self.update(data)
        self._to_isd(self)

    # ----------------------------------------------------------------------
    def __repr__(self):
        return str(dict(self.items()))

    # ----------------------------------------------------------------------
    def __str__(self):
        return str(dict(self.items()))

    # ----------------------------------------------------------------------
    def __setitem__(self, key, value):
        if str(key).lower() in {"_store"}:
            super(InsensitiveDict, self).__setattr__(key, value)
        else:
            if isinstance(value, dict):
                self._store[str(key).lower()] = (key, InsensitiveDict(value))
            else:
                self._store[str(key).lower()] = (key, value)

    # ----------------------------------------------------------------------
    def __getitem__(self, key):
        return self._store[str(key).lower()][1]

    # ----------------------------------------------------------------------
    def __delitem__(self, key):
        del self._store[str(key).lower()]

    # ----------------------------------------------------------------------
    def __getattr__(self, key):
        if str(key).lower() in {"_store"}:
            return self._store
        else:
            return self._store[str(key).lower()][1]

    # ----------------------------------------------------------------------
    def __setattr__(self, key, value):
        if str(key).lower() in {"_store"}:
            super(InsensitiveDict, self).__setattr__(key, value)
        else:
            if isinstance(value, dict):
                self._store[str(key).lower()] = (key, InsensitiveDict(value))
            else:
                self._store[str(key).lower()] = (key, value)

    # ----------------------------------------------------------------------
    def __dir__(self):
        return self.keys()

    # ----------------------------------------------------------------------
    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    # ----------------------------------------------------------------------
    def __len__(self):
        return len(self._store)

    # ----------------------------------------------------------------------
    def _lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval) in self._store.items()
        )

    # ----------------------------------------------------------------------
    def __setstate__(self, d):
        """unpickle support"""
        self.__dict__.update(InsensitiveDict(d).__dict__)
        self = InsensitiveDict(d)

    # ----------------------------------------------------------------------
    def __getstate__(self):
        """pickle support"""
        return _ujson.loads(self.json)

    # ----------------------------------------------------------------------
    @classmethod
    def from_dict(cls, o):
        """Converts dict to a InsensitiveDict"""
        return cls(o)

    # ----------------------------------------------------------------------
    @classmethod
    def from_json(cls, o):
        """Converts JSON string to a InsensitiveDict"""
        if isinstance(o, str):
            o = _ujson.loads(o)
            return InsensitiveDict(o)
        return InsensitiveDict.from_dict(o)

    # ----------------------------------------------------------------------
    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = InsensitiveDict(other)
        else:
            return NotImplemented
        return dict(self._lower_items()) == dict(other._lower_items())

    # ----------------------------------------------------------------------
    def copy(self):
        return InsensitiveDict(self._store.values())

    # ---------------------------------------------------------------------
    def _to_isd(self, data):
        """converts a dictionary from InsensitiveDict to a dictionary"""
        for k, v in data.items():
            if isinstance(v, (dict, InsensitiveDict)):
                data[k] = self._to_isd(v)
            elif isinstance(v, (list, tuple)):
                l = []
                for i in v:
                    if isinstance(i, dict):
                        l.append(InsensitiveDict(i))
                    else:
                        l.append(i)
                if isinstance(v, tuple):
                    l = tuple(l)
                data[k] = l
        return data

    # ---------------------------------------------------------------------
    def _json(self):
        """converts an InsensitiveDict to a dictionary"""
        d = {}
        for k, v in self.items():
            if isinstance(v, InsensitiveDict):
                d[k] = v._json()
            elif type(v) in (list, tuple):
                l = []
                for i in v:
                    if isinstance(i, InsensitiveDict):
                        l.append(i._json())
                    else:
                        l.append(i)
                if type(v) is tuple:
                    v = tuple(l)
                else:
                    v = l
                d[k] = v
            else:
                d[k] = v  # not list, tuple, or InsensitiveDict
        return d

    # ----------------------------------------------------------------------
    @property
    def json(self):
        """returns the value as JSON String"""
        o = self._json()  # dict(self.copy())
        return _ujson.dumps(dict(o))


data = {
    "A": [1, 2, 3],
    "B": ['a', 'b', None],
}

df = pd.DataFrame(data=data)
df.attrs["metadata"] = InsensitiveDict(data)

print(df.head()) # error here

Issue Description

In Pandas 2.1 and below, we could put whatever we wanted in the attrs and the data would not throw a __deepcopy__ error. We would like it if the attrs didn't require information to be deep copied like previous version.

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.attrs.html

Expected Behavior

When working with data and you have any custom class that doesn't implement __deepcopy__, the pandas data is essentially useless on any operation.

Installed Versions

INSTALLED VERSIONS

commit : bdc79c1
python : 3.9.18.final.0
python-bits : 64
OS : Windows
OS-release : 10
Version : 10.0.19045
machine : AMD64
processor : Intel64 Family 6 Model 154 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : English_United States.1252

pandas : 2.2.1
numpy : 1.23.5
pytz : 2023.3
dateutil : 2.8.2
setuptools : 67.6.1
pip : 23.3.1
Cython : 0.29.34
pytest : None
hypothesis : 6.72.0
sphinx : None
blosc : None
feather : None
xlsxwriter : 3.1.0
lxml.etree : 4.9.2
html5lib : 1.1
pymysql : 1.0.3
psycopg2 : 2.9.6
jinja2 : 3.1.2
IPython : 8.18.1
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.2
bottleneck : 1.3.7
dataframe-api-compat : None
fastparquet : 2023.2.0
fsspec : 2023.4.0
gcsfs : 2023.4.0
matplotlib : 3.7.1
numba : 0.56.4
numexpr : 2.8.4
odfpy : None
openpyxl : 3.1.2
pandas_gbq : 0.19.1
pyarrow : 15.0.1
pyreadstat : 1.2.1
python-calamine : None
pyxlsb : 1.0.10
s3fs : None
scipy : 1.10.1
sqlalchemy : 2.0.9
tables : 3.8.0
tabulate : 0.9.0
xarray : 2023.4.1
xlrd : 2.0.1
zstandard : 0.21.0
tzdata : 2023.3
qtpy : None
pyqt5 : None

Metadata

Metadata

Assignees

No one assigned

    Labels

    BugNeeds TriageIssue that has not been reviewed by a pandas team member

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions