diff --git a/doc/source/user_guide/entry_points.rst b/doc/source/user_guide/entry_points.rst new file mode 100644 index 0000000000000..1333ed77b7e1e --- /dev/null +++ b/doc/source/user_guide/entry_points.rst @@ -0,0 +1 @@ +TODO diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 8695e196c4f38..b5c22a949d9f5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -83,6 +83,7 @@ Other enhancements - Improved deprecation message for offset aliases (:issue:`60820`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) +- Support :class:`DataFrame` plugin accessor via entry points (:issue:`29076`) - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) diff --git a/pandas/__init__.py b/pandas/__init__.py index 7d6dd7b7c1a88..6f1c6e1e7cc56 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -346,3 +346,8 @@ "unique", "wide_to_long", ] + +from pandas.core.accessor import DataFrameAccessorLoader + +DataFrameAccessorLoader.load() +del DataFrameAccessorLoader diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 0331c26c805b6..ba600926c3841 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -10,6 +10,7 @@ import functools from typing import ( TYPE_CHECKING, + Any, final, ) import warnings @@ -25,6 +26,8 @@ from pandas import Index from pandas.core.generic import NDFrame +from importlib.metadata import entry_points + class DirNamesMixin: _accessors: set[str] = set() @@ -393,3 +396,39 @@ def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]: from pandas import Index return _register_accessor(name, 
Index) + + +class DataFrameAccessorLoader: + """Loader class for registering DataFrame accessors via entry points.""" + + ENTRY_POINT_GROUP: str = "pandas_dataframe_accessor" + + @classmethod + def load(cls) -> None: + """loads and registers accessors defined by 'pandas_dataframe_accessor'.""" + eps = entry_points(group=cls.ENTRY_POINT_GROUP) + names: set[str] = set() + + for ep in eps: + name: str = ep.name + + if name in names: # Verifies duplicated package names + warnings.warn( + f"Warning: you have two packages with the same name: '{name}'. " + "Uninstall the package you don't want to use " + "in order to remove this warning.\n", + UserWarning, + stacklevel=2, + ) + + else: + names.add(name) + + def make_property(ep): + def accessor(self) -> Any: + cls_ = ep.load() + return cls_(self) + + return accessor + + register_dataframe_accessor(name)(make_property(ep)) diff --git a/pandas/tests/test_plugis_entrypoint_loader.py b/pandas/tests/test_plugis_entrypoint_loader.py new file mode 100644 index 0000000000000..d292257f6cad8 --- /dev/null +++ b/pandas/tests/test_plugis_entrypoint_loader.py @@ -0,0 +1,227 @@ +import pandas as pd +import pandas._testing as tm +from pandas.core.accessor import DataFrameAccessorLoader + + +def test_no_accessors(monkeypatch): + # GH29076 + + # Mock entry_points + def mock_entry_points(*, group): + return [] + + # Patch entry_points in the correct module + monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points) + + DataFrameAccessorLoader.load() + + +def test_load_dataframe_accessors(monkeypatch): + # GH29076 + # Mocked EntryPoint to simulate a plugin + class MockEntryPoint: + name = "test_accessor" + + def load(self): + class TestAccessor: + def __init__(self, df): + self._df = df + + def test_method(self): + return "success" + + return TestAccessor + + # Mock entry_points + def mock_entry_points(*, group): + if group == DataFrameAccessorLoader.ENTRY_POINT_GROUP: + return [MockEntryPoint()] + return [] + + # Patch 
import contextlib

import pandas as pd
import pandas._testing as tm
from pandas.core.accessor import DataFrameAccessorLoader


def _mock_entry_point(name, result, method="which"):
    """
    Build a stand-in for an entry point that resolves to a tiny accessor.

    Parameters
    ----------
    name : str
        Value exposed as the entry point's ``name``.
    result : str
        Value returned by the accessor's single method.
    method : str, default "which"
        Name of the accessor's single method.

    Returns
    -------
    object
        Object with a ``name`` attribute and a ``load()`` method returning
        the accessor class.
    """

    class MockAccessor:
        def __init__(self, df):
            self._df = df

    setattr(MockAccessor, method, lambda self: result)

    class MockEntryPoint:
        def load(self):
            return MockAccessor

    entry_point = MockEntryPoint()
    entry_point.name = name
    return entry_point


def _patch_entry_points(monkeypatch, *eps):
    """Make ``pandas.core.accessor.entry_points`` return ``eps`` for our group."""

    def mock_entry_points(*, group):
        if group == DataFrameAccessorLoader.ENTRY_POINT_GROUP:
            return list(eps)
        return []

    # Patch the name where the loader looks it up, not importlib.metadata.
    monkeypatch.setattr("pandas.core.accessor.entry_points", mock_entry_points)


@contextlib.contextmanager
def _ensure_removed(*names):
    """
    Remove the given accessors from ``pd.DataFrame`` when the block exits.

    The loader registers on the DataFrame class itself, so without this the
    accessors would stay visible to every test that runs afterwards.
    """
    try:
        yield
    finally:
        for name in names:
            with contextlib.suppress(AttributeError):
                delattr(pd.DataFrame, name)
            pd.DataFrame._accessors.discard(name)


def test_no_accessors(monkeypatch):
    # GH29076
    _patch_entry_points(monkeypatch)

    DataFrameAccessorLoader.load()


def test_load_dataframe_accessors(monkeypatch):
    # GH29076
    _patch_entry_points(
        monkeypatch,
        _mock_entry_point("test_accessor", "success", method="test_method"),
    )

    with _ensure_removed("test_accessor"):
        DataFrameAccessorLoader.load()

        # Create DataFrame and verify that the accessor was registered
        df = pd.DataFrame({"a": [1, 2, 3]})
        assert hasattr(df, "test_accessor")
        assert df.test_accessor.test_method() == "success"


def test_duplicate_accessor_names(monkeypatch):
    # GH29076
    _patch_entry_points(
        monkeypatch,
        _mock_entry_point("duplicate_accessor", "Accessor1"),
        _mock_entry_point("duplicate_accessor", "Accessor2"),
    )

    with _ensure_removed("duplicate_accessor"):
        # Check that the UserWarning is raised
        with tm.assert_produces_warning(
            UserWarning, match="duplicate_accessor"
        ) as record:
            DataFrameAccessorLoader.load()

        messages = [str(w.message) for w in record]
        assert any("two packages with the same name" in msg for msg in messages)

        df = pd.DataFrame({"x": [1, 2, 3]})
        assert hasattr(df, "duplicate_accessor")
        assert df.duplicate_accessor.which() in {"Accessor1", "Accessor2"}


def test_unique_accessor_names(monkeypatch):
    # GH29076
    _patch_entry_points(
        monkeypatch,
        _mock_entry_point("accessor1", "Accessor1"),
        _mock_entry_point("accessor2", "Accessor2"),
    )

    with _ensure_removed("accessor1", "accessor2"):
        # Check that no UserWarning is raised
        with tm.assert_produces_warning(None, check_stacklevel=False):
            DataFrameAccessorLoader.load()

        df = pd.DataFrame({"x": [1, 2, 3]})
        assert hasattr(df, "accessor1"), "Accessor1 not registered"
        assert hasattr(df, "accessor2"), "Accessor2 not registered"
        assert df.accessor1.which() == "Accessor1", "Accessor1 method incorrect"
        assert df.accessor2.which() == "Accessor2", "Accessor2 method incorrect"


def test_duplicate_and_unique_accessor_names(monkeypatch):
    # GH29076
    _patch_entry_points(
        monkeypatch,
        _mock_entry_point("duplicate_accessor", "Accessor1"),
        _mock_entry_point("duplicate_accessor", "Accessor2"),
        _mock_entry_point("unique_accessor", "Accessor3"),
    )

    with _ensure_removed("duplicate_accessor", "unique_accessor"):
        # Capture warnings
        with tm.assert_produces_warning(
            UserWarning, match="duplicate_accessor"
        ) as record:
            DataFrameAccessorLoader.load()

        messages = [str(w.message) for w in record]

        # Filter warnings for the specific message about duplicate packages
        duplicate_package_warnings = [
            msg
            for msg in messages
            if "you have two packages with the same name: 'duplicate_accessor'" in msg
        ]

        # Assert one warning about duplicate packages
        assert len(duplicate_package_warnings) == 1, (
            f"Expected exactly one warning about duplicate packages, "
            f"got {len(duplicate_package_warnings)}: {duplicate_package_warnings}"
        )

        df = pd.DataFrame({"x": [1, 2, 3]})
        assert hasattr(df, "duplicate_accessor"), "duplicate_accessor not registered"

        assert hasattr(df, "unique_accessor"), "unique_accessor not registered"

        assert df.duplicate_accessor.which() in {"Accessor1", "Accessor2"}, (
            "duplicate_accessor method incorrect"
        )
        assert df.unique_accessor.which() == "Accessor3", (
            "unique_accessor method incorrect"
        )