From f8325a4a56012ef4b3639f95cf45331cb2a5f321 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 21 Apr 2025 10:47:52 -0700 Subject: [PATCH 1/6] DEPS: Clean unused dependencies --- ci/deps/actions-310-minimum_versions.yaml | 1 - ci/deps/actions-310.yaml | 1 - ci/deps/actions-311-downstream_compat.yaml | 1 - ci/deps/actions-311.yaml | 1 - ci/deps/actions-312.yaml | 1 - doc/source/getting_started/install.rst | 1 - environment.yml | 5 +---- pandas/compat/_optional.py | 1 - pyproject.toml | 6 +----- requirements-dev.txt | 5 +---- scripts/tests/data/deps_expected_random.yaml | 1 - scripts/tests/data/deps_minimum.toml | 6 +----- scripts/tests/data/deps_unmodified_random.yaml | 1 - scripts/validate_min_versions_in_sync.py | 2 +- 14 files changed, 5 insertions(+), 28 deletions(-) diff --git a/ci/deps/actions-310-minimum_versions.yaml b/ci/deps/actions-310-minimum_versions.yaml index a8fb678970b20..286b5f5a85f07 100644 --- a/ci/deps/actions-310-minimum_versions.yaml +++ b/ci/deps/actions-310-minimum_versions.yaml @@ -26,7 +26,6 @@ dependencies: # optional dependencies - beautifulsoup4=4.12.3 - - blosc=1.21.3 - bottleneck=1.3.6 - fastparquet=2024.2.0 - fsspec=2024.2.0 diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index e1c7463f6432d..43049e000512f 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -24,7 +24,6 @@ dependencies: # optional dependencies - beautifulsoup4>=4.12.3 - - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2024.2.0 - fsspec>=2024.2.0 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index ff8feee9dbf9a..37353532f83e4 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -25,7 +25,6 @@ dependencies: # optional dependencies - beautifulsoup4>=4.12.3 - - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2024.2.0 - fsspec>=2024.2.0 diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index f7d5dd75aff82..e2c7e2609cff8 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -24,7 +24,6 @@ dependencies: # optional dependencies - beautifulsoup4>=4.12.3 - - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2024.2.0 - fsspec>=2024.2.0 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index f1d17c72da2c5..b3ba9c84c144a 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -24,7 +24,6 @@ dependencies: # optional dependencies - beautifulsoup4>=4.12.3 - - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2024.2.0 - fsspec>=2024.2.0 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 1d651ac570d8b..8b847d82a9916 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -305,7 +305,6 @@ Installable with ``pip install "pandas[hdf5, parquet, feather, spss, excel]"`` Dependency Minimum Version pip extra Notes ====================================================== ================== ================ ========================================================== `PyTables `__ 3.8.0 hdf5 HDF5-based reading / writing -`blosc `__ 1.21.3 hdf5 Compression for HDF5; only available on ``conda`` `zlib `__ hdf5 Compression for HDF5 `fastparquet `__ 2024.2.0 - Parquet reading / writing (pyarrow is default) `pyarrow `__ 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing diff --git a/environment.yml b/environment.yml index da0b2a012c3fc..4677614dc7858 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,6 @@ dependencies: # optional dependencies - beautifulsoup4>=4.12.3 - - blosc - bottleneck>=1.3.6 - fastparquet>=2024.2.0 - fsspec>=2024.2.0 @@ -55,7 +54,7 @@ dependencies: - scipy>=1.12.0 - sqlalchemy>=2.0.0 - tabulate>=0.9.0 - - xarray>=2024.1.1, <=2024.9.0 + - xarray>=2024.1.1 - xlrd>=2.0.1 - xlsxwriter>=3.2.0 - zstandard>=0.22.0 @@ -83,8 +82,6 @@ dependencies: # documentation - gitpython # obtain contributors from git for whatsnew - - gitdb - - google-auth - natsort # DataFrame.sort_values doctest - numpydoc - pydata-sphinx-theme=0.16 diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 5c2e3d9b07c22..9f4615d183766 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -23,7 +23,6 @@ "adbc-driver-postgresql": "0.10.0", "adbc-driver-sqlite": "0.8.0", "bs4": "4.12.3", - "blosc": "1.21.3", "bottleneck": "1.3.6", "fastparquet": "2024.2.0", "fsspec": "2024.2.0", diff --git a/pyproject.toml b/pyproject.toml index 3f7b6a672e1b0..480e58b62c1d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,9 +68,7 @@ gcp = ['gcsfs>=2024.2.0'] excel = ['odfpy>=1.4.1', 'openpyxl>=3.1.2', 'python-calamine>=0.1.7', 'pyxlsb>=1.0.10', 'xlrd>=2.0.1', 'xlsxwriter>=3.2.0'] parquet = ['pyarrow>=10.0.1'] feather = ['pyarrow>=10.0.1'] -hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) - #'blosc>=1.20.1', - 'tables>=3.8.0'] +hdf5 = ['tables>=3.8.0'] spss = ['pyreadstat>=1.2.6'] postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.6', 'adbc-driver-postgresql>=0.10.0'] mysql = ['SQLAlchemy>=2.0.0', 'pymysql>=1.1.0'] @@ -85,8 +83,6 @@ timezone = ['pytz>=2023.4'] all = ['adbc-driver-postgresql>=0.10.0', 'adbc-driver-sqlite>=0.8.0', 'beautifulsoup4>=4.12.3', - # blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) - #'blosc>=1.21.3', 'bottleneck>=1.3.6', 'fastparquet>=2024.2.0', 'fsspec>=2024.2.0', diff --git a/requirements-dev.txt b/requirements-dev.txt index f16b905bcddfb..297f1778495b7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -16,7 +16,6 @@ coverage python-dateutil numpy<3 beautifulsoup4>=4.12.3 -blosc bottleneck>=1.3.6 fastparquet>=2024.2.0 fsspec>=2024.2.0 @@ -44,7 +43,7 @@ s3fs>=2024.2.0 scipy>=1.12.0 SQLAlchemy>=2.0.0 tabulate>=0.9.0 -xarray>=2024.1.1, <=2024.9.0 +xarray>=2024.1.1 xlrd>=2.0.1 xlsxwriter>=3.2.0 zstandard>=0.22.0 @@ -58,8 +57,6 @@ mypy==1.13.0 tokenize-rt pre-commit>=4.2.0 gitpython -gitdb -google-auth natsort numpydoc pydata-sphinx-theme==0.16 diff --git a/scripts/tests/data/deps_expected_random.yaml b/scripts/tests/data/deps_expected_random.yaml index 8e85c91ead24e..d4ecd9f64a68d 100644 --- a/scripts/tests/data/deps_expected_random.yaml +++ b/scripts/tests/data/deps_expected_random.yaml @@ -23,7 +23,6 @@ dependencies: # optional dependencies - beautifulsoup4>=5.9.3 - - blosc - bottleneck>=1.3.2 - fastparquet>=0.6.3 - fsspec>=2021.07.0 diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index f789d5998a30c..21c269f573b3d 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -63,9 +63,7 @@ gcp = ['gcsfs>=2021.07.0'] excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'python-calamine>=0.1.7', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3'] parquet = ['pyarrow>=7.0.0'] feather = ['pyarrow>=7.0.0'] -hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) - #'blosc>=1.20.1', - 'tables>=3.6.1'] +hdf5 = ['tables>=3.6.1'] spss = ['pyreadstat>=1.1.2'] postgresql = ['SQLAlchemy>=1.4.16', 'psycopg2>=2.8.6'] mysql = ['SQLAlchemy>=1.4.16', 'pymysql>=1.1.0'] @@ -77,8 +75,6 @@ output_formatting = ['jinja2>=3.0.0', 'tabulate>=0.8.9'] clipboard = ['PyQt5>=5.15.1', 'qtpy>=2.3.0'] compression = ['zstandard>=0.15.2'] all = ['beautifulsoup4>=5.9.3', - # blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) - #'blosc>=1.21.0', 'bottleneck>=1.3.2', 'fastparquet>=0.6.3', 'fsspec>=2021.07.0', diff --git a/scripts/tests/data/deps_unmodified_random.yaml b/scripts/tests/data/deps_unmodified_random.yaml index 5b47d45973161..4b0f4ffb51b92 100644 --- a/scripts/tests/data/deps_unmodified_random.yaml +++ b/scripts/tests/data/deps_unmodified_random.yaml @@ -23,7 +23,6 @@ dependencies: # optional dependencies - beautifulsoup4 - - blosc - bottleneck>=1.3.2 - fastparquet>=0.6.3 - fsspec>=2021.07.0 diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 1001b00450354..7908aaef3d890 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -36,7 +36,7 @@ SETUP_PATH = pathlib.Path("pyproject.toml").resolve() YAML_PATH = pathlib.Path("ci/deps") ENV_PATH = pathlib.Path("environment.yml") -EXCLUDE_DEPS = {"tzdata", "blosc", "pyqt", "pyqt5"} +EXCLUDE_DEPS = {"tzdata", "pyqt", "pyqt5"} EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"]) # pandas package is not available # in pre-commit environment From f20ea839c5aa44cd0442932d6cd9462d199b2642 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 30 Apr 2025 09:13:16 -0700 Subject: [PATCH 2/6] Sync remove upper pin on xarray --- ci/deps/actions-310.yaml | 2 +- ci/deps/actions-311-downstream_compat.yaml | 2 +- ci/deps/actions-311.yaml | 2 +- ci/deps/actions-312.yaml | 2 +- ci/deps/actions-313.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 43049e000512f..5b38d7abb8540 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -51,7 +51,7 @@ dependencies: - scipy>=1.12.0 - sqlalchemy>=2.0.0 - tabulate>=0.9.0 - - xarray>=2024.1.1, <=2024.9.0 + - xarray>=2024.1.1 - xlrd>=2.0.1 - xlsxwriter>=3.2.0 - zstandard>=0.22.0 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 37353532f83e4..9fc808d2df91a 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -52,7 +52,7 @@ dependencies: - scipy>=1.12.0 - sqlalchemy>=2.0.0 - tabulate>=0.9.0 - - xarray>=2024.1.1, <=2024.9.0 + - xarray>=2024.1.1 - xlrd>=2.0.1 - xlsxwriter>=3.2.0 - zstandard>=0.22.0 diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index e2c7e2609cff8..9840278d22eab 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -51,7 +51,7 @@ dependencies: - scipy>=1.12.0 - sqlalchemy>=2.0.0 - tabulate>=0.9.0 - - xarray>=2024.1.1, <=2024.9.0 + - xarray>=2024.1.1 - xlrd>=2.0.1 - xlsxwriter>=3.2.0 - zstandard>=0.22.0 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index b3ba9c84c144a..7d3d2ea1a0ec2 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -51,7 +51,7 @@ dependencies: - scipy>=1.12.0 - sqlalchemy>=2.0.0 - tabulate>=0.9.0 - - xarray>=2024.1.1, <=2024.9.0 + - xarray>=2024.1.1 - xlrd>=2.0.1 - xlsxwriter>=3.2.0 - zstandard>=0.22.0 diff --git a/ci/deps/actions-313.yaml b/ci/deps/actions-313.yaml index 90a337a6f70f1..3184ae9724bd3 100644 --- a/ci/deps/actions-313.yaml +++ b/ci/deps/actions-313.yaml @@ -52,7 +52,7 @@ dependencies: - scipy>=1.12.0 - sqlalchemy>=2.0.0 - tabulate>=0.9.0 - - xarray>=2024.1.1, <=2024.9.0 + - xarray>=2024.1.1 - xlrd>=2.0.1 - xlsxwriter>=3.2.0 - zstandard>=0.22.0 From d73740c810592240aaebf4394b6d6b8106ca7304 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 30 Apr 2025 09:18:33 -0700 Subject: [PATCH 3/6] Remove other dependencies unused --- ci/deps/actions-311-downstream_compat.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 9fc808d2df91a..5fac58193f932 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -62,14 +62,12 @@ dependencies: - cftime - dask - ipython - - geopandas-base - seaborn - scikit-learn - statsmodels - coverage - pandas-datareader - pyyaml - - py - pip: - adbc-driver-postgresql>=0.10.0 - adbc-driver-sqlite>=0.8.0 From 12b1d00622ffc8b99986da2f4478cc2df4da213a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 May 2025 10:55:36 -0700 Subject: [PATCH 4/6] Fix xarray failures post xarray unpin --- pandas/tests/generic/test_to_xarray.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index 9fe9bca8abdc9..96b2936aadd83 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -6,6 +6,7 @@ DataFrame, MultiIndex, Series, + StringDtype, date_range, ) import pandas._testing as tm @@ -51,9 +52,6 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string): # datetimes w/tz are preserved # column names are lost expected = df.copy() - expected["f"] = expected["f"].astype( - object if not using_infer_string else "str" - ) expected.columns.name = None tm.assert_frame_equal(result.to_dataframe(), expected) @@ -88,8 +86,15 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string): class TestSeriesToXArray: - def test_to_xarray_index_types(self, index_flat): + def test_to_xarray_index_types(self, index_flat, request): index = index_flat + if isinstance(index.dtype, StringDtype) and index.dtype.storage == "pyarrow": + request.applymarker( + pytest.mark.xfail( + reason="xarray calling reshape of ArrowExtensionArray", + raises=NotImplementedError, + ) + ) # MultiIndex is tested in test_to_xarray_with_multiindex from xarray import DataArray From 904bbc48ca0a00e3aa9014178480f95ab3f94d9c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 May 2025 11:05:53 -0700 Subject: [PATCH 5/6] Fix downstream test --- pandas/tests/test_downstream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index a7b1c56ff4df2..d7398ffe259cb 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -103,7 +103,7 @@ def test_xarray_cftimeindex_nearest(): cftime = pytest.importorskip("cftime") xarray = pytest.importorskip("xarray") - times = xarray.cftime_range("0001", periods=2) + times = xarray.date_range("0001", periods=2, use_cftime=True) key = cftime.DatetimeGregorian(2000, 1, 1) result = times.get_indexer([key], method="nearest") expected = 1 From a63aeff9405cbe8875c1d29c4088f87a22902207 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 6 May 2025 09:36:35 -0700 Subject: [PATCH 6/6] xfail based on version --- pandas/tests/generic/test_to_xarray.py | 41 +++++++++++++------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index 96b2936aadd83..8917e4e3f3854 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -10,8 +10,9 @@ date_range, ) import pandas._testing as tm +from pandas.util.version import Version -pytest.importorskip("xarray") +xarray = pytest.importorskip("xarray") class TestDataFrameToXArray: @@ -30,13 +31,17 @@ def df(self): } ) - def test_to_xarray_index_types(self, index_flat, df, using_infer_string): + def test_to_xarray_index_types(self, index_flat, df, request): index = index_flat # MultiIndex is tested in test_to_xarray_with_multiindex if len(index) == 0: pytest.skip("Test doesn't make sense for empty index") - - from xarray import Dataset + elif Version(xarray.__version__) <= Version("2024.9.0"): + request.applymarker( + pytest.mark.xfail( + reason="Categorical column not preserved.", + ) + ) df.index = index[:4] df.index.name = "foo" @@ -46,7 +51,7 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string): assert len(result.coords) == 1 assert len(result.data_vars) == 8 tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) - assert isinstance(result, Dataset) + assert isinstance(result, xarray.Dataset) # idempotency # datetimes w/tz are preserved @@ -56,16 +61,12 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string): tm.assert_frame_equal(result.to_dataframe(), expected) def test_to_xarray_empty(self, df): - from xarray import Dataset - df.index.name = "foo" result = df[0:0].to_xarray() assert result.sizes["foo"] == 0 - assert isinstance(result, Dataset) + assert isinstance(result, xarray.Dataset) def test_to_xarray_with_multiindex(self, df, using_infer_string): - from xarray import Dataset - # MultiIndex df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"]) result = df.to_xarray() @@ -74,7 +75,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string): assert len(result.coords) == 2 assert len(result.data_vars) == 8 tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) - assert isinstance(result, Dataset) + assert isinstance(result, xarray.Dataset) result = result.to_dataframe() expected = df.copy() @@ -88,7 +89,11 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string): class TestSeriesToXArray: def test_to_xarray_index_types(self, index_flat, request): index = index_flat - if isinstance(index.dtype, StringDtype) and index.dtype.storage == "pyarrow": + if ( + isinstance(index.dtype, StringDtype) + and index.dtype.storage == "pyarrow" + and Version(xarray.__version__) > Version("2024.9.0") + ): request.applymarker( pytest.mark.xfail( reason="xarray calling reshape of ArrowExtensionArray", @@ -97,8 +102,6 @@ def test_to_xarray_index_types(self, index_flat, request): ) # MultiIndex is tested in test_to_xarray_with_multiindex - from xarray import DataArray - ser = Series(range(len(index)), index=index, dtype="int64") ser.index.name = "foo" result = ser.to_xarray() @@ -106,30 +109,26 @@ def test_to_xarray_index_types(self, index_flat, request): assert len(result) == len(index) assert len(result.coords) == 1 tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) - assert isinstance(result, DataArray) + assert isinstance(result, xarray.DataArray) # idempotency tm.assert_series_equal(result.to_series(), ser) def test_to_xarray_empty(self): - from xarray import DataArray - ser = Series([], dtype=object) ser.index.name = "foo" result = ser.to_xarray() assert len(result) == 0 assert len(result.coords) == 1 tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) - assert isinstance(result, DataArray) + assert isinstance(result, xarray.DataArray) def test_to_xarray_with_multiindex(self): - from xarray import DataArray - mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"]) ser = Series(range(6), dtype="int64", index=mi) result = ser.to_xarray() assert len(result) == 2 tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) - assert isinstance(result, DataArray) + assert isinstance(result, xarray.DataArray) res = result.to_series() tm.assert_series_equal(res, ser)