diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6c7acf5b3823..767ef62bb1758 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -176,6 +176,13 @@ repos: files: ^pandas/core/ exclude: ^pandas/core/api\.py$ types: [python] + - id: use-io-common-urlopen + name: Use pandas.io.common.urlopen instead of urllib.request.urlopen + language: python + entry: python scripts/use_io_common_urlopen.py + files: ^pandas/ + exclude: ^pandas/tests/ + types: [python] - id: no-bool-in-core-generic name: Use bool_t instead of bool in pandas/core/generic.py entry: python scripts/no_bool_in_generic.py diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst deleted file mode 100644 index b15898c623aec..0000000000000 --- a/doc/source/development/code_style.rst +++ /dev/null @@ -1,31 +0,0 @@ -.. _code_style: - -{{ header }} - -======================= -pandas code style guide -======================= - -.. contents:: Table of contents: - :local: - -Patterns -======== - -We use a ``flake8`` plugin, `pandas-dev-flaker <https://github.com/pandas-dev/pandas-dev-flaker>`_, to -check our codebase for unwanted patterns. See its ``README`` for the up-to-date list of rules we enforce. - -Miscellaneous -============= - -Reading from a url ------------------- - -**Good:** - -.. code-block:: python - - from pandas.io.common import urlopen - - with urlopen("http://www.google.com") as url: - raw_text = url.read() diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index 2fa6bf62ba80f..b0ba275e3d895 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -37,15 +37,14 @@ In addition to ``./ci/code_checks.sh``, some extra checks are run by ``pre-commit`` - see :ref:`here <contributing.pre-commit>` for how to run them. -Additional standards are outlined on the :ref:`pandas code style guide <code_style>`. - .. 
_contributing.pre-commit: Pre-commit ---------- Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks -like ``black``, ``flake8``, ``isort``, and ``cpplint`` and more using `pre-commit hooks <https://pre-commit.com/>`_ +like ``black``, ``flake8`` (including a `pandas-dev-flaker <https://github.com/pandas-dev/pandas-dev-flaker>`_ plugin), +``isort``, and ``cpplint`` and more using `pre-commit hooks <https://pre-commit.com/>`_ Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore, it is helpful to run the check yourself before submitting code. This can be done by installing ``pre-commit``:: diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst index fb50a88c6637f..01509705bb92c 100644 --- a/doc/source/development/index.rst +++ b/doc/source/development/index.rst @@ -16,7 +16,6 @@ Development contributing_environment contributing_documentation contributing_codebase - code_style maintaining internals test_writing diff --git a/scripts/tests/test_use_io_common_urlopen.py b/scripts/tests/test_use_io_common_urlopen.py new file mode 100644 index 0000000000000..4bba550a4cc0e --- /dev/null +++ b/scripts/tests/test_use_io_common_urlopen.py @@ -0,0 +1,23 @@ +import pytest + +from scripts.use_io_common_urlopen import use_io_common_urlopen + +PATH = "t.py" + + +def test_inconsistent_usage(capsys): + content = "from urllib.request import urlopen" + result_msg = ( + "t.py:1:0: Don't use urllib.request.urlopen, " + "use pandas.io.common.urlopen instead\n" + ) + with pytest.raises(SystemExit, match=None): + use_io_common_urlopen(content, PATH) + expected_msg, _ = capsys.readouterr() + assert result_msg == expected_msg + + +def test_consistent_usage(): + # should not raise + content = "from pandas.io.common import urlopen" + use_io_common_urlopen(content, PATH) diff --git a/scripts/use_io_common_urlopen.py b/scripts/use_io_common_urlopen.py new file mode 100644 index 0000000000000..11d8378fce574 --- /dev/null +++ b/scripts/use_io_common_urlopen.py @@ -0,0 +1,63 @@ +""" +Check that pandas/core 
(and the rest of pandas/ outside the tests) imports urlopen from pandas.io.common, not urllib.request. + +Any ``from urllib.request import urlopen`` is reported and the check exits with status 1. + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run use-io-common-urlopen --all-files + +""" + +from __future__ import annotations + +import argparse +import ast +import sys +from typing import Sequence + +ERROR_MESSAGE = ( + "{path}:{lineno}:{col_offset}: " + "Don't use urllib.request.urlopen, use pandas.io.common.urlopen instead\n" +) + + +class Visitor(ast.NodeVisitor): + def __init__(self, path: str) -> None: + self.path = path + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + # Check that pandas.io.common.urlopen is used instead of + # urllib.request.urlopen + if ( + node.module is not None + and node.module.startswith("urllib.request") + and any(i.name == "urlopen" for i in node.names) + ): + msg = ERROR_MESSAGE.format( + path=self.path, lineno=node.lineno, col_offset=node.col_offset + ) + sys.stdout.write(msg) + sys.exit(1) + super().generic_visit(node) + + +def use_io_common_urlopen(content: str, path: str) -> None: + tree = ast.parse(content) + visitor = Visitor(path) + visitor.visit(tree) + + +def main(argv: Sequence[str] | None = None) -> None: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + args = parser.parse_args(argv) + + for path in args.paths: + with open(path, encoding="utf-8") as fd: + content = fd.read() + use_io_common_urlopen(content, path) + + +if __name__ == "__main__": + main()