diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 43b3699907325..2116bc64a5805 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: types_or: [python, pyi] additional_dependencies: [black==23.1.0] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.259 + rev: v0.0.264 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -40,13 +40,13 @@ repos: pass_filenames: true require_serial: false - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.4 hooks: - id: codespell types_or: [python, rst, markdown, cython, c] additional_dependencies: [tomli] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.12.5 + rev: v0.15.0 hooks: - id: cython-lint - id: double-quote-cython-strings @@ -104,7 +104,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.3.2 hooks: - id: pyupgrade args: [--py38-plus] diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 05d569f0e58eb..6dfaf6624d36f 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -30,7 +30,7 @@ from enum import Enum class _NoDefault(Enum): no_default = ... -no_default: Final = _NoDefault.no_default +no_default: Final = _NoDefault.no_default # noqa NoDefault = Literal[_NoDefault.no_default] i8max: int diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 317d43e99af3a..636267e54027f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5139,7 +5139,7 @@ def drop( Drop columns and/or rows of MultiIndex DataFrame - >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) @@ -5149,7 +5149,7 @@ def drop( ... [1, 0.8], [0.3, 0.2]]) >>> df big small - lama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 @@ -5165,7 +5165,7 @@ def drop( >>> df.drop(index=('falcon', 'weight')) big small - lama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 @@ -5176,7 +5176,7 @@ def drop( >>> df.drop(index='cow', columns='small') big - lama speed 45.0 + llama speed 45.0 weight 200.0 length 1.5 falcon speed 320.0 @@ -5185,7 +5185,7 @@ def drop( >>> df.drop(index='length', level=1) big small - lama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 cow speed 30.0 20.0 weight 250.0 150.0 diff --git a/pandas/core/series.py b/pandas/core/series.py index f4c2509b44d25..afa124368a29e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2225,14 +2225,14 @@ def drop_duplicates( -------- Generate a Series with duplicated entries. - >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], + >>> s = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], ... name='animal') >>> s - 0 lama + 0 llama 1 cow - 2 lama + 2 llama 3 beetle - 4 lama + 4 llama 5 hippo Name: animal, dtype: object @@ -2241,7 +2241,7 @@ def drop_duplicates( set of duplicated entries. The default value of keep is 'first'. >>> s.drop_duplicates() - 0 lama + 0 llama 1 cow 3 beetle 5 hippo @@ -2253,7 +2253,7 @@ def drop_duplicates( >>> s.drop_duplicates(keep='last') 1 cow 3 beetle - 4 lama + 4 llama 5 hippo Name: animal, dtype: object @@ -2314,7 +2314,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series: By default, for each set of duplicated values, the first occurrence is set on False and all others on True: - >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> animals = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama']) >>> animals.duplicated() 0 False 1 False @@ -4862,14 +4862,14 @@ def drop( Drop 2nd level label in MultiIndex Series - >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], ... index=midx) >>> s - lama speed 45.0 + llama speed 45.0 weight 200.0 length 1.2 cow speed 30.0 @@ -4881,7 +4881,7 @@ def drop( dtype: float64 >>> s.drop(labels='weight', level=1) - lama speed 45.0 + llama speed 45.0 length 1.2 cow speed 30.0 length 1.5 @@ -5048,9 +5048,9 @@ def isin(self, values) -> Series: Examples -------- - >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', + >>> s = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', ... 'hippo'], name='animal') - >>> s.isin(['cow', 'lama']) + >>> s.isin(['cow', 'llama']) 0 True 1 True 2 True @@ -5061,7 +5061,7 @@ def isin(self, values) -> Series: To invert the boolean values, use the ``~`` operator: - >>> ~s.isin(['cow', 'lama']) + >>> ~s.isin(['cow', 'llama']) 0 False 1 False 2 False @@ -5070,10 +5070,10 @@ def isin(self, values) -> Series: 5 True Name: animal, dtype: bool - Passing a single string as ``s.isin('lama')`` will raise an error. Use + Passing a single string as ``s.isin('llama')`` will raise an error. Use a list of one element instead: - >>> s.isin(['lama']) + >>> s.isin(['llama']) 0 True 1 False 2 True diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 7e7f1a628da6e..acfb0ef657023 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -257,10 +257,10 @@ def test_groupby_count_dateparseerror(self): def test_groupby_timedelta_cython_count(): df = DataFrame( - {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")} + {"g": list("ab" * 2), "delta": np.arange(4).astype("timedelta64[ns]")} ) - expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt") - result = df.groupby("g").delt.count() + expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delta") + result = df.groupby("g").delta.count() tm.assert_series_equal(expected, result) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 37ecce84e3caa..da0df3954b84a 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -843,7 +843,13 @@ def test_bad_sheetname_raises(self, read_ext, sheet_name): def test_missing_file_raises(self, read_ext): bad_file = f"foo{read_ext}" # CI tests with other languages, translates to "No such file or directory" - match = r"(No such file or directory|没有那个文件或目录|File o directory non esistente)" + match = "|".join( + [ + "(No such file or directory", + "没有那个文件或目录", + "File o directory non esistente)", + ] + ) with pytest.raises(FileNotFoundError, match=match): pd.read_excel(bad_file) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 175c2478808b9..e4680cca881e5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -629,7 +629,10 @@ def test_east_asian_unicode_false(self): # all col df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -641,7 +644,10 @@ def test_east_asian_unicode_false(self): # column name df = DataFrame( - {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + { + "b": ["あ", "いいい", "う", "ええええええ"], + "あああああ": [1, 222, 33333, 4], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -653,7 +659,10 @@ def test_east_asian_unicode_false(self): # index df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["あああ", "いいいいいい", "うう", "え"], ) expected = ( @@ -665,7 +674,10 @@ def test_east_asian_unicode_false(self): # index name df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=Index(["あ", "い", "うう", "え"], name="おおおお"), ) expected = ( @@ -680,7 +692,10 @@ def test_east_asian_unicode_false(self): # all df = DataFrame( - {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + { + "あああ": ["あああ", "い", "う", "えええええ"], + "いいいいい": ["あ", "いいい", "う", "ええ"], + }, index=Index(["あ", "いいい", "うう", "え"], name="お"), ) expected = ( @@ -698,7 +713,10 @@ def test_east_asian_unicode_false(self): [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=idx, ) expected = ( @@ -766,7 +784,10 @@ def test_east_asian_unicode_true(self): # all col df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -780,7 +801,10 @@ def test_east_asian_unicode_true(self): # column name df = DataFrame( - {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + { + "b": ["あ", "いいい", "う", "ええええええ"], + "あああああ": [1, 222, 33333, 4], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -794,7 +818,10 @@ def test_east_asian_unicode_true(self): # index df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["あああ", "いいいいいい", "うう", "え"], ) expected = ( @@ -808,7 +835,10 @@ def test_east_asian_unicode_true(self): # index name df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=Index(["あ", "い", "うう", "え"], name="おおおお"), ) expected = ( @@ -823,7 +853,10 @@ def test_east_asian_unicode_true(self): # all df = DataFrame( - {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + { + "あああ": ["あああ", "い", "う", "えええええ"], + "いいいいい": ["あ", "いいい", "う", "ええ"], + }, index=Index(["あ", "いいい", "うう", "え"], name="お"), ) expected = ( @@ -841,7 +874,10 @@ def test_east_asian_unicode_true(self): [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=idx, ) expected = ( @@ -886,7 +922,10 @@ def test_east_asian_unicode_true(self): # ambiguous unicode df = DataFrame( - {"b": ["あ", "いいい", "¡¡", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + { + "b": ["あ", "いいい", "¡¡", "ええええええ"], + "あああああ": [1, 222, 33333, 4], + }, index=["a", "bb", "c", "¡¡¡"], ) expected = ( @@ -2205,24 +2244,52 @@ def test_east_asian_unicode_series(self): # unicode index s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) - expected = "あ a\nいい bb\nううう CCC\nええええ D\ndtype: object" + expected = "".join( + [ + "あ a\n", + "いい bb\n", + "ううう CCC\n", + "ええええ D\ndtype: object", + ] + ) assert repr(s) == expected # unicode values s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"]) - expected = "a あ\nbb いい\nc ううう\nddd ええええ\ndtype: object" + expected = "".join( + [ + "a あ\n", + "bb いい\n", + "c ううう\n", + "ddd ええええ\n", + "dtype: object", + ] + ) + assert repr(s) == expected # both - s = Series(["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"]) - expected = ( - "ああ あ\nいいいい いい\nう ううう\nえええ ええええ\ndtype: object" + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["ああ", "いいいい", "う", "えええ"], + ) + expected = "".join( + [ + "ああ あ\n", + "いいいい いい\n", + "う ううう\n", + "えええ ええええ\n", + "dtype: object", + ] ) + assert repr(s) == expected # unicode footer s = Series( - ["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"], name="おおおおおおお" + ["あ", "いい", "ううう", "ええええ"], + index=["ああ", "いいいい", "う", "えええ"], + name="おおおおおおお", ) expected = ( "ああ あ\nいいいい いい\nう ううう\n" @@ -2284,7 +2351,10 @@ def test_east_asian_unicode_series(self): # Enable Unicode option ----------------------------------------- with option_context("display.unicode.east_asian_width", True): # unicode index - s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) + s = Series( + ["a", "bb", "CCC", "D"], + index=["あ", "いい", "ううう", "ええええ"], + ) expected = ( "あ a\nいい bb\nううう CCC\n" "ええええ D\ndtype: object" @@ -2292,15 +2362,20 @@ def test_east_asian_unicode_series(self): assert repr(s) == expected # unicode values - s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"]) + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["a", "bb", "c", "ddd"], + ) expected = ( "a あ\nbb いい\nc ううう\n" "ddd ええええ\ndtype: object" ) assert repr(s) == expected - # both - s = Series(["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"]) + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["ああ", "いいいい", "う", "えええ"], + ) expected = ( "ああ あ\n" "いいいい いい\n" @@ -2380,7 +2455,8 @@ def test_east_asian_unicode_series(self): # ambiguous unicode s = Series( - ["¡¡", "い¡¡", "ううう", "ええええ"], index=["ああ", "¡¡¡¡いい", "¡¡", "えええ"] + ["¡¡", "い¡¡", "ううう", "ええええ"], + index=["ああ", "¡¡¡¡いい", "¡¡", "えええ"], ) expected = ( "ああ ¡¡\n" diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index f537c2f0681d7..31c7994f39058 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -227,7 +227,10 @@ def test_parse_encoded_special_characters(encoding): encoded_data = BytesIO(data.encode(encoding)) result = read_csv(encoded_data, delimiter="\t", encoding=encoding) - expected = DataFrame(data=[[":foo", 0], ["bar", 1], ["baz", 2]], columns=["a", "b"]) + expected = DataFrame( + data=[[":foo", 0], ["bar", 1], ["baz", 2]], + columns=["a", "b"], + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 071bc67d2dad9..c6a6b9eeadf5f 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -454,11 +454,20 @@ def test_file_charset(datapath, parser): "問 既破性申假 亦應但破有申無 若有無兩洗 亦應性假雙破耶", ], "答": [ - "答 邪既無量 正亦多途 大略為言不出二種 謂有得與無得 有得是邪須破 無得是正須申\n\t\t故", + "".join( + [ + "答 邪既無量 正亦多途 大略為言不出二種 謂", + "有得與無得 有得是邪須破 無得是正須申\n\t\t故", + ] + ), None, "答 不例 有無皆是性 所以須雙破 既分性假異 故有破不破", ], - "a": [None, "答 性執是有得 假名是無得 今破有得申無得 即是破性執申假名也", None], + "a": [ + None, + "答 性執是有得 假名是無得 今破有得申無得 即是破性執申假名也", + None, + ], } ) diff --git a/pyproject.toml b/pyproject.toml index d6828159d443e..13362e998a57a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -280,6 +280,8 @@ ignore = [ # "PYI041", # not yet implemented # Additional checks that don't pass yet + # Useless statement + "B018", # Within an except clause, raise exceptions with ... "B904", # Magic number diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py index 33890feb8692d..158cf46f264c2 100644 --- a/scripts/check_test_naming.py +++ b/scripts/check_test_naming.py @@ -118,7 +118,7 @@ def main(content: str, file: str) -> int: assert isinstance(_node, ast.FunctionDef) # help mypy should_continue = False for _file in (Path("pandas") / "tests").rglob("*.py"): - with open(os.path.join(_file)) as fd: + with open(os.path.join(_file), encoding="utf-8") as fd: _content = fd.read() if f"self.{_node.name}" in _content: should_continue = True