From 87fa219f56e15e7fe6cc6e4ff7a9aefec2f6c92d Mon Sep 17 00:00:00 2001 From: SnorfYang <68773271+snorfyang@users.noreply.github.com> Date: Thu, 27 Apr 2023 16:53:47 +0000 Subject: [PATCH 1/8] Bug: fix pre-commit problems --- .pre-commit-config.yaml | 12 ++++++------ scripts/check_test_naming.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 43b3699907325..6b7a48e4267e8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: types_or: [python, pyi] additional_dependencies: [black==23.1.0] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.259 + rev: v0.0.263 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -40,13 +40,13 @@ repos: pass_filenames: true require_serial: false - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.4 hooks: - id: codespell types_or: [python, rst, markdown, cython, c] additional_dependencies: [tomli] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.12.5 + rev: v0.15.0 hooks: - id: cython-lint - id: double-quote-cython-strings @@ -79,12 +79,12 @@ repos: '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' ] - repo: https://github.com/pycqa/pylint - rev: v2.16.2 + rev: v3.0.0a6 hooks: - id: pylint stages: [manual] - repo: https://github.com/pycqa/pylint - rev: v2.16.2 + rev: v3.0.0a6 hooks: - id: pylint alias: redefined-outer-name @@ -104,7 +104,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.3.2 hooks: - id: pyupgrade args: [--py38-plus] diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py index 33890feb8692d..158cf46f264c2 100644 --- a/scripts/check_test_naming.py +++ b/scripts/check_test_naming.py @@ -118,7 +118,7 @@ def main(content: str, file: str) -> int: assert isinstance(_node, ast.FunctionDef) # help mypy should_continue = False for _file in (Path("pandas") / "tests").rglob("*.py"): - with open(os.path.join(_file)) as fd: + with open(os.path.join(_file), encoding="utf-8") as fd: _content = fd.read() if f"self.{_node.name}" in _content: should_continue = True From ef1a9fdea06b249fe431c929d6adb5c2120b10ec Mon Sep 17 00:00:00 2001 From: SnorfYang <68773271+snorfyang@users.noreply.github.com> Date: Fri, 28 Apr 2023 12:59:40 +0800 Subject: [PATCH 2/8] fix some errors --- pandas/_libs/lib.pyi | 2 +- pandas/core/frame.py | 10 ++-- pandas/core/series.py | 30 +++++------ pandas/tests/groupby/test_counting.py | 6 +-- pandas/tests/io/excel/test_readers.py | 4 +- pandas/tests/io/formats/test_format.py | 75 ++++++++++++++++++++------ pandas/tests/io/xml/test_xml.py | 6 ++- pyproject.toml | 2 + typings/numba.pyi | 1 + 9 files changed, 94 insertions(+), 42 deletions(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 05d569f0e58eb..6dfaf6624d36f 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -30,7 +30,7 @@ from enum import Enum class _NoDefault(Enum): no_default = ... -no_default: Final = _NoDefault.no_default +no_default: Final = _NoDefault.no_default # noqa NoDefault = Literal[_NoDefault.no_default] i8max: int diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 04c1b18cb1af1..ede6d4e2dc985 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5140,7 +5140,7 @@ def drop( Drop columns and/or rows of MultiIndex DataFrame - >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) @@ -5150,7 +5150,7 @@ def drop( ... [1, 0.8], [0.3, 0.2]]) >>> df big small - lama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 @@ -5166,7 +5166,7 @@ def drop( >>> df.drop(index=('falcon', 'weight')) big small - lama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 @@ -5177,7 +5177,7 @@ def drop( >>> df.drop(index='cow', columns='small') big - lama speed 45.0 + llama speed 45.0 weight 200.0 length 1.5 falcon speed 320.0 @@ -5186,7 +5186,7 @@ def drop( >>> df.drop(index='length', level=1) big small - lama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 cow speed 30.0 20.0 weight 250.0 150.0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 9693981cc5422..a0a90dde27bb3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2222,14 +2222,14 @@ def drop_duplicates( -------- Generate a Series with duplicated entries. - >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], + >>> s = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], ... name='animal') >>> s - 0 lama + 0 llama 1 cow - 2 lama + 2 llama 3 beetle - 4 lama + 4 llama 5 hippo Name: animal, dtype: object @@ -2238,7 +2238,7 @@ def drop_duplicates( set of duplicated entries. The default value of keep is 'first'. >>> s.drop_duplicates() - 0 lama + 0 llama 1 cow 3 beetle 5 hippo @@ -2250,7 +2250,7 @@ def drop_duplicates( >>> s.drop_duplicates(keep='last') 1 cow 3 beetle - 4 lama + 4 llama 5 hippo Name: animal, dtype: object @@ -2311,7 +2311,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series: By default, for each set of duplicated values, the first occurrence is set on False and all others on True: - >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> animals = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama']) >>> animals.duplicated() 0 False 1 False @@ -4859,14 +4859,14 @@ def drop( Drop 2nd level label in MultiIndex Series - >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], ... index=midx) >>> s - lama speed 45.0 + llama speed 45.0 weight 200.0 length 1.2 cow speed 30.0 @@ -4878,7 +4878,7 @@ def drop( dtype: float64 >>> s.drop(labels='weight', level=1) - lama speed 45.0 + llama speed 45.0 length 1.2 cow speed 30.0 length 1.5 @@ -5045,9 +5045,9 @@ def isin(self, values) -> Series: Examples -------- - >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', + >>> s = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', ... 'hippo'], name='animal') - >>> s.isin(['cow', 'lama']) + >>> s.isin(['cow', 'llama']) 0 True 1 True 2 True @@ -5058,7 +5058,7 @@ def isin(self, values) -> Series: To invert the boolean values, use the ``~`` operator: - >>> ~s.isin(['cow', 'lama']) + >>> ~s.isin(['cow', 'llama']) 0 False 1 False 2 False @@ -5067,10 +5067,10 @@ def isin(self, values) -> Series: 5 True Name: animal, dtype: bool - Passing a single string as ``s.isin('lama')`` will raise an error. Use + Passing a single string as ``s.isin('llama')`` will raise an error. Use a list of one element instead: - >>> s.isin(['lama']) + >>> s.isin(['llama']) 0 True 1 False 2 True diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 921b42efb2bb3..7e0602b7b9b55 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -257,10 +257,10 @@ def test_groupby_count_dateparseerror(self): def test_groupby_timedelta_cython_count(): df = DataFrame( - {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")} + {"g": list("ab" * 2), "delta": np.arange(4).astype("timedelta64[ns]")} ) - expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt") - result = df.groupby("g").delt.count() + expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delta") + result = df.groupby("g").delta.count() tm.assert_series_equal(expected, result) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 37ecce84e3caa..6097f6ba39026 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -843,7 +843,9 @@ def test_bad_sheetname_raises(self, read_ext, sheet_name): def test_missing_file_raises(self, read_ext): bad_file = f"foo{read_ext}" # CI tests with other languages, translates to "No such file or directory" - match = r"(No such file or directory|没有那个文件或目录|File o directory non esistente)" + match = ( + r"(No such file or directory|没有那个文件或目录|" r"File o directory non esistente)" + ) with pytest.raises(FileNotFoundError, match=match): pd.read_excel(bad_file) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 175c2478808b9..c1ded96078ba0 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -629,7 +629,10 @@ def test_east_asian_unicode_false(self): # all col df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -641,7 +644,10 @@ def test_east_asian_unicode_false(self): # column name df = DataFrame( - {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + { + "b": ["あ", "いいい", "う", "ええええええ"], + "あああああ": [1, 222, 33333, 4], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -653,7 +659,10 @@ def test_east_asian_unicode_false(self): # index df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["あああ", "いいいいいい", "うう", "え"], ) expected = ( @@ -665,7 +674,10 @@ def test_east_asian_unicode_false(self): # index name df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=Index(["あ", "い", "うう", "え"], name="おおおお"), ) expected = ( @@ -680,7 +692,10 @@ def test_east_asian_unicode_false(self): # all df = DataFrame( - {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + { + "あああ": ["あああ", "い", "う", "えええええ"], + "いいいいい": ["あ", "いいい", "う", "ええ"], + }, index=Index(["あ", "いいい", "うう", "え"], name="お"), ) expected = ( @@ -698,7 +713,10 @@ def test_east_asian_unicode_false(self): [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=idx, ) expected = ( @@ -766,7 +784,10 @@ def test_east_asian_unicode_true(self): # all col df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -780,7 +801,10 @@ def test_east_asian_unicode_true(self): # column name df = DataFrame( - {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + { + "b": ["あ", "いいい", "う", "ええええええ"], + "あああああ": [1, 222, 33333, 4], + }, index=["a", "bb", "c", "ddd"], ) expected = ( @@ -794,7 +818,10 @@ def test_east_asian_unicode_true(self): # index df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=["あああ", "いいいいいい", "うう", "え"], ) expected = ( @@ -808,7 +835,10 @@ def test_east_asian_unicode_true(self): # index name df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=Index(["あ", "い", "うう", "え"], name="おおおお"), ) expected = ( @@ -823,7 +853,10 @@ def test_east_asian_unicode_true(self): # all df = DataFrame( - {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + { + "あああ": ["あああ", "い", "う", "えええええ"], + "いいいいい": ["あ", "いいい", "う", "ええ"], + }, index=Index(["あ", "いいい", "うう", "え"], name="お"), ) expected = ( @@ -841,7 +874,10 @@ def test_east_asian_unicode_true(self): [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) df = DataFrame( - {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + }, index=idx, ) expected = ( @@ -886,7 +922,10 @@ def test_east_asian_unicode_true(self): # ambiguous unicode df = DataFrame( - {"b": ["あ", "いいい", "¡¡", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + { + "b": ["あ", "いいい", "¡¡", "ええええええ"], + "あああああ": [1, 222, 33333, 4], + }, index=["a", "bb", "c", "¡¡¡"], ) expected = ( @@ -2205,18 +2244,22 @@ def test_east_asian_unicode_series(self): # unicode index s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) - expected = "あ a\nいい bb\nううう CCC\nええええ D\ndtype: object" + expected = ( + "あ a\nいい bb\nうう" "う CCC\nええええ D\ndtype: object" + ) assert repr(s) == expected # unicode values s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"]) - expected = "a あ\nbb いい\nc ううう\nddd ええええ\ndtype: object" + expected = ( + "a あ\nbb いい\nc う" "うう\nddd ええええ\ndtype: object" + ) assert repr(s) == expected # both s = Series(["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"]) expected = ( - "ああ あ\nいいいい いい\nう ううう\nえええ ええええ\ndtype: object" + "ああ あ\nいいいい いい\nう うう" "う\nえええ ええええ\ndtype: object" ) assert repr(s) == expected diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 071bc67d2dad9..53810a7251a2d 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -458,7 +458,11 @@ def test_file_charset(datapath, parser): None, "答 不例 有無皆是性 所以須雙破 既分性假異 故有破不破", ], - "a": [None, "答 性執是有得 假名是無得 今破有得申無得 即是破性執申假名也", None], + "a": [ + None, + "答 性執是有得 假名是無得 今破有得申無得 即是破性執申假名也", + None, + ], } ) diff --git a/pyproject.toml b/pyproject.toml index aacf8649add35..b6d2cdfef64a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -280,6 +280,8 @@ ignore = [ # "PYI041", # not yet implemented # Additional checks that don't pass yet + # Useless statement + "B018", # Within an except clause, raise exceptions with ... "B904", # Magic number diff --git a/typings/numba.pyi b/typings/numba.pyi index 0d9184af19a0f..f6ecb85673a77 100644 --- a/typings/numba.pyi +++ b/typings/numba.pyi @@ -1,3 +1,4 @@ +# ruff: noqa: PYI033 # pyright: reportIncompleteStub = false from typing import ( TYPE_CHECKING, From ad1480cddb5b0292d19bfde9604efe107747c082 Mon Sep 17 00:00:00 2001 From: yusharth Date: Tue, 2 May 2023 11:54:01 +0530 Subject: [PATCH 3/8] pre commit issues passed --- =1.5 | 6 +++ pandas/tests/io/excel/test_readers.py | 8 +++- pandas/tests/io/formats/test_format.py | 54 ++++++++++++++++--------- pandas/tests/io/parser/test_encoding.py | 4 +- pandas/tests/io/xml/test_xml.py | 5 ++- 5 files changed, 53 insertions(+), 24 deletions(-) create mode 100644 =1.5 diff --git a/=1.5 b/=1.5 new file mode 100644 index 0000000000000..e38f983163b95 --- /dev/null +++ b/=1.5 @@ -0,0 +1,6 @@ +Requirement already satisfied: pandas in /home/yusharthsingh/Desktop/pandas (0+untagged.32262.g3790452.dirty) +Requirement already satisfied: python-dateutil>=2.8.2 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from pandas) (2.8.2) +Requirement already satisfied: pytz>=2020.1 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from pandas) (2023.3) +Requirement already satisfied: tzdata>=2022.1 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from pandas) (2023.3) +Requirement already satisfied: numpy>=1.21.6 in /home/yusharthsingh/.local/lib/python3.10/site-packages (from pandas) (1.23.1) +Requirement already satisfied: six>=1.5 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 6097f6ba39026..1ce97702283f2 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -843,8 +843,12 @@ def test_bad_sheetname_raises(self, read_ext, sheet_name): def test_missing_file_raises(self, read_ext): bad_file = f"foo{read_ext}" # CI tests with other languages, translates to "No such file or directory" - match = ( - r"(No such file or directory|没有那个文件或目录|" r"File o directory non esistente)" + match = "|".join( + [ + "No such file or directory", + "没有那个文件或目录", + "File o directory non esistente", + ] ) with pytest.raises(FileNotFoundError, match=match): pd.read_excel(bad_file) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c1ded96078ba0..57a409b0ec982 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2244,29 +2244,46 @@ def test_east_asian_unicode_series(self): # unicode index s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) - expected = ( - "あ a\nいい bb\nうう" "う CCC\nええええ D\ndtype: object" + expected = "".join( + [ + "あ a\n", + "いい bb\n", + "ううう CCC\n", + "ええええ D\ndtype: object", + ] ) assert repr(s) == expected # unicode values s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"]) - expected = ( - "a あ\nbb いい\nc う" "うう\nddd ええええ\ndtype: object" + expected = "".join( + [ + "a あ\n", + "bb いい\n", + "c ううう\n", + "ddd ええええ\n", + "dtype: object", + ] ) + assert repr(s) == expected # both - s = Series(["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"]) + indices = ["ああ", "いいいい", "う", "えええ"] + s = Series(["あ", "いい", "ううう", "ええええ"], index=indices) expected = ( - "ああ あ\nいいいい いい\nう うう" "う\nえええ ええええ\ndtype: object" + "ああ あ\n" + "いいいい いい\n" + "う ううう\n" + "えええ ええええ\n" + "dtype: object" ) - assert repr(s) == expected + assert repr(s) == expected + indices = ["ああ", "いいいい", "う", "えええ"] + name = "おおおおおおお" # unicode footer - s = Series( - ["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"], name="おおおおおおお" - ) + s = Series(["あ", "いい", "ううう", "ええええ"], index=indices, name=name) expected = ( "ああ あ\nいいいい いい\nう ううう\n" "えええ ええええ\nName: おおおおおおお, dtype: object" @@ -2327,23 +2344,24 @@ def test_east_asian_unicode_series(self): # Enable Unicode option ----------------------------------------- with option_context("display.unicode.east_asian_width", True): # unicode index - s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) + indices = ["あ", "いい", "ううう", "ええええ"] + s = Series(["a", "bb", "CCC", "D"], index=indices) expected = ( "あ a\nいい bb\nううう CCC\n" "ええええ D\ndtype: object" ) assert repr(s) == expected - + indices = ["a", "bb", "c", "ddd"] # unicode values - s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"]) + s = Series(["あ", "いい", "ううう", "ええええ"], index=indices) expected = ( "a あ\nbb いい\nc ううう\n" "ddd ええええ\ndtype: object" ) assert repr(s) == expected - + indices = ["ああ", "いいいい", "う", "えええ"] # both - s = Series(["あ", "いい", "ううう", "ええええ"], index=["ああ", "いいいい", "う", "えええ"]) + s = Series(["あ", "いい", "ううう", "ええええ"], index=indices) expected = ( "ああ あ\n" "いいいい いい\n" @@ -2420,11 +2438,9 @@ def test_east_asian_unicode_series(self): "Name: おおおおおおお, Length: 4, dtype: object" ) assert repr(s) == expected - + indices = ["ああ", "¡¡¡¡いい", "¡¡", "えええ"] # ambiguous unicode - s = Series( - ["¡¡", "い¡¡", "ううう", "ええええ"], index=["ああ", "¡¡¡¡いい", "¡¡", "えええ"] - ) + s = Series(["¡¡", "い¡¡", "ううう", "ええええ"], index=indices) expected = ( "ああ ¡¡\n" "¡¡¡¡いい い¡¡\n" diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index f537c2f0681d7..9d6fb12ecfb3c 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -226,8 +226,8 @@ def test_parse_encoded_special_characters(encoding): data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" encoded_data = BytesIO(data.encode(encoding)) result = read_csv(encoded_data, delimiter="\t", encoding=encoding) - - expected = DataFrame(data=[[":foo", 0], ["bar", 1], ["baz", 2]], columns=["a", "b"]) + column = ["a", "b"] + expected = DataFrame(data=[[":foo", 0], ["bar", 1], ["baz", 2]], columns=column) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 53810a7251a2d..f11a8d8b36573 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -454,7 +454,10 @@ def test_file_charset(datapath, parser): "問 既破性申假 亦應但破有申無 若有無兩洗 亦應性假雙破耶", ], "答": [ - "答 邪既無量 正亦多途 大略為言不出二種 謂有得與無得 有得是邪須破 無得是正須申\n\t\t故", + "答 邪既無量 正亦多途 大略為言不出二種 謂有得與無得 有得是邪須破 ", + "無得是正須申\n\t\t故", + "答 邪既無量 正亦多途 大略為言不出二種 謂有得與無得 有得是邪須破 ", + "無得是正須申\n\t\t故", None, "答 不例 有無皆是性 所以須雙破 既分性假異 故有破不破", ], From abb20806396b7d1e216529afa7e6365c12ed5a00 Mon Sep 17 00:00:00 2001 From: Yusharth Singh <57184245+yusharth@users.noreply.github.com> Date: Tue, 2 May 2023 15:01:10 +0530 Subject: [PATCH 4/8] Delete =1.5 --- =1.5 | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 =1.5 diff --git a/=1.5 b/=1.5 deleted file mode 100644 index e38f983163b95..0000000000000 --- a/=1.5 +++ /dev/null @@ -1,6 +0,0 @@ -Requirement already satisfied: pandas in /home/yusharthsingh/Desktop/pandas (0+untagged.32262.g3790452.dirty) -Requirement already satisfied: python-dateutil>=2.8.2 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from pandas) (2.8.2) -Requirement already satisfied: pytz>=2020.1 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from pandas) (2023.3) -Requirement already satisfied: tzdata>=2022.1 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from pandas) (2023.3) -Requirement already satisfied: numpy>=1.21.6 in /home/yusharthsingh/.local/lib/python3.10/site-packages (from pandas) (1.23.1) -Requirement already satisfied: six>=1.5 in /home/yusharthsingh/mambaforge/envs/pandas-dev/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0) From fa83b606daaf213f00c1d6801f0d17bd81ef631e Mon Sep 17 00:00:00 2001 From: yusharth Date: Tue, 2 May 2023 18:00:33 +0530 Subject: [PATCH 5/8] pytlint and alignment issue --- .pre-commit-config.yaml | 4 ++-- pandas/core/frame.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6b7a48e4267e8..4d131d90719e1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -79,12 +79,12 @@ repos: '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' ] - repo: https://github.com/pycqa/pylint - rev: v3.0.0a6 + rev: v2.16.2 hooks: - id: pylint stages: [manual] - repo: https://github.com/pycqa/pylint - rev: v3.0.0a6 + rev: v2.16.2 hooks: - id: pylint alias: redefined-outer-name diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ede6d4e2dc985..f53a7ad691d46 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5150,7 +5150,7 @@ def drop( ... [1, 0.8], [0.3, 0.2]]) >>> df big small - llama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 @@ -5166,7 +5166,7 @@ def drop( >>> df.drop(index=('falcon', 'weight')) big small - llama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 @@ -5177,7 +5177,7 @@ def drop( >>> df.drop(index='cow', columns='small') big - llama speed 45.0 + llama speed 45.0 weight 200.0 length 1.5 falcon speed 320.0 @@ -5186,7 +5186,7 @@ def drop( >>> df.drop(index='length', level=1) big small - llama speed 45.0 30.0 + llama speed 45.0 30.0 weight 200.0 100.0 cow speed 30.0 20.0 weight 250.0 150.0 From daa10a9c6f1e61b952de6812a7a9a3a8b75a635c Mon Sep 17 00:00:00 2001 From: SnorfYang <68773271+snorfyang@users.noreply.github.com> Date: Tue, 2 May 2023 22:34:46 +0800 Subject: [PATCH 6/8] fix errors --- pandas/tests/io/excel/test_readers.py | 4 +- pandas/tests/io/formats/test_format.py | 55 ++++++++++++++++--------- pandas/tests/io/parser/test_encoding.py | 7 +++- pandas/tests/io/xml/test_xml.py | 10 +++-- 4 files changed, 49 insertions(+), 27 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 1ce97702283f2..da0df3954b84a 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -845,9 +845,9 @@ def test_missing_file_raises(self, read_ext): # CI tests with other languages, translates to "No such file or directory" match = "|".join( [ - "No such file or directory", + "(No such file or directory", "没有那个文件或目录", - "File o directory non esistente", + "File o directory non esistente)", ] ) with pytest.raises(FileNotFoundError, match=match): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 57a409b0ec982..e4680cca881e5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2269,21 +2269,28 @@ def test_east_asian_unicode_series(self): assert repr(s) == expected # both - indices = ["ああ", "いいいい", "う", "えええ"] - s = Series(["あ", "いい", "ううう", "ええええ"], index=indices) - expected = ( - "ああ あ\n" - "いいいい いい\n" - "う ううう\n" - "えええ ええええ\n" - "dtype: object" + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["ああ", "いいいい", "う", "えええ"], + ) + expected = "".join( + [ + "ああ あ\n", + "いいいい いい\n", + "う ううう\n", + "えええ ええええ\n", + "dtype: object", + ] ) assert repr(s) == expected - indices = ["ああ", "いいいい", "う", "えええ"] - name = "おおおおおおお" + # unicode footer - s = Series(["あ", "いい", "ううう", "ええええ"], index=indices, name=name) + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["ああ", "いいいい", "う", "えええ"], + name="おおおおおおお", + ) expected = ( "ああ あ\nいいいい いい\nう ううう\n" "えええ ええええ\nName: おおおおおおお, dtype: object" @@ -2344,24 +2351,31 @@ def test_east_asian_unicode_series(self): # Enable Unicode option ----------------------------------------- with option_context("display.unicode.east_asian_width", True): # unicode index - indices = ["あ", "いい", "ううう", "ええええ"] - s = Series(["a", "bb", "CCC", "D"], index=indices) + s = Series( + ["a", "bb", "CCC", "D"], + index=["あ", "いい", "ううう", "ええええ"], + ) expected = ( "あ a\nいい bb\nううう CCC\n" "ええええ D\ndtype: object" ) assert repr(s) == expected - indices = ["a", "bb", "c", "ddd"] + # unicode values - s = Series(["あ", "いい", "ううう", "ええええ"], index=indices) + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["a", "bb", "c", "ddd"], + ) expected = ( "a あ\nbb いい\nc ううう\n" "ddd ええええ\ndtype: object" ) assert repr(s) == expected - indices = ["ああ", "いいいい", "う", "えええ"] # both - s = Series(["あ", "いい", "ううう", "ええええ"], index=indices) + s = Series( + ["あ", "いい", "ううう", "ええええ"], + index=["ああ", "いいいい", "う", "えええ"], + ) expected = ( "ああ あ\n" "いいいい いい\n" @@ -2438,9 +2452,12 @@ def test_east_asian_unicode_series(self): "Name: おおおおおおお, Length: 4, dtype: object" ) assert repr(s) == expected - indices = ["ああ", "¡¡¡¡いい", "¡¡", "えええ"] + # ambiguous unicode - s = Series(["¡¡", "い¡¡", "ううう", "ええええ"], index=indices) + s = Series( + ["¡¡", "い¡¡", "ううう", "ええええ"], + index=["ああ", "¡¡¡¡いい", "¡¡", "えええ"], + ) expected = ( "ああ ¡¡\n" "¡¡¡¡いい い¡¡\n" diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 9d6fb12ecfb3c..31c7994f39058 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -226,8 +226,11 @@ def test_parse_encoded_special_characters(encoding): data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" encoded_data = BytesIO(data.encode(encoding)) result = read_csv(encoded_data, delimiter="\t", encoding=encoding) - column = ["a", "b"] - expected = DataFrame(data=[[":foo", 0], ["bar", 1], ["baz", 2]], columns=column) + + expected = DataFrame( + data=[[":foo", 0], ["bar", 1], ["baz", 2]], + columns=["a", "b"], + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index f11a8d8b36573..c6a6b9eeadf5f 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -454,10 +454,12 @@ def test_file_charset(datapath, parser): "問 既破性申假 亦應但破有申無 若有無兩洗 亦應性假雙破耶", ], "答": [ - "答 邪既無量 正亦多途 大略為言不出二種 謂有得與無得 有得是邪須破 ", - "無得是正須申\n\t\t故", - "答 邪既無量 正亦多途 大略為言不出二種 謂有得與無得 有得是邪須破 ", - "無得是正須申\n\t\t故", + "".join( + [ + "答 邪既無量 正亦多途 大略為言不出二種 謂", + "有得與無得 有得是邪須破 無得是正須申\n\t\t故", + ] + ), None, "答 不例 有無皆是性 所以須雙破 既分性假異 故有破不破", ], From 8739dc1277741e410a15e19b7c120be18c147cfb Mon Sep 17 00:00:00 2001 From: SnorfYang <68773271+snorfyang@users.noreply.github.com> Date: Wed, 3 May 2023 00:16:13 +0800 Subject: [PATCH 7/8] fix aligned and update ruff --- .pre-commit-config.yaml | 2 +- pandas/core/series.py | 12 ++++++------ typings/numba.pyi | 1 - 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4d131d90719e1..2116bc64a5805 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: types_or: [python, pyi] additional_dependencies: [black==23.1.0] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.263 + rev: v0.0.264 hooks: - id: ruff args: [--exit-non-zero-on-fix] diff --git a/pandas/core/series.py b/pandas/core/series.py index a0a90dde27bb3..843c7ee337179 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2225,11 +2225,11 @@ def drop_duplicates( >>> s = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], ... name='animal') >>> s - 0 llama + 0 llama 1 cow - 2 llama + 2 llama 3 beetle - 4 llama + 4 llama 5 hippo Name: animal, dtype: object @@ -2238,7 +2238,7 @@ def drop_duplicates( set of duplicated entries. The default value of keep is 'first'. >>> s.drop_duplicates() - 0 llama + 0 llama 1 cow 3 beetle 5 hippo @@ -2250,7 +2250,7 @@ def drop_duplicates( >>> s.drop_duplicates(keep='last') 1 cow 3 beetle - 4 llama + 4 llama 5 hippo Name: animal, dtype: object @@ -4866,7 +4866,7 @@ def drop( >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], ... index=midx) >>> s - llama speed 45.0 + llama speed 45.0 weight 200.0 length 1.2 cow speed 30.0 diff --git a/typings/numba.pyi b/typings/numba.pyi index f6ecb85673a77..0d9184af19a0f 100644 --- a/typings/numba.pyi +++ b/typings/numba.pyi @@ -1,4 +1,3 @@ -# ruff: noqa: PYI033 # pyright: reportIncompleteStub = false from typing import ( TYPE_CHECKING, From f092bff884452fac6d0951b67bef5936e545a017 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 2 May 2023 20:49:25 +0100 Subject: [PATCH 8/8] Update pandas/core/series.py --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c14d88062b917..afa124368a29e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4881,7 +4881,7 @@ def drop( dtype: float64 >>> s.drop(labels='weight', level=1) - llama speed 45.0 + llama speed 45.0 length 1.2 cow speed 30.0 length 1.5