From 8f2670df73e2d657964fdaffec3eca318014bd4d Mon Sep 17 00:00:00 2001 From: donBarbos Date: Sun, 16 Feb 2025 06:20:10 +0400 Subject: [PATCH 01/13] Improve speed of difflib.IS_LINE_JUNK by replacing re --- Lib/difflib.py | 7 ++----- .../Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst | 2 ++ 2 files changed, 4 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst diff --git a/Lib/difflib.py b/Lib/difflib.py index c124afdd039559..d12b0d439f27c5 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1038,9 +1038,7 @@ def _qformat(self, aline, bline, atags, btags): # remaining is that perhaps it was really the case that " volatile" # was inserted after "private". I can live with that . -import re - -def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match): +def IS_LINE_JUNK(line): r""" Return True for ignorable line: iff `line` is blank or contains a single '#'. @@ -1054,7 +1052,7 @@ def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match): False """ - return pat(line) is not None + return line.strip() == "" or line.lstrip().rstrip() == "#" def IS_CHARACTER_JUNK(ch, ws=" \t"): r""" @@ -2027,7 +2025,6 @@ def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False, replace('\1',''). \ replace('\t',' ') -del re def restore(delta, which): r""" diff --git a/Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst b/Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst new file mode 100644 index 00000000000000..f58c416930b709 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst @@ -0,0 +1,2 @@ +Improve speed of :func:`difflib.IS_LINE_JUNK` by replacing :mod:`re` with +built-in string methods. From 0fae9c52bce2bbe8ed0a1e6128e57f74969af12d Mon Sep 17 00:00:00 2001 From: donBarbos Date: Sun, 16 Feb 2025 07:20:53 +0400 Subject: [PATCH 02/13] Update difflib.py Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Lib/difflib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index d12b0d439f27c5..4ba643c3aec50d 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1038,7 +1038,7 @@ def _qformat(self, aline, bline, atags, btags): # remaining is that perhaps it was really the case that " volatile" # was inserted after "private". I can live with that . -def IS_LINE_JUNK(line): +def IS_LINE_JUNK(line, pat=None): r""" Return True for ignorable line: iff `line` is blank or contains a single '#'. From 352b4bccc1cf5fa83401c00a115eb2c7e6f47393 Mon Sep 17 00:00:00 2001 From: donBarbos Date: Sun, 16 Feb 2025 07:21:14 +0400 Subject: [PATCH 03/13] Update difflib.py Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Lib/difflib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 4ba643c3aec50d..e528af5945b63f 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1052,7 +1052,7 @@ def IS_LINE_JUNK(line, pat=None): False """ - return line.strip() == "" or line.lstrip().rstrip() == "#" + return line.strip() in ('', '#') def IS_CHARACTER_JUNK(ch, ws=" \t"): r""" From 0ab8da87e9e035edb0fb50abb68e1fa2917648e6 Mon Sep 17 00:00:00 2001 From: donBarbos Date: Sun, 16 Feb 2025 15:51:50 +0400 Subject: [PATCH 04/13] Add backward compatibility --- Lib/difflib.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index e528af5945b63f..196c71ed7e2b2c 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1052,7 +1052,9 @@ def IS_LINE_JUNK(line, pat=None): False """ - return line.strip() in ('', '#') + return (line.strip() in ('', '#') + if pat is None + else pat(line) is not None) def IS_CHARACTER_JUNK(ch, ws=" \t"): r""" From 64dff4af089707262e4c38a7fb647098f8595d50 Mon Sep 17 00:00:00 2001 From: donBarbos Date: Sun, 16 Feb 2025 18:42:30 +0400 Subject: [PATCH 05/13] Update difflib.py --- Lib/difflib.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 196c71ed7e2b2c..a665ffecd10a96 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1040,7 +1040,7 @@ def _qformat(self, aline, bline, atags, btags): def IS_LINE_JUNK(line, pat=None): r""" - Return True for ignorable line: iff `line` is blank or contains a single '#'. + Return True for ignorable line: if `line` is blank or contains a single '#'. Examples: @@ -1052,9 +1052,9 @@ def IS_LINE_JUNK(line, pat=None): False """ - return (line.strip() in ('', '#') - if pat is None - else pat(line) is not None) + if pat is None: + return line.strip() == '' or line.strip() == '#' + return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"): r""" From 32337c3ac65fc313ce8e5b29c194017b21407906 Mon Sep 17 00:00:00 2001 From: donBarbos Date: Sun, 16 Feb 2025 14:57:46 +0000 Subject: [PATCH 06/13] Update Lib/difflib.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/difflib.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index a665ffecd10a96..7f15acce897c13 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1053,7 +1053,8 @@ def IS_LINE_JUNK(line, pat=None): """ if pat is None: - return line.strip() == '' or line.strip() == '#' + stripped = line.strip() + return stripped == '' or stripped == '#' return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"): From 390b6e760a80442bdcc817f81c35b0333c27f02c Mon Sep 17 00:00:00 2001 From: donBarbos Date: Mon, 17 Feb 2025 11:15:41 +0400 Subject: [PATCH 07/13] Update difflib.py Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Lib/difflib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 7f15acce897c13..2590b7ff9998fb 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1040,7 +1040,7 @@ def _qformat(self, aline, bline, atags, btags): def IS_LINE_JUNK(line, pat=None): r""" - Return True for ignorable line: if `line` is blank or contains a single '#'. + Return True for ignorable line: if and only if `line` is blank or contains a single '#'. Examples: From 30d4535f4d2250e3c09be8a1f34f0ceb7e14ed52 Mon Sep 17 00:00:00 2001 From: donBarbos Date: Tue, 18 Feb 2025 04:19:44 +0400 Subject: [PATCH 08/13] Back `iff` word --- Lib/difflib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 2590b7ff9998fb..7f15acce897c13 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1040,7 +1040,7 @@ def _qformat(self, aline, bline, atags, btags): def IS_LINE_JUNK(line, pat=None): r""" - Return True for ignorable line: if and only if `line` is blank or contains a single '#'. + Return True for ignorable line: if `line` is blank or contains a single '#'. Examples: From 0e7293d752be5d54cca43625b95a6afbefb61176 Mon Sep 17 00:00:00 2001 From: donBarbos Date: Wed, 19 Feb 2025 00:57:46 +0400 Subject: [PATCH 09/13] Update difflib --- Lib/difflib.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 7f15acce897c13..2d2bb5cf31c2ec 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1053,8 +1053,7 @@ def IS_LINE_JUNK(line, pat=None): """ if pat is None: - stripped = line.strip() - return stripped == '' or stripped == '#' + return line.strip() in ('', '#') return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"): From bcf45a36914e911a9b4512f2c408ebec9de0bbca Mon Sep 17 00:00:00 2001 From: donBarbos Date: Wed, 19 Feb 2025 01:04:59 +0400 Subject: [PATCH 10/13] Update difflib.py Co-authored-by: Tim Peters --- Lib/difflib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 2d2bb5cf31c2ec..c563c683db6a24 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1053,7 +1053,7 @@ def IS_LINE_JUNK(line, pat=None): """ if pat is None: - return line.strip() in ('', '#') + return line.strip() in '#' return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"): From 46a412681eb6d0a7c8bf6546a9ffa2669e01bf06 Mon Sep 17 00:00:00 2001 From: donBarbos Date: Mon, 28 Apr 2025 01:31:13 +0400 Subject: [PATCH 11/13] Fix News message --- .../Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst b/Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst index f58c416930b709..3d397084fc136c 100644 --- a/Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst +++ b/Misc/NEWS.d/next/Library/2025-02-16-06-25-01.gh-issue-130167.kUg7Rc.rst @@ -1,2 +1 @@ -Improve speed of :func:`difflib.IS_LINE_JUNK` by replacing :mod:`re` with -built-in string methods. +Improve speed of :func:`difflib.IS_LINE_JUNK`. Patch by Semyon Moroz. From 60951fb01250ea8f1017e205425866407a215723 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 1 May 2025 04:41:43 +0100 Subject: [PATCH 12/13] Improve clarity of expression --- Lib/difflib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index c563c683db6a24..6719927682aca8 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1053,7 +1053,7 @@ def IS_LINE_JUNK(line, pat=None): """ if pat is None: - return line.strip() in '#' + return line.strip() in {'', '#'} return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"): From 9d987b466dbc6e6819ecb6c7aace539f0c203ebd Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 1 May 2025 04:46:04 +0100 Subject: [PATCH 13/13] Revert, but add explanatory comments --- Lib/difflib.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 6719927682aca8..f4b453fe77f81b 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1053,7 +1053,10 @@ def IS_LINE_JUNK(line, pat=None): """ if pat is None: - return line.strip() in {'', '#'} + # Default: match '#' or the empty string + return line.strip() in '#' + # Previous versions used the undocumented parameter 'pat' as a + # match function. Retain this behaviour for compatibility. return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"):