Skip to content

Commit 35e249f

Browse files
committed
fix: certain strange characters caused reporting to fail. #1512
It turns out that str.splitlines() will break text on some characters that file.readline() does not! Use readline() to read source files the same way that Python does.
1 parent 152cdc7 commit 35e249f

File tree

3 files changed

+38
-1
lines changed

3 files changed

+38
-1
lines changed

CHANGES.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,13 @@ Unreleased
2323
- File pattern rules were too strict, forbidding plus signs and curly braces in
2424
directory and file names. This is now fixed, closing `issue 1513`_.
2525

26+
- Unusual Unicode or control characters in source files could prevent
27+
reporting. This is now fixed, closing `issue 1512`_.
28+
2629
- The PyPy wheel now installs on PyPy 3.7, 3.8, and 3.9, closing `issue 1510`_.
2730

2831
.. _issue 1510: https://github.com/nedbat/coveragepy/issues/1510
32+
.. _issue 1512: https://github.com/nedbat/coveragepy/issues/1512
2933
.. _issue 1513: https://github.com/nedbat/coveragepy/issues/1513
3034

3135

coverage/phystokens.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""Better tokenizing for coverage.py."""
55

66
import ast
7+
import io
78
import keyword
89
import re
910
import token
@@ -172,7 +173,7 @@ def generate_tokens(self, text):
172173
"""A stand-in for `tokenize.generate_tokens`."""
173174
if text != self.last_text:
174175
self.last_text = text
175-
readline = iter(text.splitlines(True)).__next__
176+
readline = io.StringIO(text).readline
176177
try:
177178
self.last_tokens = list(tokenize.generate_tokens(readline))
178179
except:

tests/test_html.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,38 @@ def test_formfeeds(self):
469469
formfeed_html = self.get_html_report_content("formfeed.py")
470470
assert "line_two" in formfeed_html
471471

472+
def test_splitlines_special_chars(self):
473+
# https://github.com/nedbat/coveragepy/issues/1512
474+
# See https://docs.python.org/3/library/stdtypes.html#str.splitlines for
475+
# the characters that splitlines treats as line boundaries but readline does not.
476+
477+
# I'm not exactly sure why we need the "a" strings here, but the old
478+
# code didn't fail without them (so they are needed to reproduce the bug).
479+
self.make_file("splitlines_is_weird.py", """\
480+
test = {
481+
"0b": ["\x0b0"], "a1": "this is line 2",
482+
"0c": ["\x0c0"], "a2": "this is line 3",
483+
"1c": ["\x1c0"], "a3": "this is line 4",
484+
"1d": ["\x1d0"], "a4": "this is line 5",
485+
"1e": ["\x1e0"], "a5": "this is line 6",
486+
"85": ["\x850"], "a6": "this is line 7",
487+
"2028": ["\u20280"], "a7": "this is line 8",
488+
"2029": ["\u20290"], "a8": "this is line 9",
489+
}
490+
DONE = 1
491+
""")
492+
cov = coverage.Coverage()
493+
self.start_import_stop(cov, "splitlines_is_weird")
494+
cov.html_report()
495+
496+
the_html = self.get_html_report_content("splitlines_is_weird.py")
497+
assert "DONE" in the_html
498+
499+
# Check that the lines are properly decoded and reported...
500+
html_lines = the_html.split("\n")
501+
assert any(re.search(r'id="t2".*"this is line 2"', line) for line in html_lines)
502+
assert any(re.search(r'id="t9".*"this is line 9"', line) for line in html_lines)
503+
472504

473505
class HtmlTest(HtmlTestHelpers, CoverageTest):
474506
"""Moar HTML tests."""

0 commit comments

Comments
 (0)