Skip to content

Commit 759c9e6

Browse files
committed
Normalize unicode in help files.
1 parent 22c9e95 commit 759c9e6

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

scripts/help/cert-help-extraction.py

100644 → 100755
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22
import tempfile
33
import re
44
import urllib.request
@@ -12,6 +12,7 @@
1212
import sys
1313
import marko
1414
from marko.md_renderer import MarkdownRenderer
15+
import unicodedata
1516

1617
script_path = Path(__file__)
1718
# Add the shared module to the path
@@ -35,8 +36,7 @@ def soupify(url: str) -> BeautifulSoup:
3536
cache_key = m.hexdigest()
3637
cache_file = cache_path.joinpath(cache_key)
3738
if cache_file.exists():
38-
with cache_file.open() as f:
39-
content = f.read().replace(u'\xa0', u' ')
39+
content = unicodedata.normalize("NFKD", cache_file.read_text())
4040
else:
4141
resp = requests.get(url)
4242

@@ -500,12 +500,12 @@ def get_help(rule):
500500
print(f"{err.reason}: {err.stderr}")
501501
temp_qhelp_path.unlink()
502502

503-
parsed_temp_help = md.parse(temp_help_path.read_text())
503+
parsed_temp_help = md.parse(unicodedata.normalize("NFKD", temp_help_path.read_text()))
504504
# Remove the first header that is added by the QHelp to Markdown conversion
505505
del parsed_temp_help.children[0]
506506
temp_help_path.write_text(md.render(parsed_temp_help))
507507

508-
parsed_help = md.parse(help_path.read_text())
508+
parsed_help = md.parse(unicodedata.normalize("NFKD", help_path.read_text()))
509509
if find_heading(parsed_help, 'CERT'):
510510
# Check if it contains the CERT heading that needs to be replaced
511511
print(f"ID: {rule['id']} - Found heading 'CERT' whose content will be replaced")
@@ -514,7 +514,7 @@ def get_help(rule):
514514
# Otherwise update the content of every existing second level heading, note that this doesn't add headings!
515515
second_level_headings = {get_heading_text(heading) for heading in iterate_headings(parsed_temp_help) if heading.level == 2}
516516
# Check if there are any headings we don't have in our current help file. If that is the case we need to manually update that first.
517-
existing_second_level_headings = {get_heading_text(heading).replace(u'\xa0', u' ') for heading in iterate_headings(parsed_help) if heading.level == 2}
517+
existing_second_level_headings = {get_heading_text(heading) for heading in iterate_headings(parsed_help) if heading.level == 2}
518518
if not second_level_headings.issubset(existing_second_level_headings):
519519
print(f"ID: {rule['id']} - The original help is missing the header(s) '{', '.join(second_level_headings.difference(existing_second_level_headings))}'. Proceed with manually adding these in the expected location (See {temp_help_path}).")
520520
sys.exit(1)

0 commit comments

Comments
 (0)