Skip to content

Commit 008d86c

Browse files
committed
Updates
1 parent f219b50 commit 008d86c

File tree

3 files changed

+138
-22
lines changed

3 files changed

+138
-22
lines changed

.jenkins/custom_pandoc_filter.py

Lines changed: 74 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,34 @@
11
from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock
22
import markdown
33
import re
4+
import html
5+
import markdown.inlinepatterns
6+
import os
7+
import nbformat as nbf
48

5-
def to_markdown(item):
9+
def to_markdown(item, skip_octicon=False):
    """Render one pandoc AST node as a text/HTML fragment.

    Handles strings, spaces, links, emphasis, inline code and code blocks.
    Despite the name, everything except plain strings is emitted as raw HTML
    (``<a>``, ``<em>``, ``<strong>``, ``<code>``, ``<pre><code>``).

    Args:
        item: A pandoc AST node: a dict with a ``'t'`` (type) key and, for
            most types, a ``'c'`` (content) key.
        skip_octicon: Forwarded unchanged to recursive calls. It is not
            otherwise consulted here: inline code carrying an ``octicon``
            attribute value is always dropped.

    Returns:
        The rendered string. Octicon code spans and node types this function
        does not know about render as ``''`` (never ``None``), so the result
        is always safe to pass to ``''.join(...)``.
    """
    node_type = item['t']

    if node_type == 'Str':
        return item['c']

    if node_type in ('Space', 'SoftBreak'):
        return ' '

    if node_type == 'Link':
        # Link content is [attr, inlines, [url, title]]; the link text may be
        # made of several inline nodes, so render each of them.
        link_text = ''.join(to_markdown(i, skip_octicon) for i in item['c'][1])
        return f'<a href="{item["c"][2][0]}">{link_text}</a>'

    if node_type in ('Emph', 'Strong'):
        tag = 'em' if node_type == 'Emph' else 'strong'
        inner = ''.join(to_markdown(i, skip_octicon) for i in item['c'])
        return f'<{tag}>{inner}</{tag}>'

    if node_type == 'Code':
        # Need to remove octicons as they don't render in .ipynb
        if any(value == 'octicon' for key, value in item['c'][0][2]):
            return ''
        # Escape the code and wrap it in <code> tags
        return f'<code>{html.escape(item["c"][1])}</code>'

    if node_type == 'CodeBlock':
        # Escape the code block and wrap it in <pre><code> tags
        return f'<pre><code>{html.escape(item["c"][1])}</code></pre>'

    # Unknown node type: contribute nothing rather than returning None,
    # which would raise TypeError inside the callers' ''.join(...).
    return ''
1729

1830
def process_admonitions(key, value, format, meta):
31+
# Replace admonitions with proper HTML.
1932
if key == 'Div':
2033
[[ident, classes, keyvals], contents] = value
2134
if 'note' in classes:
@@ -49,20 +62,73 @@ def process_admonitions(key, value, format, meta):
4962
html_content = markdown.markdown(note_content_md)
5063

5164
return [{'t': 'RawBlock', 'c': ['html', f'<div style="background-color: {color}; color: #fff; font-weight: 700; padding-left: 10px; padding-top: 5px; padding-bottom: 5px">{label}</div>']}, {'t': 'RawBlock', 'c': ['html', '<div style="background-color: #f3f4f7; padding-left: 10px; padding-top: 10px; padding-bottom: 10px; padding-right: 10px">']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '</div>']}]
52-
5365
elif key == 'RawBlock':
66+
# this is needed for the cells that have embedded video.
67+
# We add a special tag to those: ``` {.python .jupyter-code-cell}
68+
# The post-processing script then finds those and generates separate
69+
# code cells that can load video.
5470
[format, content] = value
5571
if format == 'html' and 'iframe' in content:
5672
# Extract the video URL
5773
video_url = content.split('src="')[1].split('"')[0]
5874
# Create the Python code to display the video
59-
html_code = f"""
75+
python_code = f"""
6076
from IPython.display import display, HTML
6177
html_code = \"""
6278
{content}
6379
\"""
6480
display(HTML(html_code))
6581
"""
6682

83+
return {'t': 'CodeBlock', 'c': [['', ['python', 'jupyter-code-cell'], []], python_code]}
84+
85+
86+
def process_images(key, value, format, meta):
    """Rewrite relative image sources to absolute pytorch.org URLs.

    Prefixes https://pytorch.org/tutorials/ to images so that they
    load correctly in the notebook.

    Args:
        key: pandoc node type; only ``'Image'`` is handled.
        value: node content, ``[[ident, classes, keyvals], caption, [src, title]]``.
        format: output format passed by the filter framework (unused).
        meta: document metadata passed by the filter framework (unused).

    Returns:
        A replacement ``Image`` node dict, or ``None`` for any other node type
        (which leaves the node unchanged).
    """
    if key != 'Image':
        return None

    [ident, classes, keyvals], caption, [src, title] = value

    # Only real http(s) URLs are left alone; a relative path that merely
    # begins with the letters "http" still needs the site prefix.
    if not src.startswith(('http://', 'https://')):
        while src.startswith('../'):
            src = src[3:]
        # Drop any leading slash (e.g. "/_static/...") so that the joined URL
        # never contains a double slash after the base.
        src = 'https://pytorch.org/tutorials/' + src.lstrip('/')

    return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]}
98+
99+
def _render_inlines(inlines):
    """Join the rendered form of each inline node, skipping empty results."""
    rendered = (to_markdown(node) for node in inlines)
    # Drop empty/None results so an unsupported inline cannot break the join.
    return ''.join(text for text in rendered if text)


def process_grids(key, value, format, meta):
    """Turn a ``grid`` Div into two side-by-side HTML columns.

    Generates side by side grid cards. Only for the two-cards layout
    that we use in the tutorial template. Cards (child Divs with the
    ``grid-item-card`` class) are dealt alternately to the left and right
    column. Inside a card, each ``Para`` becomes an ``<h2>`` and each
    ``BulletList`` becomes a ``<ul>``; other block types are ignored.

    Args:
        key: pandoc node type; only ``'Div'`` is handled.
        value: node content, ``[[ident, classes, keyvals], contents]``.
        format: output format passed by the filter framework (unused).
        meta: document metadata passed by the filter framework (unused).

    Returns:
        A ``RawBlock`` HTML node for a Div with the ``grid`` class, otherwise
        ``None`` (which leaves the node unchanged).
    """
    if key != 'Div':
        return None

    [[ident, classes, keyvals], contents] = value
    if 'grid' not in classes:
        return None

    columns = [
        '<div style="width: 45%; float: left; padding: 20px;">',
        '<div style="width: 45%; float: right; padding: 20px;">',
    ]
    column_num = 0

    for block in contents:
        is_card = ('t' in block and block['t'] == 'Div'
                   and 'grid-item-card' in block['c'][0][1])
        if not is_card:
            continue

        parts = []
        for item in block['c'][1]:
            if item['t'] == 'Para':
                parts.append('<h2>' + _render_inlines(item['c']) + '</h2>')
            elif item['t'] == 'BulletList':
                parts.append('<ul>')
                for list_item in item['c']:
                    # Only the first block of a list item is rendered; an
                    # empty item yields an empty <li> instead of IndexError.
                    inlines = list_item[0]['c'] if list_item else []
                    parts.append('<li>' + _render_inlines(inlines) + '</li>')
                parts.append('</ul>')

        columns[column_num] += ''.join(parts)
        # Alternate between the left (0) and right (1) column.
        column_num = (column_num + 1) % 2

    closed_columns = ''.join(column + '</div>' for column in columns)
    return {'t': 'RawBlock', 'c': ['html', closed_columns]}
123+
124+
def is_code_block(item):
    """Return True for an inline ``Code`` node whose text contains 'octicon'.

    Note: despite the name, this does not test for ``CodeBlock`` nodes; it
    inspects the text (second content element) of inline ``Code`` nodes.
    """
    if item['t'] != 'Code':
        return False
    code_text = item['c'][1]
    return 'octicon' in code_text
126+
def process_all(key, value, format, meta):
    """Run the individual filters in order and return the first replacement.

    The admonition, image and grid handlers are tried in that order; the
    first one that returns something other than ``None`` wins. ``None`` is
    returned when no handler produced a replacement.
    """
    for handler in (process_admonitions, process_images, process_grids):
        replacement = handler(key, value, format, meta)
        if replacement is not None:
            return replacement
    return None
67133
if __name__ == "__main__":
    # Run every handler (admonitions, images, grids) through one filter pass.
    toJSONFilter(process_all)

.jenkins/post_process_notebooks.py

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,71 @@
22
import os
33
import re
44

5+
# Pattern to search for a fenced block tagged ``` {.python .jupyter-code-cell}
# that holds the video-display snippet.
#   group 1: everything before the fenced block
#   group 2: the Python snippet itself (without the fences)
#   group 3: everything after the closing fence
# Braces and dots in the literal parts are escaped so that only the exact
# fence marker matches.
pattern = re.compile(
    r'(.*?)``` \{\.python \.jupyter-code-cell\}\n\n'
    r'(from IPython\.display import display, HTML\n'
    r'html_code = """\n.*?\n"""\n'
    r'display\(HTML\(html_code\)\))'
    r'\n```(.*)',
    re.DOTALL,
)
7+
58
def get_gallery_dirs(conf_path):
    """
    Execute the conf.py file and return the gallery directories.
    This is needed to make sure the script runs through all
    dirs.

    ``conf_path`` is the path to a Sphinx ``conf.py`` that defines
    ``sphinx_gallery_conf`` with a ``gallery_dirs`` entry; that entry is
    returned as-is. Raises ``KeyError`` if either name is missing.
    """
    namespace = {}
    # Read via a context manager so the file handle is always closed.
    with open(conf_path, encoding='utf-8') as conf_file:
        conf_source = conf_file.read()
    # NOTE: exec runs the whole conf.py; only point this at a trusted file.
    exec(conf_source, namespace)
    gallery_dirs = namespace['sphinx_gallery_conf']['gallery_dirs']
    print(f"Processing directories: {', '.join(gallery_dirs)}")
    return gallery_dirs
1219

13-
def process_notebook(notebook_path):
14-
"""Read and process a notebook file."""
20+
def process_video_cell(notebook_path):
    """
    This function finds markdown cells containing a
    "``` {.python .jupyter-code-cell}" code block and slices that block
    into a separate code cell (instead of markdown), which allows the
    video to load in the notebook. The rest of the content is placed
    in a new markdown cell.

    Markdown cells without such a block only get the extraneous
    ```{=html} fences removed. The notebook is rewritten in place.
    """
    print(f'Processing file: {notebook_path}')
    notebook = nbf.read(notebook_path, as_version=4)

    # Cells are inserted while iterating. This is intentional: the new code
    # cell is skipped by the cell_type check, and the new trailing markdown
    # cell is visited on a later iteration, so any further video blocks in it
    # are split too and its HTML fences are cleaned.
    for i, cell in enumerate(notebook.cells):
        if cell.cell_type != 'markdown':
            continue

        match = pattern.search(cell.source)
        if match is None:
            # Remove ```{=html} from the cell
            cell.source = remove_html_tag(cell.source)
            continue

        before_html_block, code_block, after_html_block = match.groups()

        # Keep only the part before the video block in the original cell,
        # cleaned the same way as every other markdown cell.
        cell.source = remove_html_tag(before_html_block)

        # Add a comment to run the cell to display the video
        code_block = "# Run this cell to load the video\n" + code_block
        # Insert the new code cell right after the current one
        notebook.cells.insert(i + 1, nbf.v4.new_code_cell(source=code_block))

        # If there is content after the video block, it goes into a new
        # markdown cell placed after the code cell.
        if after_html_block.strip():
            notebook.cells.insert(
                i + 2, nbf.v4.new_markdown_cell(source=after_html_block))

    nbf.write(notebook, notebook_path)
2163

22-
def process_content(content):
23-
"""Remove extra syntax from the content of a Markdown cell."""
64+
def remove_html_tag(content):
65+
"""
66+
Pandoc adds an extraneous ```{=html} ``` to raw HTML blocks which
67+
prevents it from rendering correctly. This function removes
68+
```{=html} that we don't need.
69+
"""
2470
content = re.sub(r'```{=html}\n<div', '<div', content)
2571
content = re.sub(r'">\n```', '">', content)
2672
content = re.sub(r'<\/div>\n```', '</div>\n', content)
@@ -29,18 +75,20 @@ def process_content(content):
2975
content = re.sub(r'</p>\n```', '</p>', content)
3076
return content
3177

32-
def process_directory(notebook_dir):
33-
"""Process all notebook files in a directory and its subdirectories."""
78+
def walk_dir(notebook_dir):
    """
    Recursively visit ``notebook_dir`` and run the notebook
    post-processing on every ``.ipynb`` file found in it or in any
    of its subdirectories.
    """
    for current_dir, _subdirs, filenames in os.walk(notebook_dir):
        notebook_names = [name for name in filenames if name.endswith('.ipynb')]
        for name in notebook_names:
            process_video_cell(os.path.join(current_dir, name))
3887

3988
def main():
    """
    Post-process the notebooks in every gallery directory listed in
    the conf.py located one directory above the working directory.
    """
    gallery_dirs = get_gallery_dirs('../conf.py')
    for gallery_dir in gallery_dirs:
        walk_dir(gallery_dir)
4492

4593
if __name__ == "__main__":
    # Script entry point: process all gallery notebooks.
    main()

conf.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@
4242
import distutils.file_util
4343
import re
4444
from get_sphinx_filenames import SPHINX_SHOULD_RUN
45-
45+
import pandocfilters
46+
import pandoc
47+
import pypandoc
4648
import plotly.io as pio
4749
pio.renderers.default = 'sphinx_gallery'
4850

0 commit comments

Comments
 (0)