Updates

svekars · svekars · commit 008d86cc1978 · 2024-03-08T09:53:56.000-08:00
diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py
@@ -1,21 +1,34 @@
 from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock
 import markdown
 import re
+import html
+import markdown.inlinepatterns
+import os
+import nbformat as nbf
 
-def to_markdown(item):
+def to_markdown(item, skip_octicon=False):
+    # A handler function to process strings, links, code, and code
+    # blocks
     if item['t'] == 'Str':
         return item['c']
     elif item['t'] == 'Space':
         return ' '
     elif item['t'] == 'Link':
-        # Assuming the link text is always in the first item
-        return f"[{item['c'][1][0]['c']}]({item['c'][2][0]})"
+        link_text = ''.join(to_markdown(i, skip_octicon) for i in item['c'][1])
+        return f'<a href="{item["c"][2][0]}">{link_text}</a>'
     elif item['t'] == 'Code':
-        return f"`{item['c'][1]}`"
+        # Need to remove octicons as they don't render in .ipynb
+        if any(value == 'octicon' for key, value in item['c'][0][2]):
+            return ''
+        else:
+            # Escape the code and wrap it in <code> tags
+            return f'<code>{html.escape(item["c"][1])}</code>'
     elif item['t'] == 'CodeBlock':
-        return f"```\n{item['c'][1]}\n```"
+        # Escape the code block and wrap it in <pre><code> tags
+        return f'<pre><code>{html.escape(item["c"][1])}</code></pre>'
 
 def process_admonitions(key, value, format, meta):
+    # Replace admonitions with proper HTML.
     if key == 'Div':
         [[ident, classes, keyvals], contents] = value
         if 'note' in classes:
@@ -49,20 +62,73 @@ def process_admonitions(key, value, format, meta):
         html_content = markdown.markdown(note_content_md)
 
         return [{'t': 'RawBlock', 'c': ['html', f'<div style="background-color: {color}; color: #fff; font-weight: 700; padding-left: 10px; padding-top: 5px; padding-bottom: 5px">{label}</div>']}, {'t': 'RawBlock', 'c': ['html', '<div style="background-color: #f3f4f7; padding-left: 10px; padding-top: 10px; padding-bottom: 10px; padding-right: 10px">']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '</div>']}]
-
     elif key == 'RawBlock':
+    # This is needed for the cells that have embedded video.
+    # We add a special tag to those: ``` {.python .jupyter-code-cell}
+    # The post-processing script then finds those and generates separate
+    # code cells that can load video.
         [format, content] = value
         if format == 'html' and 'iframe' in content:
             # Extract the video URL
             video_url = content.split('src="')[1].split('"')[0]
             # Create the Python code to display the video
-            html_code = f"""
+            python_code = f"""
 from IPython.display import display, HTML
 html_code = \"""
 {content}
 \"""
 display(HTML(html_code))
 """
 
+            return {'t': 'CodeBlock', 'c': [['', ['python', 'jupyter-code-cell'], []], python_code]}
+
+
+def process_images(key, value, format, meta):
+    # Add https://pytorch.org/tutorials/ to images so that they
+    # load correctly in the notebook.
+    if key == 'Image':
+        [ident, classes, keyvals], caption, [src, title] = value
+        if not src.startswith('http'):
+            while src.startswith('../'):
+                src = src[3:]
+            if src.startswith('/_static'):
+                src = src[1:]
+            src = 'https://pytorch.org/tutorials/' + src
+        return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]}
+
+def process_grids(key, value, format, meta):
+    # Generate side by side grid cards. Only for the two-cards layout
+    # that we use in the tutorial template.
+    if key == 'Div':
+        [[ident, classes, keyvals], contents] = value
+        if 'grid' in classes:
+            columns = ['<div style="width: 45%; float: left; padding: 20px;">',
+                       '<div style="width: 45%; float: right; padding: 20px;">']
+            column_num = 0
+            for block in contents:
+                if 't' in block and block['t'] == 'Div' and 'grid-item-card' in block['c'][0][1]:
+                    item_html = ''
+                    for item in block['c'][1]:
+                        if item['t'] == 'Para':
+                            item_html += '<h2>' + ''.join(to_markdown(i) for i in item['c']) + '</h2>'
+                        elif item['t'] == 'BulletList':
+                            item_html += '<ul>'
+                            for list_item in item['c']:
+                                item_html += '<li>' + ''.join(to_markdown(i) for i in list_item[0]['c']) + '</li>'
+                            item_html += '</ul>'
+                    columns[column_num] += item_html
+                    column_num = (column_num + 1) % 2
+            columns = [column + '</div>' for column in columns]
+            return {'t': 'RawBlock', 'c': ['html', ''.join(columns)]}
+
+def is_code_block(item):
+    return item['t'] == 'Code' and 'octicon' in item['c'][1]
+def process_all(key, value, format, meta):
+    new_value = process_admonitions(key, value, format, meta)
+    if new_value is None:
+        new_value = process_images(key, value, format, meta)
+    if new_value is None:
+        new_value = process_grids(key, value, format, meta)
+    return new_value
 if __name__ == "__main__":
-    toJSONFilter(process_admonitions)
+    toJSONFilter(process_all)
diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py
@@ -2,25 +2,71 @@
 import os
 import re
 
+# Pattern to search ``` {.python .jupyter-code-cell}
+pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL)
+
 def get_gallery_dirs(conf_path):
-    """Execute the conf.py file and return the gallery directories."""
+    """
+    Execute the conf.py file and return the gallery directories.
+    This is needed to make sure the script runs through all
+    dirs.
+    """
     namespace = {}
     exec(open(conf_path).read(), namespace)
     sphinx_gallery_conf = namespace['sphinx_gallery_conf']
     print(f"Processing directories: {', '.join(sphinx_gallery_conf['gallery_dirs'])}")
     return sphinx_gallery_conf['gallery_dirs']
 
-def process_notebook(notebook_path):
-    """Read and process a notebook file."""
+def process_video_cell(notebook_path):
+    """
+    This function finds the markdown cells that contain a
+    "``` {.python .jupyter-code-cell}" code block and slices that block
+    into a separate code cell (instead of markdown), which allows the
+    video to load in the notebook. The rest of the content is placed
+    in a new markdown cell.
+    """
     print(f'Processing file: {notebook_path}')
     notebook = nbf.read(notebook_path, as_version=4)
-    for cell in notebook.cells:
+
+    # Iterate over markdown cells
+    for i, cell in enumerate(notebook.cells):
         if cell.cell_type == 'markdown':
-            cell.source = process_content(cell.source)
+            match = pattern.search(cell.source)
+            if match:
+                # Extract the parts before and after the video code block
+                before_html_block = match.group(1)
+                code_block = match.group(2)
+
+                # Add a comment to run the cell to display the video
+                code_block = "# Run this cell to load the video\n" + code_block
+                # Create a new code cell
+                new_code_cell = nbf.v4.new_code_cell(source=code_block)
+
+                # Replace the original markdown cell with the part before the code block
+                cell.source = before_html_block
+
+                # Insert the new code cell after the current one
+                notebook.cells.insert(i+1, new_code_cell)
+
+                # If there is content after the HTML code block, create a new markdown cell
+                if len(match.group(3).strip()) > 0:
+                    after_html_block = match.group(3)
+                    new_markdown_cell = nbf.v4.new_markdown_cell(source=after_html_block)
+                    # Create a new markdown cell and add the content after code block there
+                    notebook.cells.insert(i+2, new_markdown_cell)
+
+            else:
+                # Remove ```{=html} from the code block
+                cell.source = remove_html_tag(cell.source)
+
     nbf.write(notebook, notebook_path)
 
-def process_content(content):
-    """Remove extra syntax from the content of a Markdown cell."""
+def remove_html_tag(content):
+    """
+    Pandoc adds an extraneous ```{=html} ``` to raw HTML blocks which
+    prevents it from rendering correctly. This function removes
+    ```{=html} that we don't need.
+    """
     content = re.sub(r'```{=html}\n<div', '<div', content)
     content = re.sub(r'">\n```', '">', content)
     content = re.sub(r'<\/div>\n```', '</div>\n', content)
@@ -29,18 +75,20 @@ def process_content(content):
     content = re.sub(r'</p>\n```', '</p>', content)
     return content
 
-def process_directory(notebook_dir):
-    """Process all notebook files in a directory and its subdirectories."""
+def walk_dir(notebook_dir):
+    """
+    Walk the dir and process all notebook files in
+    the gallery directory and its subdirectories.
+    """
     for root, dirs, files in os.walk(notebook_dir):
         for filename in files:
             if filename.endswith('.ipynb'):
-                process_notebook(os.path.join(root, filename))
+                process_video_cell(os.path.join(root, filename))
 
 def main():
-    """Main function to process all directories specified in the conf.py file."""
-    conf_path = 'conf.py'
+    conf_path = '../conf.py'
     for notebook_dir in get_gallery_dirs(conf_path):
-        process_directory(notebook_dir)
+        walk_dir(notebook_dir)
 
 if __name__ == "__main__":
     main()
diff --git a/conf.py b/conf.py
@@ -42,7 +42,9 @@
 import distutils.file_util
 import re
 from get_sphinx_filenames import SPHINX_SHOULD_RUN
-
+import pandocfilters
+import pandoc
+import pypandoc
 import plotly.io as pio
 pio.renderers.default = 'sphinx_gallery'