docling-project · vagenas · May 27, 2025 · May 16, 2025 · May 16, 2025 · May 16, 2025
diff --git a/docling_core/transforms/serializer/common.py b/docling_core/transforms/serializer/common.py
@@ -39,7 +39,11 @@
     KeyValueItem,
     NodeItem,
     OrderedList,
+    PictureClassificationData,
+    PictureDataType,
+    PictureDescriptionData,
     PictureItem,
+    PictureMoleculeData,
     TableItem,
     TextItem,
     UnorderedList,
@@ -118,6 +122,23 @@ def _iterate_items(
         yield item
 
 
+def _get_picture_annotation_text(annotation: PictureDataType) -> Optional[str]:
+    """Return the text to serialize for a picture annotation, if any.
+
+    A classification yields its first predicted class name with underscores
+    replaced by spaces, a description yields its `text`, a molecule yields
+    its `smi`; any other annotation, or an empty class list, yields None.
+    """
+    if isinstance(annotation, PictureClassificationData):
+        if not annotation.predicted_classes:
+            return None
+        top_class = annotation.predicted_classes[0].class_name
+        return None if top_class is None else top_class.replace("_", " ")
+    if isinstance(annotation, PictureDescriptionData):
+        return annotation.text
+    return annotation.smi if isinstance(annotation, PictureMoleculeData) else None
+
+
 def create_ser_result(
     *,
     text: str = "",

diff --git a/docling_core/transforms/serializer/html.py b/docling_core/transforms/serializer/html.py
@@ -35,6 +35,7 @@
 from docling_core.transforms.serializer.common import (
     CommonParams,
     DocSerializer,
+    _get_picture_annotation_text,
     create_ser_result,
 )
 from docling_core.transforms.serializer.html_styles import (
@@ -110,6 +111,8 @@ class HTMLParams(CommonParams):
     # Enable charts to be printed into HTML as tables
     enable_chart_tables: bool = True
 
+    include_annotations: bool = True
+
 
 class HTMLTextSerializer(BaseModel, BaseTextSerializer):
     """HTML-specific text item serializer."""
@@ -943,18 +946,46 @@ def serialize_captions(
         params = self.params.merge_with_patch(patch=kwargs)
         results: list[SerializationResult] = []
         text_res = ""
+        excluded_refs = self.get_excluded_refs(**kwargs)
+
         if DocItemLabel.CAPTION in params.labels:
-            results = [
-                create_ser_result(text=it.text, span_source=it)
-                for cap in item.captions
-                if isinstance(it := cap.resolve(self.doc), TextItem)
-                and it.self_ref not in self.get_excluded_refs(**kwargs)
-            ]
-            text_res = params.caption_delim.join([r.text for r in results])
-            if text_res:
-                text_dir = get_text_direction(text_res)
-                dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
-                text_res = f"<{tag}{dir_str}>{html.escape(text_res)}</{tag}>"
+            for cap in item.captions:
+                if (
+                    isinstance(it := cap.resolve(self.doc), TextItem)
+                    and it.self_ref not in excluded_refs
+                ):
+                    text_cap = it.text
+                    text_dir = get_text_direction(text_cap)
+                    dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
+                    cap_ser_res = create_ser_result(
+                        text=(
+                            f'<div class="caption"{dir_str}>'
+                            f"{html.escape(text_cap)}"
+                            f"</div>"
+                        ),
+                        span_source=it,
+                    )
+                    results.append(cap_ser_res)
+
+        if params.include_annotations and item.self_ref not in excluded_refs:
+            if isinstance(item, PictureItem):
+                for ann in item.annotations:
+                    if ann_text := _get_picture_annotation_text(annotation=ann):
+                        text_dir = get_text_direction(ann_text)
+                        dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else ""
+                        ann_ser_res = create_ser_result(
+                            text=(
+                                f'<div data-annotation-kind="{ann.kind}"{dir_str}>'
+                                f"{html.escape(ann_text)}"
+                                f"</div>"
+                            ),
+                            span_source=item,
+                        )
+                        results.append(ann_ser_res)
+
+        text_res = params.caption_delim.join([r.text for r in results])
+        if text_res:
+            text_res = f"<{tag}>{text_res}</{tag}>"
         return create_ser_result(text=text_res, span_source=results)
 
     def _generate_head(self) -> str:

diff --git a/docling_core/transforms/serializer/markdown.py b/docling_core/transforms/serializer/markdown.py
@@ -29,6 +29,7 @@
 from docling_core.transforms.serializer.common import (
     CommonParams,
     DocSerializer,
+    _get_picture_annotation_text,
     _PageBreakSerResult,
     create_ser_result,
 )
@@ -69,6 +70,8 @@ class MarkdownParams(CommonParams):
     page_break_placeholder: Optional[str] = None  # e.g. "<!-- page break -->"
     escape_underscores: bool = True
     escape_html: bool = True
+    include_annotations: bool = True
+    mark_annotations: bool = False
 
 
 class MarkdownTextSerializer(BaseModel, BaseTextSerializer):
@@ -210,6 +213,24 @@ def serialize(
             res_parts.append(cap_res)
 
         if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs):
+            if params.include_annotations:
+
+                for ann in item.annotations:
+                    if ann_text := _get_picture_annotation_text(annotation=ann):
+                        ann_ser_res = create_ser_result(
+                            text=(
+                                (
+                                    f'<!--<annotation kind="{ann.kind}">-->'
+                                    f"{ann_text}"
+                                    f"<!--<annotation/>-->"
+                                )
+                                if params.mark_annotations
+                                else ann_text
+                            ),
+                            span_source=item,
+                        )
+                        res_parts.append(ann_ser_res)
+
             img_res = self._serialize_image_part(
                 item=item,
                 doc=doc,

diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
@@ -2925,6 +2925,7 @@ def save_as_markdown(
         page_no: Optional[int] = None,
         included_content_layers: Optional[set[ContentLayer]] = None,
         page_break_placeholder: Optional[str] = None,
+        include_annotations: bool = True,
     ):
         """Save to markdown."""
         if isinstance(filename, str):
@@ -2952,6 +2953,7 @@ def save_as_markdown(
             page_no=page_no,
             included_content_layers=included_content_layers,
             page_break_placeholder=page_break_placeholder,
+            include_annotations=include_annotations,
         )
 
         with open(filename, "w", encoding="utf-8") as fw:
@@ -2973,6 +2975,8 @@ def export_to_markdown(  # noqa: C901
         page_no: Optional[int] = None,
         included_content_layers: Optional[set[ContentLayer]] = None,
         page_break_placeholder: Optional[str] = None,  # e.g. "<!-- page break -->",
+        include_annotations: bool = True,
+        mark_annotations: bool = False,
     ) -> str:
         r"""Serialize to Markdown.
 
@@ -2992,9 +2996,9 @@ def export_to_markdown(  # noqa: C901
         :type labels: Optional[set[DocItemLabel]] = None
         :param strict_text: Deprecated.
         :type strict_text: bool = False
-        :param escaping_underscores: bool: Whether to escape underscores in the
+        :param escape_underscores: bool: Whether to escape underscores in the
             text content of the document. (Default value = True).
-        :type escaping_underscores: bool = True
+        :type escape_underscores: bool = True
         :param image_placeholder: The placeholder to include to position
             images in the markdown. (Default value = "\<!-- image --\>").
         :type image_placeholder: str = "<!-- image -->"
@@ -3010,6 +3014,12 @@ def export_to_markdown(  # noqa: C901
         :param page_break_placeholder: The placeholder to include for marking page
             breaks. None means no page break placeholder will be used.
         :type page_break_placeholder: Optional[str] = None
+        :param include_annotations: bool: Whether to include annotations in the export.
+            (Default value = True).
+        :type include_annotations: bool = True
+        :param mark_annotations: bool: Whether to mark annotations in the export; only
+            relevant if include_annotations is True. (Default value = False).
+        :type mark_annotations: bool = False
         :returns: The exported Markdown representation.
         :rtype: str
         """
@@ -3039,6 +3049,8 @@ def export_to_markdown(  # noqa: C901
                 indent=indent,
                 wrap_width=text_width if text_width > 0 else None,
                 page_break_placeholder=page_break_placeholder,
+                include_annotations=include_annotations,
+                mark_annotations=mark_annotations,
             ),
         )
         ser_res = serializer.serialize()
@@ -3088,6 +3100,7 @@ def save_as_html(
         html_head: str = "null",  # should be deprecated
         included_content_layers: Optional[set[ContentLayer]] = None,
         split_page_view: bool = False,
+        include_annotations: bool = True,
     ):
         """Save to HTML."""
         if isinstance(filename, str):
@@ -3113,6 +3126,7 @@ def save_as_html(
             html_head=html_head,
             included_content_layers=included_content_layers,
             split_page_view=split_page_view,
+            include_annotations=include_annotations,
         )
 
         with open(filename, "w", encoding="utf-8") as fw:
@@ -3165,6 +3179,7 @@ def export_to_html(  # noqa: C901
         html_head: str = "null",  # should be deprecated ...
         included_content_layers: Optional[set[ContentLayer]] = None,
         split_page_view: bool = False,
+        include_annotations: bool = True,
     ) -> str:
         r"""Serialize to HTML."""
         from docling_core.transforms.serializer.html import (
@@ -3196,6 +3211,7 @@ def export_to_html(  # noqa: C901
             html_head=html_head,
             html_lang=html_lang,
             output_style=output_style,
+            include_annotations=include_annotations,
         )
 
         if html_head == "null":