diff --git a/Cargo.lock b/Cargo.lock index 4e00c9034bd9b..1cd9180d2cc0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4704,6 +4704,7 @@ dependencies = [ "itertools", "minifier", "once_cell", + "pulldown-cmark 0.10.0", "regex", "rustdoc-json-types", "serde", diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index e13e95ef70888..81e14e1cd7143 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -13,6 +13,7 @@ itertools = "0.11" indexmap = "2" minifier = "0.3.0" once_cell = "1.10.0" +pulldown-cmark-new = { version = "0.10", package = "pulldown-cmark", default-features = false } regex = "1" rustdoc-json-types = { path = "../rustdoc-json-types" } serde_json = "1.0" diff --git a/src/librustdoc/lint.rs b/src/librustdoc/lint.rs index f78743a7917ce..546c1ef7229a5 100644 --- a/src/librustdoc/lint.rs +++ b/src/librustdoc/lint.rs @@ -196,6 +196,14 @@ declare_rustdoc_lint! { "detects redundant explicit links in doc comments" } +declare_rustdoc_lint! { + /// This compatibility lint checks for Markdown syntax that works in the old engine but not + /// the new one. + UNPORTABLE_MARKDOWN, + Deny, + "detects markdown that is interpreted differently in different parser" +} + pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { vec![ BROKEN_INTRA_DOC_LINKS, @@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { MISSING_CRATE_LEVEL_DOCS, UNESCAPED_BACKTICKS, REDUNDANT_EXPLICIT_LINKS, + UNPORTABLE_MARKDOWN, ] }); diff --git a/src/librustdoc/passes/lint.rs b/src/librustdoc/passes/lint.rs index c6d5b7bd346d4..bc804a340bf2c 100644 --- a/src/librustdoc/passes/lint.rs +++ b/src/librustdoc/passes/lint.rs @@ -6,6 +6,7 @@ mod check_code_block_syntax; mod html_tags; mod redundant_explicit_links; mod unescaped_backticks; +mod unportable_markdown; use super::Pass; use crate::clean::*; @@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> { html_tags::visit_item(self.cx, item); unescaped_backticks::visit_item(self.cx, item); redundant_explicit_links::visit_item(self.cx, item); + unportable_markdown::visit_item(self.cx, item); self.visit_item_recur(item) } diff --git a/src/librustdoc/passes/lint/unportable_markdown.rs b/src/librustdoc/passes/lint/unportable_markdown.rs new file mode 100644 index 0000000000000..089beb0621cda --- /dev/null +++ b/src/librustdoc/passes/lint/unportable_markdown.rs @@ -0,0 +1,316 @@ +//! Detects markdown syntax that's different between pulldown-cmark +//! 0.9 and 0.10. + +use crate::clean::Item; +use crate::core::DocContext; +use crate::html::markdown::main_body_opts; +use pulldown_cmark as cmarko; +use pulldown_cmark_new as cmarkn; +use rustc_resolve::rustdoc::source_span_for_markdown_range; + +pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { + let tcx = cx.tcx; + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { + // If non-local, no need to check anything. + return; + }; + + let dox = item.doc_value(); + if dox.is_empty() { + return; + } + + let link_names = item.link_names(&cx.cache); + let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| { + link_names + .iter() + .find(|link| *link.original_text == *broken_link.reference) + .map(|link| ((*link.href).into(), (*link.new_text).into())) + }; + let parser_old = cmarko::Parser::new_with_broken_link_callback( + &dox, + main_body_opts(), + Some(&mut replacer_old), + ) + .into_offset_iter() + // Not worth cleaning up minor "distinctions without difference" in the AST. + // Text events get chopped up differently between versions. + // and `code` mistakes are usually covered by unescaped_backticks and html_tags lints. + .filter(|(event, _event_range)| { + !matches!( + event, + cmarko::Event::Code(_) + | cmarko::Event::Text(_) + | cmarko::Event::Html(_) + | cmarko::Event::SoftBreak + ) + }); + + pub fn main_body_opts_new() -> cmarkn::Options { + cmarkn::Options::ENABLE_TABLES + | cmarkn::Options::ENABLE_FOOTNOTES + | cmarkn::Options::ENABLE_STRIKETHROUGH + | cmarkn::Options::ENABLE_TASKLISTS + | cmarkn::Options::ENABLE_SMART_PUNCTUATION + } + let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| { + link_names + .iter() + .find(|link| *link.original_text.trim() == *broken_link.reference.trim()) + .map(|link| ((*link.href).into(), (*link.new_text).into())) + }; + let parser_new = cmarkn::Parser::new_with_broken_link_callback( + &dox, + main_body_opts_new(), + Some(&mut replacer_new), + ) + .into_offset_iter() + .filter(|(event, _event_range)| { + !matches!( + event, + cmarkn::Event::Code(_) + | cmarkn::Event::Text(_) + | cmarkn::Event::Html(_) + | cmarkn::Event::InlineHtml(_) + | cmarkn::Event::Start(cmarkn::Tag::HtmlBlock) + | cmarkn::Event::End(cmarkn::TagEnd::HtmlBlock) + | cmarkn::Event::SoftBreak + ) + }); + + let mut reported_an_error = false; + for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) { + match (event_old, event_new) { + ( + cmarko::Event::Start(cmarko::Tag::Emphasis), + cmarkn::Event::Start(cmarkn::Tag::Emphasis), + ) + | ( + cmarko::Event::Start(cmarko::Tag::Strong), + cmarkn::Event::Start(cmarkn::Tag::Strong), + ) + | ( + cmarko::Event::Start(cmarko::Tag::Strikethrough), + cmarkn::Event::Start(cmarkn::Tag::Strikethrough), + ) + | ( + cmarko::Event::End(cmarko::Tag::Emphasis), + cmarkn::Event::End(cmarkn::TagEnd::Emphasis), + ) + | ( + cmarko::Event::End(cmarko::Tag::Strong), + cmarkn::Event::End(cmarkn::TagEnd::Strong), + ) + | ( + cmarko::Event::End(cmarko::Tag::Strikethrough), + cmarkn::Event::End(cmarkn::TagEnd::Strikethrough), + ) + | ( + cmarko::Event::End(cmarko::Tag::Link(..)), + cmarkn::Event::End(cmarkn::TagEnd::Link), + ) + | ( + cmarko::Event::End(cmarko::Tag::Image(..)), + cmarkn::Event::End(cmarkn::TagEnd::Image), + ) + | (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..)) + | (cmarko::Event::TaskListMarker(false), cmarkn::Event::TaskListMarker(false)) + | (cmarko::Event::TaskListMarker(true), cmarkn::Event::TaskListMarker(true)) + if event_range_old == event_range_new => + { + // Matching tags. Do nothing. + } + ( + cmarko::Event::Start(cmarko::Tag::Link(_, old_dest_url, old_title)), + cmarkn::Event::Start(cmarkn::Tag::Link { dest_url, title, .. }), + ) + | ( + cmarko::Event::Start(cmarko::Tag::Image(_, old_dest_url, old_title)), + cmarkn::Event::Start(cmarkn::Tag::Image { dest_url, title, .. }), + ) if event_range_old == event_range_new + && &old_dest_url[..] == &dest_url[..] + && &old_title[..] == &title[..] => + { + // Matching tags. Do nothing. + } + (cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak) + | (cmarko::Event::HardBreak, cmarkn::Event::HardBreak) + | (cmarko::Event::Rule, cmarkn::Event::Rule) + | ( + cmarko::Event::Start(cmarko::Tag::Paragraph), + cmarkn::Event::Start(cmarkn::Tag::Paragraph), + ) + | ( + cmarko::Event::Start(cmarko::Tag::Heading(..)), + cmarkn::Event::Start(cmarkn::Tag::Heading { .. }), + ) + | ( + cmarko::Event::Start(cmarko::Tag::BlockQuote), + cmarkn::Event::Start(cmarkn::Tag::BlockQuote), + ) + | ( + cmarko::Event::Start(cmarko::Tag::CodeBlock(..)), + cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..)), + ) + | ( + cmarko::Event::Start(cmarko::Tag::List(..)), + cmarkn::Event::Start(cmarkn::Tag::List(..)), + ) + | (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item)) + | ( + cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)), + cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..)), + ) + | ( + cmarko::Event::Start(cmarko::Tag::Table(..)), + cmarkn::Event::Start(cmarkn::Tag::Table(..)), + ) + | ( + cmarko::Event::Start(cmarko::Tag::TableHead), + cmarkn::Event::Start(cmarkn::Tag::TableHead), + ) + | ( + cmarko::Event::Start(cmarko::Tag::TableRow), + cmarkn::Event::Start(cmarkn::Tag::TableRow), + ) + | ( + cmarko::Event::Start(cmarko::Tag::TableCell), + cmarkn::Event::Start(cmarkn::Tag::TableCell), + ) + | ( + cmarko::Event::End(cmarko::Tag::Paragraph), + cmarkn::Event::End(cmarkn::TagEnd::Paragraph), + ) + | ( + cmarko::Event::End(cmarko::Tag::Heading(..)), + cmarkn::Event::End(cmarkn::TagEnd::Heading(_)), + ) + | ( + cmarko::Event::End(cmarko::Tag::BlockQuote), + cmarkn::Event::End(cmarkn::TagEnd::BlockQuote), + ) + | ( + cmarko::Event::End(cmarko::Tag::CodeBlock(..)), + cmarkn::Event::End(cmarkn::TagEnd::CodeBlock), + ) + | ( + cmarko::Event::End(cmarko::Tag::List(..)), + cmarkn::Event::End(cmarkn::TagEnd::List(_)), + ) + | (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item)) + | ( + cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)), + cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition), + ) + | ( + cmarko::Event::End(cmarko::Tag::Table(..)), + cmarkn::Event::End(cmarkn::TagEnd::Table), + ) + | ( + cmarko::Event::End(cmarko::Tag::TableHead), + cmarkn::Event::End(cmarkn::TagEnd::TableHead), + ) + | ( + cmarko::Event::End(cmarko::Tag::TableRow), + cmarkn::Event::End(cmarkn::TagEnd::TableRow), + ) + | ( + cmarko::Event::End(cmarko::Tag::TableCell), + cmarkn::Event::End(cmarkn::TagEnd::TableCell), + ) => { + // Matching tags. Do nothing. + // + // Parsers sometimes differ in what they consider the "range of an event," + // even though the event is really the same. Inlines are pretty consistent, + // but stuff like list items? Not really. + // + // Mismatched block elements will usually nest differently, so ignoring it + // works good enough. + } + // If we've already reported an error on the start tag, don't bother on the end tag. + (cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {} + // Non-matching inline. + (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..)) + | ( + cmarko::Event::Start(cmarko::Tag::Image(..)), + cmarkn::Event::FootnoteReference(..), + ) + | ( + cmarko::Event::FootnoteReference(..), + cmarkn::Event::Start(cmarkn::Tag::Link { .. }), + ) + | ( + cmarko::Event::FootnoteReference(..), + cmarkn::Event::Start(cmarkn::Tag::Image { .. }), + ) if event_range_old == event_range_new => { + reported_an_error = true; + // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, + // use the span of the entire attribute as a fallback. + let span = source_span_for_markdown_range( + tcx, + &dox, + &event_range_old, + &item.attrs.doc_strings, + ) + .unwrap_or_else(|| item.attr_span(tcx)); + tcx.node_span_lint( + crate::lint::UNPORTABLE_MARKDOWN, + hir_id, + span, + "unportable markdown", + |lint| { + lint.help(format!("syntax ambiguous between footnote and link")); + }, + ); + } + // Non-matching results. + (event_old, event_new) => { + reported_an_error = true; + let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end + - event_range_old.start + < event_range_new.end - event_range_new.start + { + ( + event_range_old, + event_range_new, + "old", + "new", + format!("{event_old:?}"), + format!("{event_new:?}"), + ) + } else { + ( + event_range_new, + event_range_old, + "new", + "old", + format!("{event_new:?}"), + format!("{event_old:?}"), + ) + }; + let (range, tag_other) = + if range_other.start <= range.start && range_other.end <= range.end { + (range_other.start..range.end, tag_other) + } else { + (range, format!("nothing")) + }; + // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, + // use the span of the entire attribute as a fallback. + let span = + source_span_for_markdown_range(tcx, &dox, &range, &item.attrs.doc_strings) + .unwrap_or_else(|| item.attr_span(tcx)); + tcx.node_span_lint( + crate::lint::UNPORTABLE_MARKDOWN, + hir_id, + span, + "unportable markdown", + |lint| { + lint.help(format!( + "{desc} parser sees {tag}, {desc_other} sees {tag_other}" + )); + }, + ); + } + } + } +} diff --git a/tests/rustdoc-ui/unportable-markdown.rs b/tests/rustdoc-ui/unportable-markdown.rs new file mode 100644 index 0000000000000..ce8cd1d2f8eb2 --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.rs @@ -0,0 +1,43 @@ +// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929 +// +// A series of test cases for CommonMark corner cases that pulldown-cmark 0.10 fixes. + +/// +/// +/// Test footnote [^foot]. +/// +/// [^foot]: This is nested within the footnote now, but didn't used to be. +//~^ ERROR unportable markdown +/// +/// This is a multi-paragraph footnote. +pub struct GfmFootnotes; + +/// +/// +/// test [^] +//~^ ERROR unportable markdown +/// +/// [^]: test2 +pub struct FootnoteEmptyName; + +/// +/// +/// - _t +/// # test +//~^ ERROR unportable markdown +/// t_ +pub struct NestingCornerCase; + +/// +/// +/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ +//~^ ERROR unportable markdown +//~| ERROR unportable markdown +pub struct Emphasis1; + +/// +/// +/// | +//~^ ERROR unportable markdown +/// | +pub struct NotEnoughTable; diff --git a/tests/rustdoc-ui/unportable-markdown.stderr b/tests/rustdoc-ui/unportable-markdown.stderr new file mode 100644 index 0000000000000..a12a51c354c9e --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.stderr @@ -0,0 +1,61 @@ +error: unportable markdown + --> $DIR/unportable-markdown.rs:9:5 + | +LL | /// [^foot]: This is nested within the footnote now, but didn't used to be. + | _____^ +LL | | +LL | | /// +LL | | /// This is a multi-paragraph footnote. + | |___________________________________________^ + | + = help: new parser sees Start(Paragraph), old sees End(FootnoteDefinition(Borrowed("foot"))) + = note: `#[deny(rustdoc::unportable_markdown)]` on by default + +error: unportable markdown + --> $DIR/unportable-markdown.rs:17:10 + | +LL | /// test [^] + | ^^^ + | + = help: new parser sees Start(Link { link_type: Shortcut, dest_url: Borrowed("test2"), title: Borrowed(""), id: Borrowed("^") }), old sees nothing + +error: unportable markdown + --> $DIR/unportable-markdown.rs:26:7 + | +LL | /// # test + | _______^ +LL | | +LL | | /// t_ + | |____^ + | + = help: new parser sees Start(Heading { level: H1, id: None, classes: [], attrs: [] }), old sees nothing + +error: unportable markdown + --> $DIR/unportable-markdown.rs:33:40 + | +LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = help: old parser sees Start(Emphasis), new sees nothing + +error: unportable markdown + --> $DIR/unportable-markdown.rs:33:41 + | +LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = help: old parser sees Start(Strong), new sees nothing + +error: unportable markdown + --> $DIR/unportable-markdown.rs:40:5 + | +LL | /// | + | _____^ +LL | | +LL | | /// | + | |_____^ + | + = help: new parser sees Start(Paragraph), old sees Start(Table([])) + +error: aborting due to 6 previous errors +