Skip to content

Commit 65f4f39

Browse files
committed
Get rid of locate() in markdown handling
This function was unfortunate for several reasons: - It used `unsafe` because it wanted to tell whether a string came from the same *allocation* as another, not just whether it was a textual match. - It recalculated spans even though they were already available from pulldown - It sometimes *failed* to calculate the span, which meant it was always possible for the span to be `None`, even though in practice that should never happen. This commit has several cleanups: - Make the span required - Pass through the span from pulldown in the `HeadingLinks` and `Footnotes` iterators - Only add iterator bounds on the `impl Iterator`, not on `new` and the struct itself.
1 parent 50a9097 commit 65f4f39

File tree

2 files changed

+90
-102
lines changed

2 files changed

+90
-102
lines changed

src/librustdoc/html/markdown.rs

Lines changed: 62 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -447,61 +447,60 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
447447
}
448448

449449
/// Make headings links with anchor IDs and build up TOC.
450-
struct HeadingLinks<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> {
450+
struct HeadingLinks<'a, 'b, 'ids, I> {
451451
inner: I,
452452
toc: Option<&'b mut TocBuilder>,
453-
buf: VecDeque<Event<'a>>,
453+
buf: VecDeque<(Event<'a>, Range<usize>)>,
454454
id_map: &'ids mut IdMap,
455455
}
456456

457-
impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> HeadingLinks<'a, 'b, 'ids, I> {
457+
impl<'a, 'b, 'ids, I> HeadingLinks<'a, 'b, 'ids, I> {
458458
fn new(iter: I, toc: Option<&'b mut TocBuilder>, ids: &'ids mut IdMap) -> Self {
459459
HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids }
460460
}
461461
}
462462

463-
impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a, 'b, 'ids, I> {
464-
type Item = Event<'a>;
463+
impl<'a, 'b, 'ids, I: Iterator<Item = (Event<'a>, Range<usize>)>> Iterator
464+
for HeadingLinks<'a, 'b, 'ids, I>
465+
{
466+
type Item = (Event<'a>, Range<usize>);
465467

466468
fn next(&mut self) -> Option<Self::Item> {
467469
if let Some(e) = self.buf.pop_front() {
468470
return Some(e);
469471
}
470472

471473
let event = self.inner.next();
472-
if let Some(Event::Start(Tag::Heading(level))) = event {
474+
if let Some((Event::Start(Tag::Heading(level)), _)) = event {
473475
let mut id = String::new();
474476
for event in &mut self.inner {
475-
match &event {
477+
match event.0 {
476478
Event::End(Tag::Heading(..)) => break,
477479
Event::Text(text) | Event::Code(text) => {
478480
id.extend(text.chars().filter_map(slugify));
479481
}
480-
_ => {}
481-
}
482-
match event {
483482
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
484-
event => self.buf.push_back(event),
483+
_ => self.buf.push_back(event),
485484
}
486485
}
487486
let id = self.id_map.derive(id);
488487

489488
if let Some(ref mut builder) = self.toc {
490489
let mut html_header = String::new();
491-
html::push_html(&mut html_header, self.buf.iter().cloned());
490+
html::push_html(&mut html_header, self.buf.iter().map(|(ev, _)| ev.clone()));
492491
let sec = builder.push(level as u32, html_header, id.clone());
493-
self.buf.push_front(Event::Html(format!("{} ", sec).into()));
492+
self.buf.push_front((Event::Html(format!("{} ", sec).into()), 0..0));
494493
}
495494

496-
self.buf.push_back(Event::Html(format!("</a></h{}>", level).into()));
495+
self.buf.push_back((Event::Html(format!("</a></h{}>", level).into()), 0..0));
497496

498497
let start_tags = format!(
499498
"<h{level} id=\"{id}\" class=\"section-header\">\
500499
<a href=\"#{id}\">",
501500
id = id,
502501
level = level
503502
);
504-
return Some(Event::Html(start_tags.into()));
503+
return Some((Event::Html(start_tags.into()), 0..0));
505504
}
506505
event
507506
}
@@ -575,39 +574,40 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
575574

576575
/// Moves all footnote definitions to the end and add back links to the
577576
/// references.
578-
struct Footnotes<'a, I: Iterator<Item = Event<'a>>> {
577+
struct Footnotes<'a, I> {
579578
inner: I,
580579
footnotes: FxHashMap<String, (Vec<Event<'a>>, u16)>,
581580
}
582581

583-
impl<'a, I: Iterator<Item = Event<'a>>> Footnotes<'a, I> {
582+
impl<'a, I> Footnotes<'a, I> {
584583
fn new(iter: I) -> Self {
585584
Footnotes { inner: iter, footnotes: FxHashMap::default() }
586585
}
586+
587587
fn get_entry(&mut self, key: &str) -> &mut (Vec<Event<'a>>, u16) {
588588
let new_id = self.footnotes.keys().count() + 1;
589589
let key = key.to_owned();
590590
self.footnotes.entry(key).or_insert((Vec::new(), new_id as u16))
591591
}
592592
}
593593

594-
impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
595-
type Item = Event<'a>;
594+
impl<'a, I: Iterator<Item = (Event<'a>, Range<usize>)>> Iterator for Footnotes<'a, I> {
595+
type Item = (Event<'a>, Range<usize>);
596596

597597
fn next(&mut self) -> Option<Self::Item> {
598598
loop {
599599
match self.inner.next() {
600-
Some(Event::FootnoteReference(ref reference)) => {
600+
Some((Event::FootnoteReference(ref reference), range)) => {
601601
let entry = self.get_entry(&reference);
602602
let reference = format!(
603603
"<sup id=\"fnref{0}\"><a href=\"#fn{0}\">{0}</a></sup>",
604604
(*entry).1
605605
);
606-
return Some(Event::Html(reference.into()));
606+
return Some((Event::Html(reference.into()), range));
607607
}
608-
Some(Event::Start(Tag::FootnoteDefinition(def))) => {
608+
Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
609609
let mut content = Vec::new();
610-
for event in &mut self.inner {
610+
for (event, _) in &mut self.inner {
611611
if let Event::End(Tag::FootnoteDefinition(..)) = event {
612612
break;
613613
}
@@ -638,7 +638,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
638638
ret.push_str("</li>");
639639
}
640640
ret.push_str("</ol></div>");
641-
return Some(Event::Html(ret.into()));
641+
return Some((Event::Html(ret.into()), 0..0));
642642
} else {
643643
return None;
644644
}
@@ -946,13 +946,14 @@ impl Markdown<'_> {
946946
};
947947

948948
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer));
949+
let p = p.into_offset_iter();
949950

950951
let mut s = String::with_capacity(md.len() * 3 / 2);
951952

952953
let p = HeadingLinks::new(p, None, &mut ids);
953-
let p = LinkReplacer::new(p, links);
954-
let p = CodeBlocks::new(p, codes, edition, playground);
955954
let p = Footnotes::new(p);
955+
let p = LinkReplacer::new(p.map(|(ev, _)| ev), links);
956+
let p = CodeBlocks::new(p, codes, edition, playground);
956957
html::push_html(&mut s, p);
957958

958959
s
@@ -963,16 +964,16 @@ impl MarkdownWithToc<'_> {
963964
crate fn into_string(self) -> String {
964965
let MarkdownWithToc(md, mut ids, codes, edition, playground) = self;
965966

966-
let p = Parser::new_ext(md, opts());
967+
let p = Parser::new_ext(md, opts()).into_offset_iter();
967968

968969
let mut s = String::with_capacity(md.len() * 3 / 2);
969970

970971
let mut toc = TocBuilder::new();
971972

972973
{
973974
let p = HeadingLinks::new(p, Some(&mut toc), &mut ids);
974-
let p = CodeBlocks::new(p, codes, edition, playground);
975975
let p = Footnotes::new(p);
976+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
976977
html::push_html(&mut s, p);
977978
}
978979

@@ -988,19 +989,19 @@ impl MarkdownHtml<'_> {
988989
if md.is_empty() {
989990
return String::new();
990991
}
991-
let p = Parser::new_ext(md, opts());
992+
let p = Parser::new_ext(md, opts()).into_offset_iter();
992993

993994
// Treat inline HTML as plain text.
994-
let p = p.map(|event| match event {
995-
Event::Html(text) => Event::Text(text),
995+
let p = p.map(|event| match event.0 {
996+
Event::Html(text) => (Event::Text(text), event.1),
996997
_ => event,
997998
});
998999

9991000
let mut s = String::with_capacity(md.len() * 3 / 2);
10001001

10011002
let p = HeadingLinks::new(p, None, &mut ids);
1002-
let p = CodeBlocks::new(p, codes, edition, playground);
10031003
let p = Footnotes::new(p);
1004+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
10041005
html::push_html(&mut s, p);
10051006

10061007
s
@@ -1153,50 +1154,43 @@ crate fn plain_text_summary(md: &str) -> String {
11531154
s
11541155
}
11551156

1156-
crate fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
1157+
crate fn markdown_links(md: &str) -> Vec<(String, Range<usize>)> {
11571158
if md.is_empty() {
11581159
return vec![];
11591160
}
11601161

11611162
let mut links = vec![];
11621163
let mut shortcut_links = vec![];
11631164

1164-
{
1165-
let locate = |s: &str| unsafe {
1166-
let s_start = s.as_ptr();
1167-
let s_end = s_start.add(s.len());
1168-
let md_start = md.as_ptr();
1169-
let md_end = md_start.add(md.len());
1170-
if md_start <= s_start && s_end <= md_end {
1171-
let start = s_start.offset_from(md_start) as usize;
1172-
let end = s_end.offset_from(md_start) as usize;
1173-
Some(start..end)
1174-
} else {
1175-
None
1176-
}
1177-
};
1178-
1179-
let mut push = |link: BrokenLink<'_>| {
1180-
// FIXME: use `link.span` instead of `locate`
1181-
// (doing it now includes the `[]` as well as the text)
1182-
shortcut_links.push((link.reference.to_owned(), locate(link.reference)));
1183-
None
1184-
};
1185-
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push));
1186-
1187-
// There's no need to thread an IdMap through to here because
1188-
// the IDs generated aren't going to be emitted anywhere.
1189-
let mut ids = IdMap::new();
1190-
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
1191-
1192-
for ev in iter {
1193-
if let Event::Start(Tag::Link(_, dest, _)) = ev {
1194-
debug!("found link: {}", dest);
1195-
links.push(match dest {
1196-
CowStr::Borrowed(s) => (s.to_owned(), locate(s)),
1197-
s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None),
1198-
});
1165+
let span_for_link = |link: &str, span: Range<usize>| {
1166+
// Pulldown includes the `[]` as well as the URL. Only highlight the relevant span.
1167+
// NOTE: uses `rfind` in case the title and url are the same: `[Ok][Ok]`
1168+
match md[span.clone()].rfind(link) {
1169+
Some(start) => {
1170+
let start = span.start + start;
1171+
start..start + link.len()
11991172
}
1173+
// This can happen for things other than intra-doc links, like `#1` expanded to `https://github.com/rust-lang/rust/issues/1`.
1174+
None => span,
1175+
}
1176+
};
1177+
let mut push = |link: BrokenLink<'_>| {
1178+
let span = span_for_link(link.reference, link.span);
1179+
shortcut_links.push((link.reference.to_owned(), span));
1180+
None
1181+
};
1182+
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push));
1183+
1184+
// There's no need to thread an IdMap through to here because
1185+
// the IDs generated aren't going to be emitted anywhere.
1186+
let mut ids = IdMap::new();
1187+
let iter = Footnotes::new(HeadingLinks::new(p.into_offset_iter(), None, &mut ids));
1188+
1189+
for ev in iter {
1190+
if let Event::Start(Tag::Link(_, dest, _)) = ev.0 {
1191+
debug!("found link: {}", dest);
1192+
let span = span_for_link(&dest, ev.1);
1193+
links.push((dest.into_string(), span));
12001194
}
12011195
}
12021196

0 commit comments

Comments
 (0)