From 39d51bd51c941021567e6adae1f80c3cbcc31d47 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 26 Jul 2023 09:11:18 +1000 Subject: [PATCH 1/4] Remove `Parser::desugar_doc_comments`. It's currently stored twice: once in `Parser`, once in the `TokenCursor` within `Parser`. We only need the latter. --- compiler/rustc_parse/src/parser/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 2e1a61e634e62..843e1e4d1576d 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -138,7 +138,6 @@ pub struct Parser<'a> { // Important: This must only be advanced from `bump` to ensure that // `token_cursor.num_next_calls` is updated properly. token_cursor: TokenCursor, - desugar_doc_comments: bool, /// This field is used to keep track of how many left angle brackets we have seen. This is /// required in order to detect extra leading left angle brackets (`<` characters) and error /// appropriately. @@ -463,7 +462,6 @@ impl<'a> Parser<'a> { desugar_doc_comments, break_last_token: false, }, - desugar_doc_comments, unmatched_angle_bracket_count: 0, max_angle_bracket_count: 0, last_unexpected_token_span: None, @@ -1107,7 +1105,7 @@ impl<'a> Parser<'a> { pub fn bump(&mut self) { // Note: destructuring here would give nicer code, but it was found in #96210 to be slower // than `.0`/`.1` access. - let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments); + let mut next = self.token_cursor.inlined_next(self.token_cursor.desugar_doc_comments); self.token_cursor.num_next_calls += 1; // We've retrieved an token from the underlying // cursor, so we no longer need to worry about From 395e95c4900367930af2f18c9a53ce18ed55d5d5 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 26 Jul 2023 09:17:16 +1000 Subject: [PATCH 2/4] Tweak `Parser::look_ahead`. 
It doesn't really matter what the `desugar_doc_comments` argument is here, because in practice we never look ahead through doc comments. Changing it to `cursor.desugar_doc_comments` will allow some follow-up simplifications. --- compiler/rustc_parse/src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 843e1e4d1576d..1bf48d509a16f 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -1155,7 +1155,7 @@ impl<'a> Parser<'a> { let mut i = 0; let mut token = Token::dummy(); while i < dist { - token = cursor.next(/* desugar_doc_comments */ false).0; + token = cursor.next(cursor.desugar_doc_comments).0; if matches!( token.kind, token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) From 8bfc69285146bdfa97ea38bb9bcd5c8a92c1de47 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 26 Jul 2023 09:17:32 +1000 Subject: [PATCH 3/4] Remove `desugar_doc_comments` arguments from `TokenCursor::{inlined_,}next`. Because it's now always `self.desugar_doc_comments`. 
--- .../rustc_parse/src/parser/attr_wrapper.rs | 2 +- compiler/rustc_parse/src/parser/mod.rs | 41 +++++++++++-------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index b579da098d8a1..4cc03664b47c0 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -107,7 +107,7 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { let tokens = std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1)) .chain((0..self.num_calls).map(|_| { - let token = cursor_snapshot.next(cursor_snapshot.desugar_doc_comments); + let token = cursor_snapshot.next(); (FlatToken::Token(token.0), token.1) })) .take(self.num_calls); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 1bf48d509a16f..1715d9f6cc439 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -254,33 +254,38 @@ struct TokenCursor { } impl TokenCursor { - fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { - self.inlined_next(desugar_doc_comments) + fn next(&mut self) -> (Token, Spacing) { + self.inlined_next() } /// This always-inlined version should only be used on hot code paths. #[inline(always)] - fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { + fn inlined_next(&mut self) -> (Token, Spacing) { loop { // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be // removed. 
if let Some(tree) = self.tree_cursor.next_ref() { match tree { - &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) { - (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { - let desugared = self.desugar(attr_style, data, span); - self.tree_cursor.replace_prev_and_rewind(desugared); - // Continue to get the first token of the desugared doc comment. - } - _ => { - debug_assert!(!matches!( - token.kind, - token::OpenDelim(_) | token::CloseDelim(_) - )); - return (token.clone(), spacing); + &TokenTree::Token(ref token, spacing) => { + match (self.desugar_doc_comments, token) { + ( + true, + &Token { kind: token::DocComment(_, attr_style, data), span }, + ) => { + let desugared = self.desugar(attr_style, data, span); + self.tree_cursor.replace_prev_and_rewind(desugared); + // Continue to get the first token of the desugared doc comment. + } + _ => { + debug_assert!(!matches!( + token.kind, + token::OpenDelim(_) | token::CloseDelim(_) + )); + return (token.clone(), spacing); + } } - }, + } &TokenTree::Delimited(sp, delim, ref tts) => { let trees = tts.clone().into_trees(); self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp)); @@ -1105,7 +1110,7 @@ impl<'a> Parser<'a> { pub fn bump(&mut self) { // Note: destructuring here would give nicer code, but it was found in #96210 to be slower // than `.0`/`.1` access. 
- let mut next = self.token_cursor.inlined_next(self.token_cursor.desugar_doc_comments); + let mut next = self.token_cursor.inlined_next(); self.token_cursor.num_next_calls += 1; // We've retrieved an token from the underlying // cursor, so we no longer need to worry about @@ -1155,7 +1160,7 @@ impl<'a> Parser<'a> { let mut i = 0; let mut token = Token::dummy(); while i < dist { - token = cursor.next(cursor.desugar_doc_comments).0; + token = cursor.next().0; if matches!( token.kind, token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) From 34b218e4548f9a989a4131979a61d15fd73289e9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 26 Jul 2023 09:22:59 +1000 Subject: [PATCH 4/4] Add a comment to `TokenCursor::desugar_doc_comments`. Useful information that took me some time to discern. --- compiler/rustc_parse/src/parser/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 1715d9f6cc439..37b4c371c94b4 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -224,6 +224,9 @@ struct TokenCursor { // because it's the outermost token stream which never has delimiters. stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>, + // We need to desugar doc comments from `/// foo` form into `#[doc = + // r"foo"]` form when parsing declarative macro inputs in `parse_tt`, + // because some declarative macros look for `doc` attributes. desugar_doc_comments: bool, // Counts the number of calls to `{,inlined_}next`.