Skip to content

More macro expansion optimizations #95259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 51 additions & 36 deletions compiler/rustc_expand/src/mbe/macro_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,14 @@ struct MatcherTtFrame<'tt> {
idx: usize,
}

type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
// vectors longer than one frequently have many elements, not just two or
// three.
type NamedMatchVec = SmallVec<[NamedMatch; 1]>;

// This type is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);

/// Represents a single "position" (aka "matcher position", aka "item"), as
/// described in the module documentation.
Expand Down Expand Up @@ -153,7 +160,7 @@ struct MatcherPos<'tt> {

// This type is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 136);
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 112);

impl<'tt> MatcherPos<'tt> {
/// `len` `Vec`s (initially shared and empty) that will store matches of metavars.
Expand Down Expand Up @@ -202,11 +209,7 @@ impl<'tt> MatcherPos<'tt> {
match_lo: up.match_cur,
match_cur: up.match_cur,
match_hi: up.match_cur + seq.num_captures,
repetition: Some(MatcherPosRepetition {
up,
sep: seq.separator.clone(),
seq_op: seq.kleene.op,
}),
repetition: Some(MatcherPosRepetition { up, seq }),
stack: smallvec![],
}
}
Expand All @@ -220,15 +223,12 @@ impl<'tt> MatcherPos<'tt> {

#[derive(Clone)]
struct MatcherPosRepetition<'tt> {
/// The KleeneOp of this sequence.
seq_op: mbe::KleeneOp,

/// The separator.
sep: Option<Token>,

/// The "parent" matcher position. That is, the matcher position just before we enter the
/// sequence.
up: Box<MatcherPos<'tt>>,

/// The sequence itself.
seq: &'tt SequenceRepetition,
}

enum EofItems<'tt> {
Expand Down Expand Up @@ -274,22 +274,20 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
})
}

/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
/// so it is associated with a single ident in a parse, and all
/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
/// (expr, item, etc). Each leaf in a single `NamedMatch` corresponds to a
/// single `token::MATCH_NONTERMINAL` in the `TokenTree` that produced it.
/// `NamedMatch` is a pattern-match result for a single metavar. All
/// `MatchedNtNonTt`s in the `NamedMatch` have the same non-terminal type
/// (expr, item, etc).
///
/// The in-memory structure of a particular `NamedMatch` represents the match
/// that occurred when a particular subset of a matcher was applied to a
/// particular token tree.
///
/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
/// the `MatchedNonterminal`s, will depend on the token tree it was applied
/// to: each `MatchedSeq` corresponds to a single `TTSeq` in the originating
/// the `MatchedNtNonTt`s, will depend on the token tree it was applied
/// to: each `MatchedSeq` corresponds to a single repetition in the originating
/// token tree. The depth of the `NamedMatch` structure will therefore depend
/// only on the nesting depth of `ast::TTSeq`s in the originating
/// token tree it was derived from.
/// only on the nesting depth of repetitions in the originating token tree it
/// was derived from.
///
/// In layman's terms: `NamedMatch` will form a tree representing nested matches of a particular
/// meta variable. For example, if we are matching the following macro against the following
Expand All @@ -308,24 +306,32 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
/// ```rust
/// MatchedSeq([
/// MatchedSeq([
/// MatchedNonterminal(a),
/// MatchedNonterminal(b),
/// MatchedNonterminal(c),
/// MatchedNonterminal(d),
/// MatchedNtNonTt(a),
/// MatchedNtNonTt(b),
/// MatchedNtNonTt(c),
/// MatchedNtNonTt(d),
/// ]),
/// MatchedSeq([
/// MatchedNonterminal(a),
/// MatchedNonterminal(b),
/// MatchedNonterminal(c),
/// MatchedNonterminal(d),
/// MatchedNonterminal(e),
/// MatchedNtNonTt(a),
/// MatchedNtNonTt(b),
/// MatchedNtNonTt(c),
/// MatchedNtNonTt(d),
/// MatchedNtNonTt(e),
/// ])
/// ])
/// ```
#[derive(Debug, Clone)]
crate enum NamedMatch {
MatchedSeq(Lrc<NamedMatchVec>),
MatchedNonterminal(Lrc<Nonterminal>),

// This variant should never hold an `NtTT`. `MatchedNtTt` should be used
// for that case.
MatchedNtNonTt(Lrc<Nonterminal>),

// `NtTT` is handled without any cloning when transcribing, unlike other
// nonterminals. Therefore, an `Lrc` isn't helpful and causes unnecessary
// allocations. Hence this separate variant.
MatchedNtTt(rustc_ast::tokenstream::TokenTree),
}

/// Takes a slice of token trees `ms` representing a matcher which successfully matched input
Expand Down Expand Up @@ -546,14 +552,19 @@ impl<'tt> TtParser<'tt> {
self.cur_items.push(new_pos);
}

if idx == len && repetition.sep.is_some() {
if repetition.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
if idx == len && repetition.seq.separator.is_some() {
if repetition
.seq
.separator
.as_ref()
.map_or(false, |sep| token_name_eq(token, sep))
{
// The matcher has a separator, and it matches the current token. We can
// advance past the separator token.
item.idx += 1;
self.next_items.push(item);
}
} else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne {
} else if repetition.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
// We don't need a separator. Move the "dot" back to the beginning of the
// matcher and try to match again UNLESS we are only allowed to have _one_
// repetition.
Expand Down Expand Up @@ -665,7 +676,11 @@ impl<'tt> TtParser<'tt> {
}
Ok(nt) => nt,
};
item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt)));
let m = match nt {
Nonterminal::NtTT(tt) => MatchedNtTt(tt),
_ => MatchedNtNonTt(Lrc::new(nt)),
};
item.push_match(match_cur, m);
item.idx += 1;
item.match_cur += 1;
} else {
Expand Down
60 changes: 28 additions & 32 deletions compiler/rustc_expand/src/mbe/macro_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF
use crate::mbe;
use crate::mbe::macro_check;
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
use crate::mbe::macro_parser::{MatchedNtTt, MatchedSeq};
use crate::mbe::transcribe::transcribe;

use rustc_ast as ast;
use rustc_ast::token::{self, NonterminalKind, NtTT, Token, TokenKind::*};
use rustc_ast::token::{self, NonterminalKind, Token, TokenKind::*};
use rustc_ast::tokenstream::{DelimSpan, TokenStream};
use rustc_ast::{NodeId, DUMMY_NODE_ID};
use rustc_ast_pretty::pprust;
Expand Down Expand Up @@ -470,22 +470,20 @@ pub fn compile_declarative_macro(
MatchedSeq(ref s) => s
.iter()
.map(|m| {
if let MatchedNonterminal(ref nt) = *m {
if let NtTT(ref tt) = **nt {
let mut tts = vec![];
mbe::quoted::parse(
tt.clone().into(),
true,
&sess.parse_sess,
def.id,
features,
edition,
&mut tts,
);
let tt = tts.pop().unwrap();
valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt);
return tt;
}
if let MatchedNtTt(ref tt) = *m {
let mut tts = vec![];
mbe::quoted::parse(
tt.clone().into(),
true,
&sess.parse_sess,
def.id,
features,
edition,
&mut tts,
);
let tt = tts.pop().unwrap();
valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt);
return tt;
}
sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
})
Expand All @@ -497,20 +495,18 @@ pub fn compile_declarative_macro(
MatchedSeq(ref s) => s
.iter()
.map(|m| {
if let MatchedNonterminal(ref nt) = *m {
if let NtTT(ref tt) = **nt {
let mut tts = vec![];
mbe::quoted::parse(
tt.clone().into(),
false,
&sess.parse_sess,
def.id,
features,
edition,
&mut tts,
);
return tts.pop().unwrap();
}
if let MatchedNtTt(ref tt) = *m {
let mut tts = vec![];
mbe::quoted::parse(
tt.clone().into(),
false,
&sess.parse_sess,
def.id,
features,
edition,
&mut tts,
);
return tts.pop().unwrap();
}
sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
})
Expand Down
40 changes: 22 additions & 18 deletions compiler/rustc_expand/src/mbe/transcribe.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::base::ExtCtxt;
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedMatch};
use crate::mbe::macro_parser::{MatchedNtNonTt, MatchedNtTt, MatchedSeq, NamedMatch};
use crate::mbe::{self, MetaVarExpr};
use rustc_ast::mut_visit::{self, MutVisitor};
use rustc_ast::token::{self, NtTT, Token, TokenKind};
use rustc_ast::token::{self, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndSpacing};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
Expand Down Expand Up @@ -233,25 +233,29 @@ pub(super) fn transcribe<'a>(
// the meta-var.
let ident = MacroRulesNormalizedIdent::new(orignal_ident);
if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
if let MatchedNonterminal(nt) = cur_matched {
let token = if let NtTT(tt) = &**nt {
match cur_matched {
MatchedNtTt(ref tt) => {
// `tt`s are emitted into the output stream directly as "raw tokens",
// without wrapping them into groups.
tt.clone()
} else {
let token = tt.clone();
result.push(token.into());
}
MatchedNtNonTt(ref nt) => {
// Other variables are emitted into the output stream as groups with
// `Delimiter::None` to maintain parsing priorities.
// `Interpolated` is currently used for such groups in rustc parser.
debug_assert!(!matches!(**nt, Nonterminal::NtTT(_)));
marker.visit_span(&mut sp);
TokenTree::token(token::Interpolated(nt.clone()), sp)
};
result.push(token.into());
} else {
// We were unable to descend far enough. This is an error.
return Err(cx.struct_span_err(
sp, /* blame the macro writer */
&format!("variable '{}' is still repeating at this depth", ident),
));
let token = TokenTree::token(token::Interpolated(nt.clone()), sp);
result.push(token.into());
}
MatchedSeq(..) => {
// We were unable to descend far enough. This is an error.
return Err(cx.struct_span_err(
sp, /* blame the macro writer */
&format!("variable '{}' is still repeating at this depth", ident),
));
}
}
} else {
// If we aren't able to match the meta-var, we push it back into the result but
Expand Down Expand Up @@ -308,7 +312,7 @@ fn lookup_cur_matched<'a>(
let mut matched = matched;
for &(idx, _) in repeats {
match matched {
MatchedNonterminal(_) => break,
MatchedNtTt(_) | MatchedNtNonTt(_) => break,
MatchedSeq(ref ads) => matched = ads.get(idx).unwrap(),
}
}
Expand Down Expand Up @@ -398,7 +402,7 @@ fn lockstep_iter_size(
let name = MacroRulesNormalizedIdent::new(name);
match lookup_cur_matched(name, interpolations, repeats) {
Some(matched) => match matched {
MatchedNonterminal(_) => LockstepIterSize::Unconstrained,
MatchedNtTt(_) | MatchedNtNonTt(_) => LockstepIterSize::Unconstrained,
MatchedSeq(ref ads) => LockstepIterSize::Constraint(ads.len(), name),
},
_ => LockstepIterSize::Unconstrained,
Expand Down Expand Up @@ -445,7 +449,7 @@ fn count_repetitions<'a>(
sp: &DelimSpan,
) -> PResult<'a, usize> {
match matched {
MatchedNonterminal(_) => {
MatchedNtTt(_) | MatchedNtNonTt(_) => {
if declared_lhs_depth == 0 {
return Err(cx.struct_span_err(
sp.entire(),
Expand Down