Skip to content

hygiene: Ensure uniqueness of SyntaxContextDatas #130324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 26, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 120 additions & 73 deletions compiler/rustc_span/src/hygiene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::hash_set::Entry as SetEntry;
use std::fmt;
use std::hash::Hash;
use std::sync::Arc;
use std::{fmt, iter, mem};

use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
Expand Down Expand Up @@ -57,7 +57,11 @@ pub struct SyntaxContext(u32);
impl !Ord for SyntaxContext {}
impl !PartialOrd for SyntaxContext {}

#[derive(Debug, Encodable, Decodable, Clone)]
/// The part of `SyntaxContextData` that identifies a syntax context: if two syntax contexts have
/// equal keys, then the syntax contexts themselves should be equal.
/// The other fields are only for caching.
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);

#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
pub struct SyntaxContextData {
outer_expn: ExpnId,
outer_transparency: Transparency,
Expand All @@ -70,6 +74,31 @@ pub struct SyntaxContextData {
dollar_crate_name: Symbol,
}

impl SyntaxContextData {
    /// Builds the data stored for the root syntax context.
    ///
    /// The outer expansion is `ExpnId::root()` with `Transparency::Opaque`, every context
    /// link (`parent`, `opaque`, `opaque_and_semitransparent`) is `SyntaxContext::root()`,
    /// and the `$crate` name is still the unresolved `kw::DollarCrate`.
    fn root() -> SyntaxContextData {
        let root_ctxt = SyntaxContext::root();
        SyntaxContextData {
            outer_expn: ExpnId::root(),
            outer_transparency: Transparency::Opaque,
            parent: root_ctxt,
            opaque: root_ctxt,
            opaque_and_semitransparent: root_ctxt,
            dollar_crate_name: kw::DollarCrate,
        }
    }

    /// Builds dummy data used to reserve a slot while a syntax context is being decoded.
    ///
    /// It is the root data with `dollar_crate_name` set to `kw::Empty`; that name is the
    /// marker `is_decode_placeholder` looks for.
    fn decode_placeholder() -> SyntaxContextData {
        let mut placeholder = SyntaxContextData::root();
        placeholder.dollar_crate_name = kw::Empty;
        placeholder
    }

    /// Returns `true` if this data carries the `kw::Empty` marker set by `decode_placeholder`.
    fn is_decode_placeholder(&self) -> bool {
        let SyntaxContextData { dollar_crate_name, .. } = *self;
        dollar_crate_name == kw::Empty
    }

    /// Returns the `(parent, outer_expn, outer_transparency)` triple used as the key
    /// in `syntax_context_map`.
    fn key(&self) -> SyntaxContextKey {
        let SyntaxContextData { parent, outer_expn, outer_transparency, .. } = *self;
        (parent, outer_expn, outer_transparency)
    }
}

rustc_index::newtype_index! {
/// A unique ID associated with a macro invocation and expansion.
#[orderable]
Expand Down Expand Up @@ -342,7 +371,7 @@ pub(crate) struct HygieneData {
foreign_expn_hashes: FxHashMap<ExpnId, ExpnHash>,
expn_hash_to_expn_id: UnhashMap<ExpnHash, ExpnId>,
syntax_context_data: Vec<SyntaxContextData>,
syntax_context_map: FxHashMap<(SyntaxContext, ExpnId, Transparency), SyntaxContext>,
syntax_context_map: FxHashMap<SyntaxContextKey, SyntaxContext>,
/// Maps the `local_hash` of an `ExpnData` to the next disambiguator value.
/// This is used by `update_disambiguator` to keep track of which `ExpnData`s
/// would have collisions without a disambiguator.
Expand All @@ -361,22 +390,16 @@ impl HygieneData {
None,
);

let root_ctxt_data = SyntaxContextData::root();
HygieneData {
local_expn_data: IndexVec::from_elem_n(Some(root_data), 1),
local_expn_hashes: IndexVec::from_elem_n(ExpnHash(Fingerprint::ZERO), 1),
foreign_expn_data: FxHashMap::default(),
foreign_expn_hashes: FxHashMap::default(),
expn_hash_to_expn_id: std::iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
expn_hash_to_expn_id: iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
.collect(),
syntax_context_data: vec![SyntaxContextData {
outer_expn: ExpnId::root(),
outer_transparency: Transparency::Opaque,
parent: SyntaxContext(0),
opaque: SyntaxContext(0),
opaque_and_semitransparent: SyntaxContext(0),
dollar_crate_name: kw::DollarCrate,
}],
syntax_context_map: FxHashMap::default(),
syntax_context_data: vec![root_ctxt_data],
syntax_context_map: iter::once((root_ctxt_data.key(), SyntaxContext(0))).collect(),
expn_data_disambiguators: UnhashMap::default(),
}
}
Expand Down Expand Up @@ -425,23 +448,28 @@ impl HygieneData {
}

/// Returns the `opaque` context cached in the data of `ctxt`.
///
/// Panics if `ctxt` is out of bounds for `syntax_context_data`; in builds with debug
/// assertions it also panics if the slot still holds a decode placeholder.
fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext {
    let data = &self.syntax_context_data[ctxt.0 as usize];
    debug_assert!(!data.is_decode_placeholder());
    data.opaque
}

/// Returns the `opaque_and_semitransparent` context cached in the data of `ctxt`.
///
/// Panics if `ctxt` is out of bounds for `syntax_context_data`; in builds with debug
/// assertions it also panics if the slot still holds a decode placeholder.
fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext {
    let data = &self.syntax_context_data[ctxt.0 as usize];
    debug_assert!(!data.is_decode_placeholder());
    data.opaque_and_semitransparent
}

/// Returns the `outer_expn` recorded in the data of `ctxt`.
///
/// Panics if `ctxt` is out of bounds for `syntax_context_data`; in builds with debug
/// assertions it also panics if the slot still holds a decode placeholder.
fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId {
    let data = &self.syntax_context_data[ctxt.0 as usize];
    debug_assert!(!data.is_decode_placeholder());
    data.outer_expn
}

fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) {
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
let data = &self.syntax_context_data[ctxt.0 as usize];
(data.outer_expn, data.outer_transparency)
}

/// Returns the `parent` context recorded in the data of `ctxt`.
///
/// Panics if `ctxt` is out of bounds for `syntax_context_data`; in builds with debug
/// assertions it also panics if the slot still holds a decode placeholder.
fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext {
    let data = &self.syntax_context_data[ctxt.0 as usize];
    debug_assert!(!data.is_decode_placeholder());
    data.parent
}

Expand Down Expand Up @@ -551,6 +579,7 @@ impl HygieneData {
transparency: Transparency,
) -> SyntaxContext {
let syntax_context_data = &mut self.syntax_context_data;
debug_assert!(!syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
let mut opaque = syntax_context_data[ctxt.0 as usize].opaque;
let mut opaque_and_semitransparent =
syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent;
Expand All @@ -561,7 +590,7 @@ impl HygieneData {
.syntax_context_map
.entry((parent, expn_id, transparency))
.or_insert_with(|| {
let new_opaque = SyntaxContext(syntax_context_data.len() as u32);
let new_opaque = SyntaxContext::from_usize(syntax_context_data.len());
syntax_context_data.push(SyntaxContextData {
outer_expn: expn_id,
outer_transparency: transparency,
Expand All @@ -581,7 +610,7 @@ impl HygieneData {
.entry((parent, expn_id, transparency))
.or_insert_with(|| {
let new_opaque_and_semitransparent =
SyntaxContext(syntax_context_data.len() as u32);
SyntaxContext::from_usize(syntax_context_data.len());
syntax_context_data.push(SyntaxContextData {
outer_expn: expn_id,
outer_transparency: transparency,
Expand All @@ -596,8 +625,6 @@ impl HygieneData {

let parent = ctxt;
*self.syntax_context_map.entry((parent, expn_id, transparency)).or_insert_with(|| {
let new_opaque_and_semitransparent_and_transparent =
SyntaxContext(syntax_context_data.len() as u32);
syntax_context_data.push(SyntaxContextData {
outer_expn: expn_id,
outer_transparency: transparency,
Expand All @@ -606,7 +633,7 @@ impl HygieneData {
opaque_and_semitransparent,
dollar_crate_name: kw::DollarCrate,
});
new_opaque_and_semitransparent_and_transparent
SyntaxContext::from_usize(syntax_context_data.len() - 1)
})
}
}
Expand All @@ -626,25 +653,26 @@ pub fn walk_chain_collapsed(span: Span, to: Span) -> Span {

pub fn update_dollar_crate_names(mut get_name: impl FnMut(SyntaxContext) -> Symbol) {
// The new contexts that need updating are at the end of the list and have `$crate` as a name.
let (len, to_update) = HygieneData::with(|data| {
(
data.syntax_context_data.len(),
data.syntax_context_data
.iter()
.rev()
.take_while(|scdata| scdata.dollar_crate_name == kw::DollarCrate)
.count(),
)
// Decoding placeholders can also be encountered among both old and new contexts.
let mut to_update = vec![];
HygieneData::with(|data| {
for (idx, scdata) in data.syntax_context_data.iter().enumerate().rev() {
if scdata.dollar_crate_name == kw::DollarCrate {
to_update.push((idx, kw::DollarCrate));
} else if !scdata.is_decode_placeholder() {
break;
}
}
});
// The callback must be called from outside of the `HygieneData` lock,
// since it will try to acquire it too.
let range_to_update = len - to_update..len;
let names: Vec<_> =
range_to_update.clone().map(|idx| get_name(SyntaxContext::from_u32(idx as u32))).collect();
for (idx, name) in &mut to_update {
*name = get_name(SyntaxContext::from_usize(*idx));
}
HygieneData::with(|data| {
range_to_update.zip(names).for_each(|(idx, name)| {
for (idx, name) in to_update {
data.syntax_context_data[idx].dollar_crate_name = name;
})
}
})
}

Expand Down Expand Up @@ -713,6 +741,10 @@ impl SyntaxContext {
SyntaxContext(raw as u32)
}

fn from_usize(raw: usize) -> SyntaxContext {
SyntaxContext(u32::try_from(raw).unwrap())
}

/// Extend a syntax context with a given expansion and transparency.
pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext {
HygieneData::with(|data| data.apply_mark(self, expn_id, transparency))
Expand Down Expand Up @@ -893,7 +925,10 @@ impl SyntaxContext {
}

pub(crate) fn dollar_crate_name(self) -> Symbol {
HygieneData::with(|data| data.syntax_context_data[self.0 as usize].dollar_crate_name)
HygieneData::with(|data| {
debug_assert!(!data.syntax_context_data[self.0 as usize].is_decode_placeholder());
data.syntax_context_data[self.0 as usize].dollar_crate_name
})
}

pub fn edition(self) -> Edition {
Expand Down Expand Up @@ -1244,7 +1279,7 @@ impl HygieneEncodeContext {

// Consume the current round of SyntaxContexts.
// Drop the lock() temporary early
let latest_ctxts = { std::mem::take(&mut *self.latest_ctxts.lock()) };
let latest_ctxts = { mem::take(&mut *self.latest_ctxts.lock()) };

// It's fine to iterate over a HashMap, because the serialization
// of the table that we insert data into doesn't depend on insertion
Expand All @@ -1256,7 +1291,7 @@ impl HygieneEncodeContext {
}
});

let latest_expns = { std::mem::take(&mut *self.latest_expns.lock()) };
let latest_expns = { mem::take(&mut *self.latest_expns.lock()) };

// Same as above, this is fine as we are inserting into an order-independent hashset
#[allow(rustc::potential_query_instability)]
Expand Down Expand Up @@ -1373,28 +1408,33 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
return SyntaxContext::root();
}

let ctxt = {
let pending_ctxt = {
let mut inner = context.inner.lock();

// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
// raw ids from different crate metadatas.
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
// This has already been decoded.
return ctxt;
}

match inner.decoding.entry(raw_id) {
Entry::Occupied(ctxt_entry) => {
let pending_ctxt = *ctxt_entry.get();
match context.local_in_progress.borrow_mut().entry(raw_id) {
SetEntry::Occupied(..) => {
// We're decoding this already on the current thread. Return here
// and let the function higher up the stack finish decoding to handle
// recursive cases.
return *ctxt_entry.get();
}
// We're decoding this already on the current thread. Return here and let the
// function higher up the stack finish decoding to handle recursive cases.
// Hopefully having a `SyntaxContext` that refers to incorrect data is ok
// during the remainder of the decoding process; it's certainly not ok after the
// top level decoding function returns.
SetEntry::Occupied(..) => return pending_ctxt,
// Some other thread is currently decoding this.
// Race with it (alternatively we could wait here).
// We cannot return this value, unlike in the recursive case above, because it
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
SetEntry::Vacant(entry) => {
entry.insert();

// Some other thread is current decoding this. Race with it.
*ctxt_entry.get()
pending_ctxt
}
}
}
Expand All @@ -1405,18 +1445,10 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
// Allocate and store SyntaxContext id *before* calling the decoder function,
// as the SyntaxContextData may reference itself.
let new_ctxt = HygieneData::with(|hygiene_data| {
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
// Push a dummy SyntaxContextData to ensure that nobody else can get the
// same ID as us. This will be overwritten after call `decode_Data`
hygiene_data.syntax_context_data.push(SyntaxContextData {
outer_expn: ExpnId::root(),
outer_transparency: Transparency::Transparent,
parent: SyntaxContext::root(),
opaque: SyntaxContext::root(),
opaque_and_semitransparent: SyntaxContext::root(),
dollar_crate_name: kw::Empty,
});
new_ctxt
// same ID as us. This will be overwritten after calling `decode_data`.
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
});
entry.insert(new_ctxt);
new_ctxt
Expand All @@ -1426,27 +1458,42 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext

// Don't try to decode data while holding the lock, since we need to
// be able to recursively decode a SyntaxContext
let mut ctxt_data = decode_data(d, raw_id);
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`
// We don't care what the encoding crate set this to - we want to resolve it
// from the perspective of the current compilation session
ctxt_data.dollar_crate_name = kw::DollarCrate;

// Overwrite the dummy data with our decoded SyntaxContextData
HygieneData::with(|hygiene_data| {
if let Some(old) = hygiene_data.syntax_context_data.get(raw_id as usize)
&& old.outer_expn == ctxt_data.outer_expn
&& old.outer_transparency == ctxt_data.outer_transparency
&& old.parent == ctxt_data.parent
{
ctxt_data = old.clone();
let ctxt_data = decode_data(d, raw_id);
let ctxt_key = ctxt_data.key();

let ctxt = HygieneData::with(|hygiene_data| {
match hygiene_data.syntax_context_map.get(&ctxt_key) {
// Ensure that syntax contexts are unique.
// If a syntax context with the given key already exists, reuse it instead of
// using `pending_ctxt`.
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
// Hopefully its value isn't stored anywhere during decoding and its dummy data
// is never accessed later. The `is_decode_placeholder` asserts on all
// accesses to syntax context data attempt to ensure it.
Some(&ctxt) => ctxt,
// This is a completely new context.
// Overwrite its placeholder data with our decoded data.
None => {
let ctxt_data_ref =
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
// We don't care what the encoding crate set this to - we want to resolve it
// from the perspective of the current compilation session.
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
// Make sure nothing weird happened while `decode_data` was running.
if !prev_ctxt_data.is_decode_placeholder() {
// Another thread may have already inserted the decoded data,
// but the decoded data should match.
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
}
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
pending_ctxt
}
}

hygiene_data.syntax_context_data[ctxt.as_u32() as usize] = ctxt_data;
});

// Mark the context as completed

context.local_in_progress.borrow_mut().remove(&raw_id);

let mut inner = context.inner.lock();
Expand Down
Loading