diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index d6ea4249247f3..d53bb456f4c0a 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -356,11 +356,10 @@ where let start = src.len() - chars.as_str().len() - c.len_utf8(); let res = match c { '\\' => { - match chars.clone().next() { - Some('\n') => { - // Rust language specification requires us to skip whitespaces - // if unescaped '\' character is followed by '\n'. - // For details see [Rust language reference] + match chars.as_str().as_bytes().first() { + Some(b'\n') => { + let _ = chars.next(); + // skip whitespace for backslash newline, see [Rust language reference] // (https://doc.rust-lang.org/reference/tokens.html#string-literals). skip_ascii_whitespace(&mut chars, start, &mut |range, err| { callback(range, Err(err)) @@ -379,30 +378,38 @@ where } } +/// Skip ASCII whitespace, except for the formfeed character +/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)). +/// Warns on unescaped newline and following non-ASCII whitespace. fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) where F: FnMut(Range, EscapeError), { - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; + let mut spaces = 0; + let mut contains_nl = false; + + for byte in chars.as_str().bytes() { + let is_space = b" \t\n\r".contains(&byte); + spaces += is_space as usize; + contains_nl |= byte == b'\n'; + if !is_space { + break; + } + } + *chars = chars.as_str()[spaces..].chars(); + + // the escaping backslash and newline add 2 bytes + let end = start + 2 + spaces; + + if contains_nl { callback(start..end, EscapeError::MultipleSkippedLinesWarning); } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().next() { + if let Some(c) = chars.clone().next() { if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, EscapeError::UnskippedWhitespaceWarning); + // for error reporting, include the character that was not skipped in the span + callback(start..end + c.len_utf8(), EscapeError::UnskippedWhitespaceWarning); } } - *chars = tail.chars(); } /// Takes a contents of a string literal (without quotes) and produces a