Skip to content

Commit dfab3b9

Browse files
committed
Update literal-escaper from v0.0.2 to v0.0.3 for a better API without `unreachable`
1 parent 87b4541 commit dfab3b9

File tree

20 files changed

+258
-269
lines changed

20 files changed

+258
-269
lines changed

Cargo.lock

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3148,9 +3148,7 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
31483148

31493149
[[package]]
31503150
name = "rustc-literal-escaper"
3151-
version = "0.0.2"
3152-
source = "registry+https://github.com/rust-lang/crates.io-index"
3153-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
3151+
version = "0.0.3"
31543152

31553153
[[package]]
31563154
name = "rustc-main"

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,6 @@ codegen-units = 1
8989
# FIXME: LTO cannot be enabled for binaries in a workspace
9090
# <https://github.com/rust-lang/cargo/issues/9330>
9191
# lto = true
92+
93+
[patch.crates-io]
94+
rustc-literal-escaper = { path = '../literal-escaper/' }

compiler/rustc_ast/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ edition = "2024"
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
99
memchr = "2.7.4"
10-
rustc-literal-escaper = "0.0.2"
10+
rustc-literal-escaper = "0.0.3"
1111
rustc_ast_ir = { path = "../rustc_ast_ir" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }
1313
rustc_index = { path = "../rustc_index" }

compiler/rustc_ast/src/util/literal.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::{ascii, fmt, str};
44

55
use rustc_literal_escaper::{
6-
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
6+
MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
77
};
88
use rustc_span::{Span, Symbol, kw, sym};
99
use tracing::debug;
@@ -87,11 +87,10 @@ impl LitKind {
8787
// Force-inlining here is aggressive but the closure is
8888
// called on every char in the string, so it can be hot in
8989
// programs with many long strings containing escapes.
90-
unescape_unicode(
90+
unescape_str(
9191
s,
92-
Mode::Str,
93-
&mut #[inline(always)]
94-
|_, c| match c {
92+
#[inline(always)]
93+
|_, res| match res {
9594
Ok(c) => buf.push(c),
9695
Err(err) => {
9796
assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -111,8 +110,8 @@ impl LitKind {
111110
token::ByteStr => {
112111
let s = symbol.as_str();
113112
let mut buf = Vec::with_capacity(s.len());
114-
unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
115-
Ok(c) => buf.push(byte_from_char(c)),
113+
unescape_byte_str(s, |_, res| match res {
114+
Ok(b) => buf.push(b),
116115
Err(err) => {
117116
assert!(!err.is_fatal(), "failed to unescape string literal")
118117
}
@@ -128,7 +127,7 @@ impl LitKind {
128127
token::CStr => {
129128
let s = symbol.as_str();
130129
let mut buf = Vec::with_capacity(s.len());
131-
unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
130+
unescape_c_str(s, |_span, c| match c {
132131
Ok(MixedUnit::Char(c)) => {
133132
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
134133
}

compiler/rustc_parse/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ edition = "2024"
66
[dependencies]
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
9-
rustc-literal-escaper = "0.0.2"
9+
rustc-literal-escaper = "0.0.3"
1010
rustc_ast = { path = "../rustc_ast" }
1111
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 28 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::ops::Range;
2-
31
use diagnostics::make_unclosed_delims_error;
42
use rustc_ast::ast::{self, AttrStyle};
53
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
@@ -10,7 +8,7 @@ use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
108
use rustc_lexer::{
119
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
1210
};
13-
use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
11+
use rustc_literal_escaper::{EscapeError, Mode, unescape_for_errors};
1412
use rustc_session::lint::BuiltinLintDiag;
1513
use rustc_session::lint::builtin::{
1614
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
@@ -617,7 +615,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
617615
}
618616
err.emit()
619617
}
620-
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
618+
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
621619
}
622620
rustc_lexer::LiteralKind::Byte { terminated } => {
623621
if !terminated {
@@ -629,7 +627,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
629627
.with_code(E0763)
630628
.emit()
631629
}
632-
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
630+
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
633631
}
634632
rustc_lexer::LiteralKind::Str { terminated } => {
635633
if !terminated {
@@ -641,7 +639,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
641639
.with_code(E0765)
642640
.emit()
643641
}
644-
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
642+
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
645643
}
646644
rustc_lexer::LiteralKind::ByteStr { terminated } => {
647645
if !terminated {
@@ -653,7 +651,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
653651
.with_code(E0766)
654652
.emit()
655653
}
656-
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
654+
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
657655
}
658656
rustc_lexer::LiteralKind::CStr { terminated } => {
659657
if !terminated {
@@ -665,13 +663,13 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
665663
.with_code(E0767)
666664
.emit()
667665
}
668-
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
666+
self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
669667
}
670668
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
671669
if let Some(n_hashes) = n_hashes {
672670
let n = u32::from(n_hashes);
673671
let kind = token::StrRaw(n_hashes);
674-
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
672+
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
675673
} else {
676674
self.report_raw_str_error(start, 1);
677675
}
@@ -680,7 +678,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
680678
if let Some(n_hashes) = n_hashes {
681679
let n = u32::from(n_hashes);
682680
let kind = token::ByteStrRaw(n_hashes);
683-
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
681+
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
684682
} else {
685683
self.report_raw_str_error(start, 2);
686684
}
@@ -689,7 +687,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
689687
if let Some(n_hashes) = n_hashes {
690688
let n = u32::from(n_hashes);
691689
let kind = token::CStrRaw(n_hashes);
692-
self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
690+
self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
693691
} else {
694692
self.report_raw_str_error(start, 2);
695693
}
@@ -1006,40 +1004,36 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
10061004
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
10071005
}
10081006

1009-
fn cook_common(
1007+
fn cook_quoted(
10101008
&self,
10111009
mut kind: token::LitKind,
10121010
mode: Mode,
10131011
start: BytePos,
10141012
end: BytePos,
10151013
prefix_len: u32,
10161014
postfix_len: u32,
1017-
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
10181015
) -> (token::LitKind, Symbol) {
10191016
let content_start = start + BytePos(prefix_len);
10201017
let content_end = end - BytePos(postfix_len);
10211018
let lit_content = self.str_from_to(content_start, content_end);
1022-
unescape(lit_content, mode, &mut |range, result| {
1023-
// Here we only check for errors. The actual unescaping is done later.
1024-
if let Err(err) = result {
1025-
let span_with_quotes = self.mk_sp(start, end);
1026-
let (start, end) = (range.start as u32, range.end as u32);
1027-
let lo = content_start + BytePos(start);
1028-
let hi = lo + BytePos(end - start);
1029-
let span = self.mk_sp(lo, hi);
1030-
let is_fatal = err.is_fatal();
1031-
if let Some(guar) = emit_unescape_error(
1032-
self.dcx(),
1033-
lit_content,
1034-
span_with_quotes,
1035-
span,
1036-
mode,
1037-
range,
1038-
err,
1039-
) {
1040-
assert!(is_fatal);
1041-
kind = token::Err(guar);
1042-
}
1019+
unescape_for_errors(lit_content, mode, |range, err| {
1020+
let span_with_quotes = self.mk_sp(start, end);
1021+
let (start, end) = (range.start as u32, range.end as u32);
1022+
let lo = content_start + BytePos(start);
1023+
let hi = lo + BytePos(end - start);
1024+
let span = self.mk_sp(lo, hi);
1025+
let is_fatal = err.is_fatal();
1026+
if let Some(guar) = emit_unescape_error(
1027+
self.dcx(),
1028+
lit_content,
1029+
span_with_quotes,
1030+
span,
1031+
mode,
1032+
range,
1033+
err,
1034+
) {
1035+
assert!(is_fatal);
1036+
kind = token::Err(guar);
10431037
}
10441038
});
10451039

@@ -1052,34 +1046,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
10521046
};
10531047
(kind, sym)
10541048
}
1055-
1056-
fn cook_unicode(
1057-
&self,
1058-
kind: token::LitKind,
1059-
mode: Mode,
1060-
start: BytePos,
1061-
end: BytePos,
1062-
prefix_len: u32,
1063-
postfix_len: u32,
1064-
) -> (token::LitKind, Symbol) {
1065-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
1066-
unescape_unicode(src, mode, &mut |span, result| callback(span, result.map(drop)))
1067-
})
1068-
}
1069-
1070-
fn cook_mixed(
1071-
&self,
1072-
kind: token::LitKind,
1073-
mode: Mode,
1074-
start: BytePos,
1075-
end: BytePos,
1076-
prefix_len: u32,
1077-
postfix_len: u32,
1078-
) -> (token::LitKind, Symbol) {
1079-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
1080-
unescape_mixed(src, mode, &mut |span, result| callback(span, result.map(drop)))
1081-
})
1082-
}
10831049
}
10841050

10851051
pub fn nfc_normalize(string: &str) -> Symbol {

compiler/rustc_parse_format/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8-
rustc-literal-escaper = "0.0.2"
8+
rustc-literal-escaper = "0.0.3"
99
rustc_lexer = { path = "../rustc_lexer" }
1010
# tidy-alphabetical-end
1111

compiler/rustc_parse_format/src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ use std::ops::Range;
2020
pub use Alignment::*;
2121
pub use Count::*;
2222
pub use Position::*;
23-
use rustc_literal_escaper::{Mode, unescape_unicode};
2423

2524
/// The type of format string that we are parsing.
2625
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -335,7 +334,7 @@ impl<'a> Parser<'a> {
335334
let without_quotes = &snippet[1..snippet.len() - 1];
336335
let (mut ok, mut vec) = (true, vec![]);
337336
let mut chars = input.chars();
338-
unescape_unicode(without_quotes, Mode::Str, &mut |range, res| match res {
337+
rustc_literal_escaper::unescape_str(without_quotes, |range, res| match res {
339338
Ok(ch) if ok && chars.next().is_some_and(|c| ch == c) => {
340339
vec.push((range, ch));
341340
}

library/Cargo.lock

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -286,9 +286,7 @@ dependencies = [
286286

287287
[[package]]
288288
name = "rustc-literal-escaper"
289-
version = "0.0.2"
290-
source = "registry+https://github.com/rust-lang/crates.io-index"
291-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
289+
version = "0.0.3"
292290
dependencies = [
293291
"rustc-std-workspace-std",
294292
]

library/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,5 @@ rustc-demangle.opt-level = "s"
5050
rustc-std-workspace-core = { path = 'rustc-std-workspace-core' }
5151
rustc-std-workspace-alloc = { path = 'rustc-std-workspace-alloc' }
5252
rustc-std-workspace-std = { path = 'rustc-std-workspace-std' }
53+
54+
rustc-literal-escaper = { path = '../../literal-escaper/' }

library/proc_macro/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ std = { path = "../std" }
99
# `core` when resolving doc links. Without this line a different `core` will be
1010
# loaded from sysroot causing duplicate lang items and other similar errors.
1111
core = { path = "../core" }
12-
rustc-literal-escaper = { version = "0.0.2", features = ["rustc-dep-of-std"] }
12+
rustc-literal-escaper = { version = "0.0.3", features = ["rustc-dep-of-std"] }

library/proc_macro/src/lib.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ use std::{error, fmt};
5454
pub use diagnostic::{Diagnostic, Level, MultiSpan};
5555
#[unstable(feature = "proc_macro_value", issue = "136652")]
5656
pub use rustc_literal_escaper::EscapeError;
57-
use rustc_literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode};
57+
use rustc_literal_escaper::{MixedUnit, unescape_byte_str, unescape_c_str, unescape_str};
5858
#[unstable(feature = "proc_macro_totokens", issue = "130977")]
5959
pub use to_tokens::ToTokens;
6060

@@ -1438,10 +1438,9 @@ impl Literal {
14381438
// Force-inlining here is aggressive but the closure is
14391439
// called on every char in the string, so it can be hot in
14401440
// programs with many long strings containing escapes.
1441-
unescape_unicode(
1441+
unescape_str(
14421442
symbol,
1443-
Mode::Str,
1444-
&mut #[inline(always)]
1443+
#[inline(always)]
14451444
|_, c| match c {
14461445
Ok(c) => buf.push(c),
14471446
Err(err) => {
@@ -1470,7 +1469,7 @@ impl Literal {
14701469
let mut error = None;
14711470
let mut buf = Vec::with_capacity(symbol.len());
14721471

1473-
unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c {
1472+
unescape_c_str(symbol, |_span, c| match c {
14741473
Ok(MixedUnit::Char(c)) => {
14751474
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
14761475
}
@@ -1509,8 +1508,8 @@ impl Literal {
15091508
let mut buf = Vec::with_capacity(symbol.len());
15101509
let mut error = None;
15111510

1512-
unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c {
1513-
Ok(c) => buf.push(byte_from_char(c)),
1511+
unescape_byte_str(symbol, |_, res| match res {
1512+
Ok(b) => buf.push(b),
15141513
Err(err) => {
15151514
if err.is_fatal() {
15161515
error = Some(ConversionErrorKind::FailedToUnescape(err));

src/tools/clippy/clippy_dev/src/update_lints.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::utils::{
22
File, FileAction, FileUpdater, RustSearcher, Token, UpdateMode, UpdateStatus, panic_file, update_text_region_fn,
33
};
44
use itertools::Itertools;
5+
use rustc_lexer::{LiteralKind, TokenKind, tokenize};
56
use std::collections::HashSet;
67
use std::fmt::Write;
78
use std::fs::OpenOptions;
@@ -325,7 +326,7 @@ fn parse_str_lit(s: &str) -> String {
325326
.and_then(|s| s.strip_suffix('"'))
326327
.unwrap_or_else(|| panic!("expected quoted string, found `{s}`"));
327328
let mut res = String::with_capacity(s.len());
328-
rustc_literal_escaper::unescape_unicode(s, mode, &mut |_, ch| {
329+
literal_escaper::unescape_str(s, |range, ch| {
329330
if let Ok(ch) = ch {
330331
res.push(ch);
331332
}

0 commit comments

Comments
 (0)