Skip to content

Commit 7db948b

Browse files
committed
Update literal-escaper from v0.0.2 to v0.0.3 for a better API without unreachable code paths
1 parent 7295b08 commit 7db948b

File tree

22 files changed

+259
-269
lines changed

22 files changed

+259
-269
lines changed

Cargo.lock

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2068,7 +2068,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
20682068
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
20692069
dependencies = [
20702070
"cfg-if",
2071-
"windows-targets 0.48.5",
2071+
"windows-targets 0.52.6",
20722072
]
20732073

20742074
[[package]]
@@ -3145,9 +3145,7 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
31453145

31463146
[[package]]
31473147
name = "rustc-literal-escaper"
3148-
version = "0.0.2"
3149-
source = "registry+https://github.com/rust-lang/crates.io-index"
3150-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
3148+
version = "0.0.3"
31513149

31523150
[[package]]
31533151
name = "rustc-main"

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,6 @@ codegen-units = 1
8989
# FIXME: LTO cannot be enabled for binaries in a workspace
9090
# <https://github.com/rust-lang/cargo/issues/9330>
9191
# lto = true
92+
93+
[patch.crates-io]
94+
rustc-literal-escaper = { path = '../literal-escaper/' }

compiler/rustc_ast/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ edition = "2024"
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
99
memchr = "2.7.4"
10-
rustc-literal-escaper = "0.0.2"
10+
rustc-literal-escaper = "0.0.3"
1111
rustc_ast_ir = { path = "../rustc_ast_ir" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }
1313
rustc_index = { path = "../rustc_index" }

compiler/rustc_ast/src/util/literal.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::{ascii, fmt, str};
44

55
use rustc_literal_escaper::{
6-
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
6+
MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
77
};
88
use rustc_span::{Span, Symbol, kw, sym};
99
use tracing::debug;
@@ -87,11 +87,10 @@ impl LitKind {
8787
// Force-inlining here is aggressive but the closure is
8888
// called on every char in the string, so it can be hot in
8989
// programs with many long strings containing escapes.
90-
unescape_unicode(
90+
unescape_str(
9191
s,
92-
Mode::Str,
93-
&mut #[inline(always)]
94-
|_, c| match c {
92+
#[inline(always)]
93+
|_, res| match res {
9594
Ok(c) => buf.push(c),
9695
Err(err) => {
9796
assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -111,8 +110,8 @@ impl LitKind {
111110
token::ByteStr => {
112111
let s = symbol.as_str();
113112
let mut buf = Vec::with_capacity(s.len());
114-
unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
115-
Ok(c) => buf.push(byte_from_char(c)),
113+
unescape_byte_str(s, |_, res| match res {
114+
Ok(b) => buf.push(b),
116115
Err(err) => {
117116
assert!(!err.is_fatal(), "failed to unescape string literal")
118117
}
@@ -128,7 +127,7 @@ impl LitKind {
128127
token::CStr => {
129128
let s = symbol.as_str();
130129
let mut buf = Vec::with_capacity(s.len());
131-
unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
130+
unescape_c_str(s, |_span, c| match c {
132131
Ok(MixedUnit::Char(c)) => {
133132
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
134133
}

compiler/rustc_parse/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ edition = "2024"
66
[dependencies]
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
9-
rustc-literal-escaper = "0.0.2"
9+
rustc-literal-escaper = "0.0.3"
1010
rustc_ast = { path = "../rustc_ast" }
1111
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 28 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::ops::Range;
2-
31
use diagnostics::make_unclosed_delims_error;
42
use rustc_ast::ast::{self, AttrStyle};
53
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
@@ -8,7 +6,7 @@ use rustc_ast::util::unicode::contains_text_flow_control_chars;
86
use rustc_errors::codes::*;
97
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
108
use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
11-
use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
9+
use rustc_literal_escaper::{EscapeError, Mode, unescape_for_errors};
1210
use rustc_session::lint::BuiltinLintDiag;
1311
use rustc_session::lint::builtin::{
1412
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
@@ -525,7 +523,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
525523
}
526524
err.emit()
527525
}
528-
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
526+
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
529527
}
530528
rustc_lexer::LiteralKind::Byte { terminated } => {
531529
if !terminated {
@@ -537,7 +535,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
537535
.with_code(E0763)
538536
.emit()
539537
}
540-
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
538+
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
541539
}
542540
rustc_lexer::LiteralKind::Str { terminated } => {
543541
if !terminated {
@@ -549,7 +547,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
549547
.with_code(E0765)
550548
.emit()
551549
}
552-
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
550+
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
553551
}
554552
rustc_lexer::LiteralKind::ByteStr { terminated } => {
555553
if !terminated {
@@ -561,7 +559,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
561559
.with_code(E0766)
562560
.emit()
563561
}
564-
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
562+
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
565563
}
566564
rustc_lexer::LiteralKind::CStr { terminated } => {
567565
if !terminated {
@@ -573,13 +571,13 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
573571
.with_code(E0767)
574572
.emit()
575573
}
576-
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
574+
self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
577575
}
578576
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
579577
if let Some(n_hashes) = n_hashes {
580578
let n = u32::from(n_hashes);
581579
let kind = token::StrRaw(n_hashes);
582-
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
580+
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
583581
} else {
584582
self.report_raw_str_error(start, 1);
585583
}
@@ -588,7 +586,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
588586
if let Some(n_hashes) = n_hashes {
589587
let n = u32::from(n_hashes);
590588
let kind = token::ByteStrRaw(n_hashes);
591-
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
589+
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
592590
} else {
593591
self.report_raw_str_error(start, 2);
594592
}
@@ -597,7 +595,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
597595
if let Some(n_hashes) = n_hashes {
598596
let n = u32::from(n_hashes);
599597
let kind = token::CStrRaw(n_hashes);
600-
self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
598+
self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
601599
} else {
602600
self.report_raw_str_error(start, 2);
603601
}
@@ -914,40 +912,36 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
914912
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
915913
}
916914

917-
fn cook_common(
915+
fn cook_quoted(
918916
&self,
919917
mut kind: token::LitKind,
920918
mode: Mode,
921919
start: BytePos,
922920
end: BytePos,
923921
prefix_len: u32,
924922
postfix_len: u32,
925-
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
926923
) -> (token::LitKind, Symbol) {
927924
let content_start = start + BytePos(prefix_len);
928925
let content_end = end - BytePos(postfix_len);
929926
let lit_content = self.str_from_to(content_start, content_end);
930-
unescape(lit_content, mode, &mut |range, result| {
931-
// Here we only check for errors. The actual unescaping is done later.
932-
if let Err(err) = result {
933-
let span_with_quotes = self.mk_sp(start, end);
934-
let (start, end) = (range.start as u32, range.end as u32);
935-
let lo = content_start + BytePos(start);
936-
let hi = lo + BytePos(end - start);
937-
let span = self.mk_sp(lo, hi);
938-
let is_fatal = err.is_fatal();
939-
if let Some(guar) = emit_unescape_error(
940-
self.dcx(),
941-
lit_content,
942-
span_with_quotes,
943-
span,
944-
mode,
945-
range,
946-
err,
947-
) {
948-
assert!(is_fatal);
949-
kind = token::Err(guar);
950-
}
927+
unescape_for_errors(lit_content, mode, |range, err| {
928+
let span_with_quotes = self.mk_sp(start, end);
929+
let (start, end) = (range.start as u32, range.end as u32);
930+
let lo = content_start + BytePos(start);
931+
let hi = lo + BytePos(end - start);
932+
let span = self.mk_sp(lo, hi);
933+
let is_fatal = err.is_fatal();
934+
if let Some(guar) = emit_unescape_error(
935+
self.dcx(),
936+
lit_content,
937+
span_with_quotes,
938+
span,
939+
mode,
940+
range,
941+
err,
942+
) {
943+
assert!(is_fatal);
944+
kind = token::Err(guar);
951945
}
952946
});
953947

@@ -960,34 +954,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
960954
};
961955
(kind, sym)
962956
}
963-
964-
fn cook_unicode(
965-
&self,
966-
kind: token::LitKind,
967-
mode: Mode,
968-
start: BytePos,
969-
end: BytePos,
970-
prefix_len: u32,
971-
postfix_len: u32,
972-
) -> (token::LitKind, Symbol) {
973-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
974-
unescape_unicode(src, mode, &mut |span, result| callback(span, result.map(drop)))
975-
})
976-
}
977-
978-
fn cook_mixed(
979-
&self,
980-
kind: token::LitKind,
981-
mode: Mode,
982-
start: BytePos,
983-
end: BytePos,
984-
prefix_len: u32,
985-
postfix_len: u32,
986-
) -> (token::LitKind, Symbol) {
987-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
988-
unescape_mixed(src, mode, &mut |span, result| callback(span, result.map(drop)))
989-
})
990-
}
991957
}
992958

993959
pub fn nfc_normalize(string: &str) -> Symbol {

compiler/rustc_parse_format/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8-
rustc-literal-escaper = "0.0.2"
8+
rustc-literal-escaper = "0.0.3"
99
rustc_lexer = { path = "../rustc_lexer" }
1010
# tidy-alphabetical-end
1111

compiler/rustc_parse_format/src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
pub use Alignment::*;
1919
pub use Count::*;
2020
pub use Position::*;
21-
use rustc_literal_escaper::{Mode, unescape_unicode};
2221

2322
// Note: copied from rustc_span
2423
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@@ -1123,7 +1122,7 @@ fn find_width_map_from_snippet(
11231122
fn unescape_string(string: &str) -> Option<String> {
11241123
let mut buf = String::new();
11251124
let mut ok = true;
1126-
unescape_unicode(string, Mode::Str, &mut |_, unescaped_char| match unescaped_char {
1125+
rustc_literal_escaper::unescape_str(string, &mut |_, res| match res {
11271126
Ok(c) => buf.push(c),
11281127
Err(_) => ok = false,
11291128
});

library/Cargo.lock

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,7 @@ dependencies = [
287287

288288
[[package]]
289289
name = "rustc-literal-escaper"
290-
version = "0.0.2"
291-
source = "registry+https://github.com/rust-lang/crates.io-index"
292-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
290+
version = "0.0.3"
293291
dependencies = [
294292
"rustc-std-workspace-std",
295293
]

library/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,5 @@ rustc-demangle.opt-level = "s"
5050
rustc-std-workspace-core = { path = 'rustc-std-workspace-core' }
5151
rustc-std-workspace-alloc = { path = 'rustc-std-workspace-alloc' }
5252
rustc-std-workspace-std = { path = 'rustc-std-workspace-std' }
53+
54+
rustc-literal-escaper = { path = '../../literal-escaper/' }

library/core/src/num/niche_types.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ define_valid_range_type! {
131131
pub struct NonZeroI32Inner(i32 as u32 in 1..=0xffff_ffff);
132132
pub struct NonZeroI64Inner(i64 as u64 in 1..=0xffffffff_ffffffff);
133133
pub struct NonZeroI128Inner(i128 as u128 in 1..=0xffffffffffffffff_ffffffffffffffff);
134+
135+
pub struct NonZeroCharInner(char as u32 in 1..=0x10ffff);
134136
}
135137

136138
#[cfg(target_pointer_width = "16")]

library/core/src/num/nonzero.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ impl_zeroable_primitive!(
7979
NonZeroI64Inner(i64),
8080
NonZeroI128Inner(i128),
8181
NonZeroIsizeInner(isize),
82+
NonZeroCharInner(char),
8283
);
8384

8485
/// A value that is known not to equal zero.

library/proc_macro/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ std = { path = "../std" }
99
# `core` when resolving doc links. Without this line a different `core` will be
1010
# loaded from sysroot causing duplicate lang items and other similar errors.
1111
core = { path = "../core" }
12-
rustc-literal-escaper = { version = "0.0.2", features = ["rustc-dep-of-std"] }
12+
rustc-literal-escaper = { version = "0.0.3", features = ["rustc-dep-of-std"] }

library/proc_macro/src/lib.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ use std::{error, fmt};
5454
pub use diagnostic::{Diagnostic, Level, MultiSpan};
5555
#[unstable(feature = "proc_macro_value", issue = "136652")]
5656
pub use rustc_literal_escaper::EscapeError;
57-
use rustc_literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode};
57+
use rustc_literal_escaper::{MixedUnit, unescape_byte_str, unescape_c_str, unescape_str};
5858
#[unstable(feature = "proc_macro_totokens", issue = "130977")]
5959
pub use to_tokens::ToTokens;
6060

@@ -1438,10 +1438,9 @@ impl Literal {
14381438
// Force-inlining here is aggressive but the closure is
14391439
// called on every char in the string, so it can be hot in
14401440
// programs with many long strings containing escapes.
1441-
unescape_unicode(
1441+
unescape_str(
14421442
symbol,
1443-
Mode::Str,
1444-
&mut #[inline(always)]
1443+
#[inline(always)]
14451444
|_, c| match c {
14461445
Ok(c) => buf.push(c),
14471446
Err(err) => {
@@ -1470,7 +1469,7 @@ impl Literal {
14701469
let mut error = None;
14711470
let mut buf = Vec::with_capacity(symbol.len());
14721471

1473-
unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c {
1472+
unescape_c_str(symbol, |_span, c| match c {
14741473
Ok(MixedUnit::Char(c)) => {
14751474
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
14761475
}
@@ -1509,8 +1508,8 @@ impl Literal {
15091508
let mut buf = Vec::with_capacity(symbol.len());
15101509
let mut error = None;
15111510

1512-
unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c {
1513-
Ok(c) => buf.push(byte_from_char(c)),
1511+
unescape_byte_str(symbol, |_, res| match res {
1512+
Ok(b) => buf.push(b),
15141513
Err(err) => {
15151514
if err.is_fatal() {
15161515
error = Some(ConversionErrorKind::FailedToUnescape(err));

src/tools/clippy/clippy_dev/src/update_lints.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use crate::utils::{UpdateMode, clippy_project_root, exit_with_failure, replace_r
22
use aho_corasick::AhoCorasickBuilder;
33
use itertools::Itertools;
44
use rustc_lexer::{LiteralKind, TokenKind, tokenize};
5-
use rustc_literal_escaper::{Mode, unescape_unicode};
65
use std::collections::{HashMap, HashSet};
76
use std::ffi::OsStr;
87
use std::fmt::{self, Write};
@@ -804,7 +803,7 @@ fn remove_line_splices(s: &str) -> String {
804803
.and_then(|s| s.strip_suffix('"'))
805804
.unwrap_or_else(|| panic!("expected quoted string, found `{s}`"));
806805
let mut res = String::with_capacity(s.len());
807-
unescape_unicode(s, Mode::Str, &mut |range, ch| {
806+
literal_escaper::unescape_str(s, |range, ch| {
808807
if ch.is_ok() {
809808
res.push_str(&s[range]);
810809
}

0 commit comments

Comments
 (0)