Skip to content

Commit 07be629

Browse files
committed
Auto merge of #26947 - nagisa:unicode-escape-error, r=nrc
Inspired by the now-mysteriously-closed #26782. This PR introduces better error messages when unicode escapes have an invalid format (e.g. `\uFFFF`). It also makes rustc always tell the user that the escape may not be used in byte-strings and bytes, and it fixes some spans so that they no longer include unnecessary characters, while others now include the escape backslash.
2 parents 7ea2674 + 4d65ef4 commit 07be629

File tree

2 files changed

+32
-20
lines changed

2 files changed

+32
-20
lines changed

src/libsyntax/parse/lexer/mod.rs

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ impl<'a> StringReader<'a> {
172172
self.span_diagnostic.span_err(sp, m)
173173
}
174174

175+
/// Suggest some help with a given span.
176+
pub fn help_span(&self, sp: Span, m: &str) {
177+
self.span_diagnostic.span_help(sp, m)
178+
}
179+
175180
/// Report a fatal error spanning [`from_pos`, `to_pos`).
176181
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
177182
self.fatal_span(codemap::mk_sp(from_pos, to_pos), m)
@@ -182,6 +187,11 @@ impl<'a> StringReader<'a> {
182187
self.err_span(codemap::mk_sp(from_pos, to_pos), m)
183188
}
184189

190+
/// Suggest some help spanning [`from_pos`, `to_pos`).
191+
fn help_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
192+
self.help_span(codemap::mk_sp(from_pos, to_pos), m)
193+
}
194+
185195
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
186196
/// escaped character to the error message
187197
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! {
@@ -728,19 +738,24 @@ impl<'a> StringReader<'a> {
728738
return match e {
729739
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
730740
'x' => self.scan_byte_escape(delim, !ascii_only),
731-
'u' if self.curr_is('{') => {
732-
let valid = self.scan_unicode_escape(delim);
733-
if valid && ascii_only {
734-
self.err_span_(
735-
escaped_pos,
736-
self.last_pos,
741+
'u' => {
742+
let valid = if self.curr_is('{') {
743+
self.scan_unicode_escape(delim) && !ascii_only
744+
} else {
745+
self.err_span_(start, self.last_pos,
746+
"incorrect unicode escape sequence");
747+
self.help_span_(start, self.last_pos,
748+
"format of unicode escape sequences is `\\u{…}`");
749+
false
750+
};
751+
if ascii_only {
752+
self.err_span_(start, self.last_pos,
737753
"unicode escape sequences cannot be used as a byte or in \
738754
a byte string"
739755
);
740-
false
741-
} else {
742-
valid
743756
}
757+
valid
758+
744759
}
745760
'\n' if delim == '"' => {
746761
self.consume_whitespace();
@@ -757,16 +772,13 @@ impl<'a> StringReader<'a> {
757772
if ascii_only { "unknown byte escape" }
758773
else { "unknown character escape" },
759774
c);
760-
let sp = codemap::mk_sp(escaped_pos, last_pos);
761775
if e == '\r' {
762-
self.span_diagnostic.span_help(
763-
sp,
776+
self.help_span_(escaped_pos, last_pos,
764777
"this is an isolated carriage return; consider checking \
765778
your editor and version control settings")
766779
}
767780
if (e == '{' || e == '}') && !ascii_only {
768-
self.span_diagnostic.span_help(
769-
sp,
781+
self.help_span_(escaped_pos, last_pos,
770782
"if used in a formatting string, \
771783
curly braces are escaped with `{{` and `}}`")
772784
}
@@ -848,14 +860,12 @@ impl<'a> StringReader<'a> {
848860
valid = false;
849861
}
850862

851-
self.bump(); // past the ending }
852-
853863
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
854864
self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape");
855865
valid = false;
856866
}
857867

858-
868+
self.bump(); // past the ending }
859869
valid
860870
}
861871

src/test/parse-fail/issue-23620-invalid-escapes.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ fn main() {
1616
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
1717

1818
let _ = b'\u';
19-
//~^ ERROR unknown byte escape: u
19+
//~^ ERROR incorrect unicode escape sequence
20+
//~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
2021

2122
let _ = b'\x5';
2223
//~^ ERROR numeric character escape is too short
@@ -35,11 +36,12 @@ fn main() {
3536
let _ = b"\u{a4a4} \xf \u";
3637
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
3738
//~^^ ERROR illegal character in numeric character escape:
38-
//~^^^ ERROR unknown byte escape: u
39+
//~^^^ ERROR incorrect unicode escape sequence
40+
//~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
3941

4042
let _ = "\u{ffffff} \xf \u";
4143
//~^ ERROR illegal unicode character escape
4244
//~^^ ERROR illegal character in numeric character escape:
4345
//~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
44-
//~^^^^ ERROR unknown character escape: u
46+
//~^^^^ ERROR incorrect unicode escape sequence
4547
}

0 commit comments

Comments
 (0)