Skip to content

Show better diagnostics for unicode escapes #26947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 13, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 27 additions & 17 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ impl<'a> StringReader<'a> {
self.span_diagnostic.span_err(sp, m)
}

/// Suggest some help with a given span.
///
/// Forwards the span `sp` and the help text `m` unchanged to
/// `self.span_diagnostic.span_help`. This is the help-level counterpart of
/// the span-based error reporters defined alongside it.
pub fn help_span(&self, sp: Span, m: &str) {
self.span_diagnostic.span_help(sp, m)
}

/// Report a fatal error spanning [`from_pos`, `to_pos`).
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
self.fatal_span(codemap::mk_sp(from_pos, to_pos), m)
Expand All @@ -182,6 +187,11 @@ impl<'a> StringReader<'a> {
self.err_span(codemap::mk_sp(from_pos, to_pos), m)
}

/// Suggest some help spanning [`from_pos`, `to_pos`).
///
/// Convenience wrapper for callers that hold two byte positions rather
/// than a ready-made span: it builds the span with `codemap::mk_sp` and
/// hands it, together with the help text `m`, to `help_span`.
fn help_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
self.help_span(codemap::mk_sp(from_pos, to_pos), m)
}

/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
/// escaped character to the error message
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! {
Expand Down Expand Up @@ -728,19 +738,24 @@ impl<'a> StringReader<'a> {
return match e {
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
'x' => self.scan_byte_escape(delim, !ascii_only),
'u' if self.curr_is('{') => {
let valid = self.scan_unicode_escape(delim);
if valid && ascii_only {
self.err_span_(
escaped_pos,
self.last_pos,
'u' => {
let valid = if self.curr_is('{') {
self.scan_unicode_escape(delim) && !ascii_only
} else {
self.err_span_(start, self.last_pos,
"incorrect unicode escape sequence");
self.help_span_(start, self.last_pos,
"format of unicode escape sequences is `\\u{…}`");
false
};
if ascii_only {
self.err_span_(start, self.last_pos,
"unicode escape sequences cannot be used as a byte or in \
a byte string"
);
false
} else {
valid
}
valid

}
'\n' if delim == '"' => {
self.consume_whitespace();
Expand All @@ -757,16 +772,13 @@ impl<'a> StringReader<'a> {
if ascii_only { "unknown byte escape" }
else { "unknown character escape" },
c);
let sp = codemap::mk_sp(escaped_pos, last_pos);
if e == '\r' {
self.span_diagnostic.span_help(
sp,
self.help_span_(escaped_pos, last_pos,
"this is an isolated carriage return; consider checking \
your editor and version control settings")
}
if (e == '{' || e == '}') && !ascii_only {
self.span_diagnostic.span_help(
sp,
self.help_span_(escaped_pos, last_pos,
"if used in a formatting string, \
curly braces are escaped with `{{` and `}}`")
}
Expand Down Expand Up @@ -848,14 +860,12 @@ impl<'a> StringReader<'a> {
valid = false;
}

self.bump(); // past the ending }

if valid && (char::from_u32(accum_int).is_none() || count == 0) {
self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape");
valid = false;
}


self.bump(); // past the ending }
valid
}

Expand Down
8 changes: 5 additions & 3 deletions src/test/parse-fail/issue-23620-invalid-escapes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ fn main() {
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

let _ = b'\u';
//~^ ERROR unknown byte escape: u
//~^ ERROR incorrect unicode escape sequence
//~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

let _ = b'\x5';
//~^ ERROR numeric character escape is too short
Expand All @@ -35,11 +36,12 @@ fn main() {
let _ = b"\u{a4a4} \xf \u";
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
//~^^ ERROR illegal character in numeric character escape:
//~^^^ ERROR unknown byte escape: u
//~^^^ ERROR incorrect unicode escape sequence
//~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

let _ = "\u{ffffff} \xf \u";
//~^ ERROR illegal unicode character escape
//~^^ ERROR illegal character in numeric character escape:
//~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
//~^^^^ ERROR unknown character escape: u
//~^^^^ ERROR incorrect unicode escape sequence
}