-
Notifications
You must be signed in to change notification settings - Fork 13.4k
librustc_lexer: Refactor the module #66015
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
0825b35
72767a8
e0c45f7
649a524
d6f722d
6e350bd
ecd2673
e8b8d2a
31735b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -248,7 +248,7 @@ impl Cursor<'_> { | |
let first_char = self.bump().unwrap(); | ||
let token_kind = match first_char { | ||
// Slash, comment or block comment. | ||
'/' => match self.nth_char(0) { | ||
'/' => match self.first() { | ||
'/' => self.line_comment(), | ||
'*' => self.block_comment(), | ||
_ => Slash, | ||
|
@@ -257,8 +257,8 @@ impl Cursor<'_> { | |
// Whitespace sequence. | ||
c if is_whitespace(c) => self.whitespace(), | ||
|
||
// Raw string literal or identifier. | ||
'r' => match (self.nth_char(0), self.nth_char(1)) { | ||
// Raw identifier, raw string literal or identifier. | ||
'r' => match (self.first(), self.second()) { | ||
('#', c1) if is_id_start(c1) => self.raw_ident(), | ||
('#', _) | ('"', _) => { | ||
let (n_hashes, started, terminated) = self.raw_double_quoted_string(); | ||
|
@@ -273,7 +273,7 @@ impl Cursor<'_> { | |
}, | ||
|
||
// Byte literal, byte string literal, raw byte string literal or identifier. | ||
'b' => match (self.nth_char(0), self.nth_char(1)) { | ||
'b' => match (self.first(), self.second()) { | ||
('\'', _) => { | ||
self.bump(); | ||
let terminated = self.single_quoted_string(); | ||
|
@@ -366,31 +366,23 @@ impl Cursor<'_> { | |
} | ||
|
||
fn line_comment(&mut self) -> TokenKind { | ||
debug_assert!(self.prev() == '/' && self.nth_char(0) == '/'); | ||
debug_assert!(self.prev() == '/' && self.first() == '/'); | ||
self.bump(); | ||
loop { | ||
match self.nth_char(0) { | ||
'\n' => break, | ||
EOF_CHAR if self.is_eof() => break, | ||
_ => { | ||
self.bump(); | ||
} | ||
} | ||
} | ||
self.eat_while(|c| c != '\n'); | ||
LineComment | ||
} | ||
|
||
fn block_comment(&mut self) -> TokenKind { | ||
debug_assert!(self.prev() == '/' && self.nth_char(0) == '*'); | ||
debug_assert!(self.prev() == '/' && self.first() == '*'); | ||
self.bump(); | ||
let mut depth = 1usize; | ||
while let Some(c) = self.bump() { | ||
match c { | ||
'/' if self.nth_char(0) == '*' => { | ||
'/' if self.first() == '*' => { | ||
self.bump(); | ||
depth += 1; | ||
} | ||
'*' if self.nth_char(0) == '/' => { | ||
'*' if self.first() == '/' => { | ||
self.bump(); | ||
depth -= 1; | ||
if depth == 0 { | ||
|
@@ -409,31 +401,27 @@ impl Cursor<'_> { | |
|
||
fn whitespace(&mut self) -> TokenKind { | ||
debug_assert!(is_whitespace(self.prev())); | ||
while is_whitespace(self.nth_char(0)) { | ||
self.bump(); | ||
} | ||
self.eat_while(is_whitespace); | ||
Whitespace | ||
} | ||
|
||
fn raw_ident(&mut self) -> TokenKind { | ||
debug_assert!( | ||
self.prev() == 'r' | ||
&& self.nth_char(0) == '#' | ||
&& is_id_start(self.nth_char(1)) | ||
&& self.first() == '#' | ||
&& is_id_start(self.second()) | ||
); | ||
// Eat "#" symbol. | ||
self.bump(); | ||
self.bump(); | ||
while is_id_continue(self.nth_char(0)) { | ||
self.bump(); | ||
} | ||
// Eat the identifier part of RawIdent. | ||
self.eat_identifier(); | ||
RawIdent | ||
} | ||
|
||
fn ident(&mut self) -> TokenKind { | ||
debug_assert!(is_id_start(self.prev())); | ||
while is_id_continue(self.nth_char(0)) { | ||
self.bump(); | ||
} | ||
// Start is already eaten, eat the rest of identifier. | ||
self.eat_while(is_id_continue); | ||
Ident | ||
} | ||
|
||
|
@@ -442,7 +430,7 @@ impl Cursor<'_> { | |
let mut base = Base::Decimal; | ||
if first_digit == '0' { | ||
// Attempt to parse encoding base. | ||
let has_digits = match self.nth_char(0) { | ||
let has_digits = match self.first() { | ||
'b' => { | ||
base = Base::Binary; | ||
self.bump(); | ||
|
@@ -476,23 +464,23 @@ impl Cursor<'_> { | |
self.eat_decimal_digits(); | ||
}; | ||
|
||
match self.nth_char(0) { | ||
match self.first() { | ||
// Don't be greedy if this is actually an | ||
// integer literal followed by field/method access or a range pattern | ||
// (`0..2` and `12.foo()`) | ||
'.' if self.nth_char(1) != '.' | ||
&& !is_id_start(self.nth_char(1)) => | ||
'.' if self.second() != '.' | ||
&& !is_id_start(self.second()) => | ||
{ | ||
// might have stuff after the ., and if it does, it needs to start | ||
// with a number | ||
self.bump(); | ||
let mut empty_exponent = false; | ||
if self.nth_char(0).is_digit(10) { | ||
if self.first().is_digit(10) { | ||
self.eat_decimal_digits(); | ||
match self.nth_char(0) { | ||
match self.first() { | ||
'e' | 'E' => { | ||
self.bump(); | ||
empty_exponent = self.float_exponent().is_err() | ||
empty_exponent = !self.eat_float_exponent(); | ||
} | ||
_ => (), | ||
} | ||
|
@@ -501,7 +489,7 @@ impl Cursor<'_> { | |
} | ||
'e' | 'E' => { | ||
self.bump(); | ||
let empty_exponent = self.float_exponent().is_err(); | ||
let empty_exponent = !self.eat_float_exponent(); | ||
Float { base, empty_exponent } | ||
} | ||
_ => Int { base, empty_int: false }, | ||
|
@@ -549,26 +537,30 @@ impl Cursor<'_> { | |
|
||
fn single_quoted_string(&mut self) -> bool { | ||
debug_assert!(self.prev() == '\''); | ||
// Parse `'''` as a single char literal. | ||
if self.nth_char(0) == '\'' && self.nth_char(1) == '\'' { | ||
// Check if it's a one-symbol literal. | ||
if self.second() == '\'' && self.first() != '\\' { | ||
self.bump(); | ||
self.bump(); | ||
return true; | ||
} | ||
|
||
// Literal has more than one symbol. | ||
|
||
// Parse until either quotes are terminated or error is detected. | ||
let mut first = true; | ||
loop { | ||
match self.nth_char(0) { | ||
// Probably the beginning of a comment, which we don't want to include | ||
// in the error report. | ||
'/' if !first => break, | ||
// Newline without following '\'' means unclosed quote, stop parsing. | ||
'\n' if self.nth_char(1) != '\'' => break, | ||
// End of file, stop parsing. | ||
EOF_CHAR if self.is_eof() => break, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure why the order of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, I had a two-level motivation here:
|
||
match self.first() { | ||
// Quotes are terminated, finish parsing. | ||
'\'' => { | ||
self.bump(); | ||
return true; | ||
} | ||
// Probably the beginning of a comment, which we don't want to include | ||
// in the error report. | ||
'/' => break, | ||
// Newline without following '\'' means unclosed quote, stop parsing. | ||
'\n' if self.second() != '\'' => break, | ||
// End of file, stop parsing. | ||
EOF_CHAR if self.is_eof() => break, | ||
// Escaped slash is considered one character, so bump twice. | ||
'\\' => { | ||
self.bump(); | ||
|
@@ -579,8 +571,8 @@ impl Cursor<'_> { | |
self.bump(); | ||
} | ||
} | ||
first = false; | ||
} | ||
// String was not terminated. | ||
false | ||
} | ||
|
||
|
@@ -643,7 +635,7 @@ impl Cursor<'_> { | |
fn eat_decimal_digits(&mut self) -> bool { | ||
let mut has_digits = false; | ||
loop { | ||
match self.nth_char(0) { | ||
match self.first() { | ||
'_' => { | ||
self.bump(); | ||
} | ||
|
@@ -660,7 +652,7 @@ impl Cursor<'_> { | |
fn eat_hexadecimal_digits(&mut self) -> bool { | ||
let mut has_digits = false; | ||
loop { | ||
match self.nth_char(0) { | ||
match self.first() { | ||
'_' => { | ||
self.bump(); | ||
} | ||
|
@@ -674,23 +666,43 @@ impl Cursor<'_> { | |
has_digits | ||
} | ||
|
||
fn float_exponent(&mut self) -> Result<(), ()> { | ||
/// Eats the float exponent. Returns true if at least one digit was eaten, | ||
/// and returns false otherwise. | ||
fn eat_float_exponent(&mut self) -> bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All other This function always consumed something, and, if it returns an There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm, do they? For example, |
||
debug_assert!(self.prev() == 'e' || self.prev() == 'E'); | ||
if self.nth_char(0) == '-' || self.nth_char(0) == '+' { | ||
if self.first() == '-' || self.first() == '+' { | ||
self.bump(); | ||
} | ||
if self.eat_decimal_digits() { Ok(()) } else { Err(()) } | ||
self.eat_decimal_digits() | ||
} | ||
|
||
// Eats the suffix if it's an identifier. | ||
// Eats the suffix of the literal, e.g. "_u8". | ||
fn eat_literal_suffix(&mut self) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems like this method can be removed now? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's better to have it for readability. It's obvious why we are calling "eat_literal_suffix" after parsing the literal, but it's not that obvious when we'll call "eat_identifier" instead. |
||
if !is_id_start(self.nth_char(0)) { | ||
self.eat_identifier(); | ||
} | ||
|
||
// Eats the identifier. | ||
fn eat_identifier(&mut self) { | ||
if !is_id_start(self.first()) { | ||
return; | ||
} | ||
self.bump(); | ||
|
||
while is_id_continue(self.nth_char(0)) { | ||
self.eat_while(is_id_continue); | ||
petrochenkov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/// Eats symbols while the predicate returns true or until the end of the file is reached. | ||
/// Returns the number of symbols eaten. | ||
fn eat_while<F>(&mut self, mut predicate: F) -> usize | ||
where | ||
F: FnMut(char) -> bool | ||
{ | ||
let mut eaten: usize = 0; | ||
while predicate(self.first()) && !self.is_eof() { | ||
eaten += 1; | ||
self.bump(); | ||
} | ||
|
||
eaten | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.