Skip to content

Commit da84f0f

Browse files
committed
Add rustc_lexer::TokenKind::Eof.
For alignment with `rustc_ast::TokenKind::Eof`. Plus it's a bit faster, due to less `Option` manipulation in `StringReader::next_token`.
1 parent cc0022a commit da84f0f

File tree

3 files changed

+20
-13
lines changed

3 files changed

+20
-13
lines changed

compiler/rustc_lexer/src/lib.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ pub enum TokenKind {
139139

140140
/// Unknown token, not expected by the lexer, e.g. "№"
141141
Unknown,
142+
143+
/// End of input.
144+
Eof,
142145
}
143146

144147
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -235,7 +238,10 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
235238
/// Creates an iterator that produces tokens from the input string.
236239
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
237240
let mut cursor = Cursor::new(input);
238-
std::iter::from_fn(move || cursor.advance_token())
241+
std::iter::from_fn(move || {
242+
let token = cursor.advance_token();
243+
if token.kind != TokenKind::Eof { Some(token) } else { None }
244+
})
239245
}
240246

241247
/// True if `c` is considered a whitespace according to Rust language definition.
@@ -297,8 +303,11 @@ pub fn is_ident(string: &str) -> bool {
297303

298304
impl Cursor<'_> {
299305
/// Parses a token from the input string.
300-
pub fn advance_token(&mut self) -> Option<Token> {
301-
let first_char = self.bump()?;
306+
pub fn advance_token(&mut self) -> Token {
307+
let first_char = match self.bump() {
308+
Some(c) => c,
309+
None => return Token::new(TokenKind::Eof, 0),
310+
};
302311
let token_kind = match first_char {
303312
// Slash, comment or block comment.
304313
'/' => match self.first() {
@@ -419,7 +428,7 @@ impl Cursor<'_> {
419428
}
420429
_ => Unknown,
421430
};
422-
let res = Some(Token::new(token_kind, self.pos_within_token()));
431+
let res = Token::new(token_kind, self.pos_within_token());
423432
self.reset_pos_within_token();
424433
res
425434
}

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,7 @@ impl<'a> StringReader<'a> {
8080

8181
// Skip trivial (whitespace & comments) tokens
8282
loop {
83-
let token = match self.cursor.advance_token() {
84-
Some(token) => token,
85-
None => {
86-
let span = self.mk_sp(self.pos, self.pos);
87-
return (Token::new(token::Eof, span), preceded_by_whitespace);
88-
}
89-
};
90-
83+
let token = self.cursor.advance_token();
9184
let start = self.pos;
9285
self.pos = self.pos + BytePos(token.len);
9386

@@ -327,6 +320,7 @@ impl<'a> StringReader<'a> {
327320
err.emit();
328321
token?
329322
}
323+
rustc_lexer::TokenKind::Eof => token::Eof,
330324
})
331325
}
332326

src/librustdoc/html/highlight.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,10 @@ struct TokenIter<'a> {
415415
impl<'a> Iterator for TokenIter<'a> {
416416
type Item = (TokenKind, &'a str);
417417
fn next(&mut self) -> Option<(TokenKind, &'a str)> {
418-
let token = self.cursor.advance_token()?;
418+
let token = self.cursor.advance_token();
419+
if token.kind == TokenKind::Eof {
420+
return None;
421+
}
419422
let (text, rest) = self.src.split_at(token.len as usize);
420423
self.src = rest;
421424
Some((token.kind, text))
@@ -849,6 +852,7 @@ impl<'a> Classifier<'a> {
849852
Class::Ident(self.new_span(before, text))
850853
}
851854
TokenKind::Lifetime { .. } => Class::Lifetime,
855+
TokenKind::Eof => panic!("Eof in advance"),
852856
};
853857
// Anything that didn't return above is the simple case where the
854858
// class just spans a single token, so we can use the `string` method.

0 commit comments

Comments
 (0)