Add rustc_lexer::TokenKind::Eof.

nnethercote · nnethercote · commit da84f0f4c319 · 2022-09-26T13:48:08.000+10:00
For alignment with `rustc_ast::token::TokenKind::Eof`. Plus it's a bit faster,
due to less `Option` manipulation in `StringReader::next_token`.
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
@@ -139,6 +139,9 @@ pub enum TokenKind {
 
     /// Unknown token, not expected by the lexer, e.g. "№"
     Unknown,
+
+    /// End of input.
+    Eof,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -235,7 +238,10 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
     let mut cursor = Cursor::new(input);
-    std::iter::from_fn(move || cursor.advance_token())
+    std::iter::from_fn(move || {
+        let token = cursor.advance_token();
+        if token.kind != TokenKind::Eof { Some(token) } else { None }
+    })
 }
 
 /// True if `c` is considered a whitespace according to Rust language definition.
@@ -297,8 +303,11 @@ pub fn is_ident(string: &str) -> bool {
 
 impl Cursor<'_> {
     /// Parses a token from the input string.
-    pub fn advance_token(&mut self) -> Option<Token> {
-        let first_char = self.bump()?;
+    pub fn advance_token(&mut self) -> Token {
+        let first_char = match self.bump() {
+            Some(c) => c,
+            None => return Token::new(TokenKind::Eof, 0),
+        };
         let token_kind = match first_char {
             // Slash, comment or block comment.
             '/' => match self.first() {
@@ -419,7 +428,7 @@ impl Cursor<'_> {
             }
             _ => Unknown,
         };
-        let res = Some(Token::new(token_kind, self.pos_within_token()));
+        let res = Token::new(token_kind, self.pos_within_token());
         self.reset_pos_within_token();
         res
     }
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
@@ -80,14 +80,7 @@ impl<'a> StringReader<'a> {
 
         // Skip trivial (whitespace & comments) tokens
         loop {
-            let token = match self.cursor.advance_token() {
-                Some(token) => token,
-                None => {
-                    let span = self.mk_sp(self.pos, self.pos);
-                    return (Token::new(token::Eof, span), preceded_by_whitespace);
-                }
-            };
-
+            let token = self.cursor.advance_token();
             let start = self.pos;
             self.pos = self.pos + BytePos(token.len);
 
@@ -327,6 +320,7 @@ impl<'a> StringReader<'a> {
                 err.emit();
                 token?
             }
+            rustc_lexer::TokenKind::Eof => token::Eof,
         })
     }
 
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
@@ -415,7 +415,10 @@ struct TokenIter<'a> {
 impl<'a> Iterator for TokenIter<'a> {
     type Item = (TokenKind, &'a str);
     fn next(&mut self) -> Option<(TokenKind, &'a str)> {
-        let token = self.cursor.advance_token()?;
+        let token = self.cursor.advance_token();
+        if token.kind == TokenKind::Eof {
+            return None;
+        }
         let (text, rest) = self.src.split_at(token.len as usize);
         self.src = rest;
         Some((token.kind, text))
@@ -849,6 +852,7 @@ impl<'a> Classifier<'a> {
                 Class::Ident(self.new_span(before, text))
             }
             TokenKind::Lifetime { .. } => Class::Lifetime,
+            TokenKind::Eof => panic!("Eof in advance"),
         };
         // Anything that didn't return above is the simple case where we the
         // class just spans a single token, so we can use the `string` method.