Skip to content

Commit aa6bfaf

Browse files
committed
Make rustc_lexer::cursor::Cursor public.
`Cursor` is currently hidden, and the main tokenization path uses `rustc_lexer::first_token` which involves constructing a new `Cursor` for every single token, which is weird. Also, `first_token` can't handle empty input, so callers have to check for that first. This commit makes `Cursor` public, so `StringReader` can contain a `Cursor`, which results in a simpler structure. The commit also changes `StringReader::advance_token` so it returns an `Option<Token>`, simplifying the empty input case.
1 parent 33516ac commit aa6bfaf

File tree

4 files changed

+26
-36
lines changed

4 files changed

+26
-36
lines changed

compiler/rustc_lexer/src/cursor.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::str::Chars;
44
///
55
/// Next characters can be peeked via `first` method,
66
/// and position can be shifted forward via `bump` method.
7-
pub(crate) struct Cursor<'a> {
7+
pub struct Cursor<'a> {
88
initial_len: usize,
99
/// Iterator over chars. Slightly faster than a &str.
1010
chars: Chars<'a>,
@@ -15,7 +15,7 @@ pub(crate) struct Cursor<'a> {
1515
pub(crate) const EOF_CHAR: char = '\0';
1616

1717
impl<'a> Cursor<'a> {
18-
pub(crate) fn new(input: &'a str) -> Cursor<'a> {
18+
pub fn new(input: &'a str) -> Cursor<'a> {
1919
Cursor {
2020
initial_len: input.len(),
2121
chars: input.chars(),

compiler/rustc_lexer/src/lib.rs

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
// We want to be able to build this crate with a stable compiler, so no
2424
// `#![feature]` attributes should be added.
2525

26-
mod cursor;
26+
pub mod cursor;
2727
pub mod unescape;
2828

2929
#[cfg(test)]
@@ -219,13 +219,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
219219
None
220220
}
221221

222-
/// Parses the first token from the provided input string.
223-
#[inline]
224-
pub fn first_token(input: &str) -> Token {
225-
debug_assert!(!input.is_empty());
226-
Cursor::new(input).advance_token()
227-
}
228-
229222
/// Validates a raw string literal. Used for getting more information about a
230223
/// problem with a `RawStr`/`RawByteStr` with a `None` field.
231224
#[inline]
@@ -242,14 +235,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
242235
/// Creates an iterator that produces tokens from the input string.
243236
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
244237
let mut cursor = Cursor::new(input);
245-
std::iter::from_fn(move || {
246-
if cursor.is_eof() {
247-
None
248-
} else {
249-
cursor.reset_len_consumed();
250-
Some(cursor.advance_token())
251-
}
252-
})
238+
std::iter::from_fn(move || cursor.advance_token())
253239
}
254240

255241
/// True if `c` is considered a whitespace according to Rust language definition.
@@ -311,8 +297,8 @@ pub fn is_ident(string: &str) -> bool {
311297

312298
impl Cursor<'_> {
313299
/// Parses a token from the input string.
314-
fn advance_token(&mut self) -> Token {
315-
let first_char = self.bump().unwrap();
300+
pub fn advance_token(&mut self) -> Option<Token> {
301+
let first_char = self.bump()?;
316302
let token_kind = match first_char {
317303
// Slash, comment or block comment.
318304
'/' => match self.first() {
@@ -433,7 +419,9 @@ impl Cursor<'_> {
433419
}
434420
_ => Unknown,
435421
};
436-
Token::new(token_kind, self.len_consumed())
422+
let res = Some(Token::new(token_kind, self.len_consumed()));
423+
self.reset_len_consumed();
424+
res
437425
}
438426

439427
fn line_comment(&mut self) -> TokenKind {

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
44
use rustc_ast::tokenstream::TokenStream;
55
use rustc_ast::util::unicode::contains_text_flow_control_chars;
66
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult};
7+
use rustc_lexer::cursor::Cursor;
78
use rustc_lexer::unescape::{self, Mode};
89
use rustc_lexer::{Base, DocStyle, RawStrError};
910
use rustc_session::lint::builtin::{
@@ -48,7 +49,9 @@ pub(crate) fn parse_token_trees<'a>(
4849
start_pos = start_pos + BytePos::from_usize(shebang_len);
4950
}
5051

51-
let string_reader = StringReader { sess, start_pos, pos: start_pos, src, override_span };
52+
let cursor = Cursor::new(src);
53+
let string_reader =
54+
StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
5255
tokentrees::TokenTreesReader::parse_token_trees(string_reader)
5356
}
5457

@@ -60,6 +63,8 @@ struct StringReader<'a> {
6063
pos: BytePos,
6164
/// Source text to tokenize.
6265
src: &'a str,
66+
/// Cursor for getting lexer tokens.
67+
cursor: Cursor<'a>,
6368
override_span: Option<Span>,
6469
}
6570

@@ -75,15 +80,13 @@ impl<'a> StringReader<'a> {
7580

7681
// Skip trivial (whitespace & comments) tokens
7782
loop {
78-
let start_src_index = self.src_index(self.pos);
79-
let text: &str = &self.src[start_src_index..];
80-
81-
if text.is_empty() {
82-
let span = self.mk_sp(self.pos, self.pos);
83-
return (Token::new(token::Eof, span), preceded_by_whitespace);
84-
}
85-
86-
let token = rustc_lexer::first_token(text);
83+
let token = match self.cursor.advance_token() {
84+
Some(token) => token,
85+
None => {
86+
let span = self.mk_sp(self.pos, self.pos);
87+
return (Token::new(token::Eof, span), preceded_by_whitespace);
88+
}
89+
};
8790

8891
let start = self.pos;
8992
self.pos = self.pos + BytePos(token.len);

src/librustdoc/html/highlight.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::collections::VecDeque;
1313
use std::fmt::{Display, Write};
1414

1515
use rustc_data_structures::fx::FxHashMap;
16+
use rustc_lexer::cursor::Cursor;
1617
use rustc_lexer::{LiteralKind, TokenKind};
1718
use rustc_span::edition::Edition;
1819
use rustc_span::symbol::Symbol;
@@ -408,15 +409,13 @@ enum Highlight<'a> {
408409

409410
struct TokenIter<'a> {
410411
src: &'a str,
412+
cursor: Cursor<'a>,
411413
}
412414

413415
impl<'a> Iterator for TokenIter<'a> {
414416
type Item = (TokenKind, &'a str);
415417
fn next(&mut self) -> Option<(TokenKind, &'a str)> {
416-
if self.src.is_empty() {
417-
return None;
418-
}
419-
let token = rustc_lexer::first_token(self.src);
418+
let token = self.cursor.advance_token()?;
420419
let (text, rest) = self.src.split_at(token.len as usize);
421420
self.src = rest;
422421
Some((token.kind, text))
@@ -525,7 +524,7 @@ impl<'a> Classifier<'a> {
525524
/// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
526525
/// file span which will be used later on by the `span_correspondance_map`.
527526
fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> {
528-
let tokens = PeekIter::new(TokenIter { src });
527+
let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) });
529528
let decorations = decoration_info.map(Decorations::new);
530529
Classifier {
531530
tokens,

0 commit comments

Comments
 (0)