Skip to content

Commit 629e161

Browse files
author
Julian Wollersberger
committed
Simplified `cursor.rs` a bit and renamed `first` to `peek`, `second` to `peek_second`, and `eat_while` to `bump_while`.
1 parent 70a4bc8 commit 629e161

File tree

3 files changed

+46
-56
lines changed

3 files changed

+46
-56
lines changed

compiler/rustc_lexer/src/cursor.rs

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::str::Chars;
22

33
/// Peekable iterator over a char sequence.
44
///
5-
/// Next characters can be peeked via `nth_char` method,
5+
/// Next characters can be peeked via `peek` method,
66
/// and position can be shifted forward via `bump` method.
77
pub(crate) struct Cursor<'a> {
88
initial_len: usize,
@@ -37,22 +37,17 @@ impl<'a> Cursor<'a> {
3737
}
3838
}
3939

40-
/// Returns nth character relative to the current cursor position.
41-
/// If requested position doesn't exist, `EOF_CHAR` is returned.
40+
/// Peeks the next symbol from the input stream without consuming it.
41+
/// If it doesn't exist, `EOF_CHAR` is returned.
4242
/// However, getting `EOF_CHAR` doesn't always mean actual end of file,
4343
/// it should be checked with `is_eof` method.
44-
fn nth_char(&self, n: usize) -> char {
45-
self.chars().nth(n).unwrap_or(EOF_CHAR)
46-
}
47-
48-
/// Peeks the next symbol from the input stream without consuming it.
49-
pub(crate) fn first(&self) -> char {
50-
self.nth_char(0)
44+
pub(crate) fn peek(&self) -> char {
45+
self.chars.clone().nth(0).unwrap_or(EOF_CHAR)
5146
}
5247

5348
/// Peeks the second symbol from the input stream without consuming it.
54-
pub(crate) fn second(&self) -> char {
55-
self.nth_char(1)
49+
pub(crate) fn peek_second(&self) -> char {
50+
self.chars.clone().nth(1).unwrap_or(EOF_CHAR)
5651
}
5752

5853
/// Checks if there is nothing more to consume.
@@ -65,11 +60,6 @@ impl<'a> Cursor<'a> {
6560
self.initial_len - self.chars.as_str().len()
6661
}
6762

68-
/// Returns a `Chars` iterator over the remaining characters.
69-
fn chars(&self) -> Chars<'a> {
70-
self.chars.clone()
71-
}
72-
7363
/// Moves to the next character.
7464
pub(crate) fn bump(&mut self) -> Option<char> {
7565
let c = self.chars.next()?;
@@ -83,8 +73,8 @@ impl<'a> Cursor<'a> {
8373
}
8474

8575
/// Eats symbols while predicate returns true or until the end of file is reached.
86-
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
87-
while predicate(self.first()) && !self.is_eof() {
76+
pub(crate) fn bump_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
77+
while predicate(self.peek()) && !self.is_eof() {
8878
self.bump();
8979
}
9080
}

compiler/rustc_lexer/src/lib.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ fn advance_token(cursor: &mut Cursor) -> Token {
255255
let first_char = cursor.bump().unwrap();
256256
let token_kind = match first_char {
257257
// Slash, comment or block comment.
258-
'/' => match cursor.first() {
258+
'/' => match cursor.peek() {
259259
'/' => line_comment(cursor),
260260
'*' => block_comment(cursor),
261261
_ => Slash,
@@ -265,7 +265,7 @@ fn advance_token(cursor: &mut Cursor) -> Token {
265265
c if is_whitespace(c) => whitespace(cursor),
266266

267267
// Raw identifier, raw string literal or identifier.
268-
'r' => match (cursor.first(), cursor.second()) {
268+
'r' => match (cursor.peek(), cursor.peek_second()) {
269269
('#', c1) if is_id_start(c1) => raw_ident(cursor),
270270
('#', _) | ('"', _) => {
271271
let (n_hashes, err) = raw_double_quoted_string(cursor, 1);
@@ -280,7 +280,7 @@ fn advance_token(cursor: &mut Cursor) -> Token {
280280
},
281281

282282
// Byte literal, byte string literal, raw byte string literal or identifier.
283-
'b' => match (cursor.first(), cursor.second()) {
283+
'b' => match (cursor.peek(), cursor.peek_second()) {
284284
('\'', _) => {
285285
cursor.bump();
286286
let terminated = single_quoted_string(cursor);
@@ -373,42 +373,42 @@ fn advance_token(cursor: &mut Cursor) -> Token {
373373
}
374374

375375
fn line_comment(cursor: &mut Cursor) -> TokenKind {
376-
debug_assert!(cursor.prev() == '/' && cursor.first() == '/');
376+
debug_assert!(cursor.prev() == '/' && cursor.peek() == '/');
377377
cursor.bump();
378378

379-
let doc_style = match cursor.first() {
379+
let doc_style = match cursor.peek() {
380380
// `//!` is an inner line doc comment.
381381
'!' => Some(DocStyle::Inner),
382382
// `////` (more than 3 slashes) is not considered a doc comment.
383-
'/' if cursor.second() != '/' => Some(DocStyle::Outer),
383+
'/' if cursor.peek_second() != '/' => Some(DocStyle::Outer),
384384
_ => None,
385385
};
386386

387-
cursor.eat_while(|c| c != '\n');
387+
cursor.bump_while(|c| c != '\n');
388388
LineComment { doc_style }
389389
}
390390

391391
fn block_comment(cursor: &mut Cursor) -> TokenKind {
392-
debug_assert!(cursor.prev() == '/' && cursor.first() == '*');
392+
debug_assert!(cursor.prev() == '/' && cursor.peek() == '*');
393393
cursor.bump();
394394

395-
let doc_style = match cursor.first() {
395+
let doc_style = match cursor.peek() {
396396
// `/*!` is an inner block doc comment.
397397
'!' => Some(DocStyle::Inner),
398398
// `/***` (more than 2 stars) is not considered a doc comment.
399399
// `/**/` is not considered a doc comment.
400-
'*' if !matches!(cursor.second(), '*' | '/') => Some(DocStyle::Outer),
400+
'*' if !matches!(cursor.peek_second(), '*' | '/') => Some(DocStyle::Outer),
401401
_ => None,
402402
};
403403

404404
let mut depth = 1usize;
405405
while let Some(c) = cursor.bump() {
406406
match c {
407-
'/' if cursor.first() == '*' => {
407+
'/' if cursor.peek() == '*' => {
408408
cursor.bump();
409409
depth += 1;
410410
}
411-
'*' if cursor.first() == '/' => {
411+
'*' if cursor.peek() == '/' => {
412412
cursor.bump();
413413
depth -= 1;
414414
if depth == 0 {
@@ -427,12 +427,12 @@ fn block_comment(cursor: &mut Cursor) -> TokenKind {
427427

428428
fn whitespace(cursor: &mut Cursor) -> TokenKind {
429429
debug_assert!(is_whitespace(cursor.prev()));
430-
cursor.eat_while(is_whitespace);
430+
cursor.bump_while(is_whitespace);
431431
Whitespace
432432
}
433433

434434
fn raw_ident(cursor: &mut Cursor) -> TokenKind {
435-
debug_assert!(cursor.prev() == 'r' && cursor.first() == '#' && is_id_start(cursor.second()));
435+
debug_assert!(cursor.prev() == 'r' && cursor.peek() == '#' && is_id_start(cursor.peek_second()));
436436
// Eat "#" symbol.
437437
cursor.bump();
438438
// Eat the identifier part of RawIdent.
@@ -443,16 +443,16 @@ fn raw_ident(cursor: &mut Cursor) -> TokenKind {
443443
fn ident(cursor: &mut Cursor) -> TokenKind {
444444
debug_assert!(is_id_start(cursor.prev()));
445445
// Start is already eaten, eat the rest of identifier.
446-
cursor.eat_while(is_id_continue);
446+
cursor.bump_while(is_id_continue);
447447
Ident
448448
}
449449

450450
/// Eats one identifier.
451451
pub(crate) fn eat_identifier(cursor: &mut Cursor) {
452-
if !is_id_start(cursor.first()) {
452+
if !is_id_start(cursor.peek()) {
453453
return;
454454
}
455455
cursor.bump();
456456

457-
cursor.eat_while(is_id_continue);
457+
cursor.bump_while(is_id_continue);
458458
}

compiler/rustc_lexer/src/literals.rs

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
5656
let mut base = Base::Decimal;
5757
if first_digit == '0' {
5858
// Attempt to parse encoding base.
59-
let has_digits = match cursor.first() {
59+
let has_digits = match cursor.peek() {
6060
'b' => {
6161
base = Base::Binary;
6262
cursor.bump();
@@ -90,18 +90,18 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
9090
eat_decimal_digits(cursor);
9191
};
9292

93-
match cursor.first() {
93+
match cursor.peek() {
9494
// Don't be greedy if this is actually an
9595
// integer literal followed by field/method access or a range pattern
9696
// (`0..2` and `12.foo()`)
97-
'.' if cursor.second() != '.' && !is_id_start(cursor.second()) => {
97+
'.' if cursor.peek_second() != '.' && !is_id_start(cursor.peek_second()) => {
9898
// might have stuff after the ., and if it does, it needs to start
9999
// with a number
100100
cursor.bump();
101101
let mut empty_exponent = false;
102-
if cursor.first().is_digit(10) {
102+
if cursor.peek().is_digit(10) {
103103
eat_decimal_digits(cursor);
104-
match cursor.first() {
104+
match cursor.peek() {
105105
'e' | 'E' => {
106106
cursor.bump();
107107
empty_exponent = !eat_float_exponent(cursor);
@@ -123,7 +123,7 @@ pub(crate) fn number(cursor: &mut Cursor, first_digit: char) -> LiteralKind {
123123
pub(crate) fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
124124
let mut has_digits = false;
125125
loop {
126-
match cursor.first() {
126+
match cursor.peek() {
127127
'_' => {
128128
cursor.bump();
129129
}
@@ -140,7 +140,7 @@ pub(crate) fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
140140
pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
141141
let mut has_digits = false;
142142
loop {
143-
match cursor.first() {
143+
match cursor.peek() {
144144
'_' => {
145145
cursor.bump();
146146
}
@@ -158,7 +158,7 @@ pub(crate) fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
158158
/// and returns false otherwise.
159159
fn eat_float_exponent(cursor: &mut Cursor) -> bool {
160160
debug_assert!(cursor.prev() == 'e' || cursor.prev() == 'E');
161-
if cursor.first() == '-' || cursor.first() == '+' {
161+
if cursor.peek() == '-' || cursor.peek() == '+' {
162162
cursor.bump();
163163
}
164164
eat_decimal_digits(cursor)
@@ -167,14 +167,14 @@ fn eat_float_exponent(cursor: &mut Cursor) -> bool {
167167
pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
168168
debug_assert!(cursor.prev() == '\'');
169169

170-
let can_be_a_lifetime = if cursor.second() == '\'' {
170+
let can_be_a_lifetime = if cursor.peek_second() == '\'' {
171171
// It's surely not a lifetime.
172172
false
173173
} else {
174174
// If the first symbol is valid for identifier, it can be a lifetime.
175175
// Also check if it's a number for a better error reporting (so '0 will
176176
// be reported as invalid lifetime and not as unterminated char literal).
177-
is_id_start(cursor.first()) || cursor.first().is_digit(10)
177+
is_id_start(cursor.peek()) || cursor.peek().is_digit(10)
178178
};
179179

180180
if !can_be_a_lifetime {
@@ -190,18 +190,18 @@ pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
190190
// Either a lifetime or a character literal with
191191
// length greater than 1.
192192

193-
let starts_with_number = cursor.first().is_digit(10);
193+
let starts_with_number = cursor.peek().is_digit(10);
194194

195195
// Skip the literal contents.
196196
// First symbol can be a number (which isn't a valid identifier start),
197197
// so skip it without any checks.
198198
cursor.bump();
199-
cursor.eat_while(is_id_continue);
199+
cursor.bump_while(is_id_continue);
200200

201201
// Check if after skipping literal contents we've met a closing
202202
// single quote (which means that user attempted to create a
203203
// string with single quotes).
204-
if cursor.first() == '\'' {
204+
if cursor.peek() == '\'' {
205205
cursor.bump();
206206
let kind = LiteralKind::Char { terminated: true };
207207
TokenKind::Literal { kind, suffix_start: cursor.len_consumed() }
@@ -213,7 +213,7 @@ pub(crate) fn lifetime_or_char(cursor: &mut Cursor) -> TokenKind {
213213
pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
214214
debug_assert!(cursor.prev() == '\'');
215215
// Check if it's a one-symbol literal.
216-
if cursor.second() == '\'' && cursor.first() != '\\' {
216+
if cursor.peek_second() == '\'' && cursor.peek() != '\\' {
217217
cursor.bump();
218218
cursor.bump();
219219
return true;
@@ -223,7 +223,7 @@ pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
223223

224224
// Parse until either quotes are terminated or error is detected.
225225
loop {
226-
match cursor.first() {
226+
match cursor.peek() {
227227
// Quotes are terminated, finish parsing.
228228
'\'' => {
229229
cursor.bump();
@@ -233,7 +233,7 @@ pub(crate) fn single_quoted_string(cursor: &mut Cursor) -> bool {
233233
// to the error report.
234234
'/' => break,
235235
// Newline without following '\'' means unclosed quote, stop parsing.
236-
'\n' if cursor.second() != '\'' => break,
236+
'\n' if cursor.peek_second() != '\'' => break,
237237
// End of file, stop parsing.
238238
EOF_CHAR if cursor.is_eof() => break,
239239
// Escaped slash is considered one character, so bump twice.
@@ -260,7 +260,7 @@ pub(crate) fn double_quoted_string(cursor: &mut Cursor) -> bool {
260260
'"' => {
261261
return true;
262262
}
263-
'\\' if cursor.first() == '\\' || cursor.first() == '"' => {
263+
'\\' if cursor.peek() == '\\' || cursor.peek() == '"' => {
264264
// Bump again to skip escaped character.
265265
cursor.bump();
266266
}
@@ -295,7 +295,7 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
295295

296296
// Count opening '#' symbols.
297297
let mut eaten = 0;
298-
while cursor.first() == '#' {
298+
while cursor.peek() == '#' {
299299
eaten += 1;
300300
cursor.bump();
301301
}
@@ -313,7 +313,7 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
313313
// Skip the string contents and on each '#' character met, check if this is
314314
// a raw string termination.
315315
loop {
316-
cursor.eat_while(|c| c != '"');
316+
cursor.bump_while(|c| c != '"');
317317

318318
if cursor.is_eof() {
319319
return (
@@ -335,7 +335,7 @@ fn raw_string_unvalidated(cursor: &mut Cursor, prefix_len: usize) -> (usize, Opt
335335
// `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
336336
// followed by a `#` token.
337337
let mut n_end_hashes = 0;
338-
while cursor.first() == '#' && n_end_hashes < n_start_hashes {
338+
while cursor.peek() == '#' && n_end_hashes < n_start_hashes {
339339
n_end_hashes += 1;
340340
cursor.bump();
341341
}

0 commit comments

Comments
 (0)