Skip to content
This repository was archived by the owner on Jul 27, 2023. It is now read-only.

Commit e363fb8

Browse files
authored
Lex Jupyter Magic in assignment value position (#30)
Emit a `MagicCommand` token when the magic command is the assignment value[^1], i.e., when it appears on the right-hand side of an assignment statement.

Examples:

```python
pwd = !pwd
foo = %timeit a = b
bar = %timeit a % 3
baz = %matplotlib \
    inline
```

[^1]: Only `%` and `!` are valid in that position; other magic kinds are not.
1 parent 4888d80 commit e363fb8

File tree

1 file changed

+109
-18
lines changed

1 file changed

+109
-18
lines changed

parser/src/lexer.rs

Lines changed: 109 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ pub struct Lexer<T: Iterator<Item = char>> {
175175
pending: Vec<Spanned>,
176176
// The current location.
177177
location: TextSize,
178+
// Is the last token an equal sign?
179+
last_token_is_equal: bool,
178180
// Lexer mode.
179181
mode: Mode,
180182
}
@@ -233,6 +235,7 @@ where
233235
pending: Vec::with_capacity(5),
234236
location: start,
235237
window: CharWindow::new(input),
238+
last_token_is_equal: false,
236239
mode,
237240
};
238241
// Fill the window.
@@ -945,15 +948,19 @@ where
945948
}
946949
}
947950
'%' => {
948-
let tok_start = self.get_pos();
949-
self.next_char();
950-
if let Some('=') = self.window[0] {
951-
self.next_char();
952-
let tok_end = self.get_pos();
953-
self.emit((Tok::PercentEqual, TextRange::new(tok_start, tok_end)));
951+
if self.mode == Mode::Jupyter && self.nesting == 0 && self.last_token_is_equal {
952+
self.lex_and_emit_magic_command();
954953
} else {
955-
let tok_end = self.get_pos();
956-
self.emit((Tok::Percent, TextRange::new(tok_start, tok_end)));
954+
let tok_start = self.get_pos();
955+
self.next_char();
956+
if let Some('=') = self.window[0] {
957+
self.next_char();
958+
let tok_end = self.get_pos();
959+
self.emit((Tok::PercentEqual, TextRange::new(tok_start, tok_end)));
960+
} else {
961+
let tok_end = self.get_pos();
962+
self.emit((Tok::Percent, TextRange::new(tok_start, tok_end)));
963+
}
957964
}
958965
}
959966
'|' => {
@@ -1025,17 +1032,21 @@ where
10251032
}
10261033
}
10271034
'!' => {
1028-
let tok_start = self.get_pos();
1029-
self.next_char();
1030-
if let Some('=') = self.window[0] {
1031-
self.next_char();
1032-
let tok_end = self.get_pos();
1033-
self.emit((Tok::NotEqual, TextRange::new(tok_start, tok_end)));
1035+
if self.mode == Mode::Jupyter && self.nesting == 0 && self.last_token_is_equal {
1036+
self.lex_and_emit_magic_command();
10341037
} else {
1035-
return Err(LexicalError {
1036-
error: LexicalErrorType::UnrecognizedToken { tok: '!' },
1037-
location: tok_start,
1038-
});
1038+
let tok_start = self.get_pos();
1039+
self.next_char();
1040+
if let Some('=') = self.window[0] {
1041+
self.next_char();
1042+
let tok_end = self.get_pos();
1043+
self.emit((Tok::NotEqual, TextRange::new(tok_start, tok_end)));
1044+
} else {
1045+
return Err(LexicalError {
1046+
error: LexicalErrorType::UnrecognizedToken { tok: '!' },
1047+
location: tok_start,
1048+
});
1049+
}
10391050
}
10401051
}
10411052
'~' => {
@@ -1292,6 +1303,7 @@ where
12921303

12931304
// Helper function to emit a lexed token to the queue of tokens.
// Every call also refreshes `last_token_is_equal`, so the flag is true only while the
// most recently queued token is `Tok::Equal`.
12941305
fn emit(&mut self, spanned: Spanned) {
1306+
// Record whether this token is `=`; the `%` and `!` branches consult the flag
// (together with the Jupyter mode and the nesting level) before lexing a magic command.
self.last_token_is_equal = matches!(spanned.0, Tok::Equal);
12951307
self.pending.push(spanned);
12961308
}
12971309
}
@@ -1669,6 +1681,85 @@ mod tests {
16691681
)
16701682
}
16711683

1684+
// Lexes four assignments whose value starts with `!` or `%` and checks that each one yields
// `Name`, `Equal`, a single `MagicCommand`, and `Newline`. The expected `value` omits the
// leading `!`/`%`; `!` maps to `MagicKind::Shell` and `%` to `MagicKind::Magic`.
#[test]
1685+
fn test_jupyter_magic_assignment() {
1686+
let source = r"
1687+
pwd = !pwd
1688+
foo = %timeit a = b
1689+
bar = %timeit a % 3
1690+
baz = %matplotlib \
1691+
inline"
1692+
.trim();
1693+
let tokens = lex_jupyter_source(source);
1694+
assert_eq!(
1695+
tokens,
1696+
vec![
1697+
// `pwd = !pwd`
Tok::Name {
1698+
name: "pwd".to_string()
1699+
},
1700+
Tok::Equal,
1701+
Tok::MagicCommand {
1702+
value: "pwd".to_string(),
1703+
kind: MagicKind::Shell,
1704+
},
1705+
Tok::Newline,
1706+
// `foo = %timeit a = b`: the second `=` stays inside the command text.
Tok::Name {
1707+
name: "foo".to_string()
1708+
},
1709+
Tok::Equal,
1710+
Tok::MagicCommand {
1711+
value: "timeit a = b".to_string(),
1712+
kind: MagicKind::Magic,
1713+
},
1714+
Tok::Newline,
1715+
// `bar = %timeit a % 3`: the later `%` stays inside the command text.
Tok::Name {
1716+
name: "bar".to_string()
1717+
},
1718+
Tok::Equal,
1719+
Tok::MagicCommand {
1720+
value: "timeit a % 3".to_string(),
1721+
kind: MagicKind::Magic,
1722+
},
1723+
Tok::Newline,
1724+
// `baz = %matplotlib \` + `inline`: the backslash-continued command is expected
// as the single value `matplotlib inline`.
Tok::Name {
1725+
name: "baz".to_string()
1726+
},
1727+
Tok::Equal,
1728+
Tok::MagicCommand {
1729+
value: "matplotlib inline".to_string(),
1730+
kind: MagicKind::Magic,
1731+
},
1732+
Tok::Newline,
1733+
]
1734+
)
1735+
}
1736+
1737+
// Test helper: panics with the offending token's `Debug` output if any entry of `tokens`
// is a `Tok::MagicCommand`; every other token kind is ignored.
fn assert_no_jupyter_magic(tokens: &[Tok]) {
1738+
for tok in tokens {
1739+
match tok {
1740+
Tok::MagicCommand { .. } => panic!("Unexpected magic command token: {:?}", tok),
1741+
// Any non-magic token is acceptable here.
_ => {}
1742+
}
1743+
}
1744+
}
1745+
1746+
// Negative cases: lexes sources where a magic-looking prefix follows `=` with a prefix other
// than `%`/`!` (`/`, `;`, `,`), or where `%` appears after `==`, `:=`, or a default-argument
// `=` inside parentheses, and asserts that no `Tok::MagicCommand` is produced.
// NOTE(review): the parenthesized cases are presumably rejected by the `nesting == 0` check
// in the `%` branch; confirm against the lexer.
#[test]
1747+
fn test_jupyter_magic_not_an_assignment() {
1748+
let source = r"
1749+
# Other magic kinds are not valid here (can't test `foo = ?str` because '?' is not a valid token)
1750+
foo = /func
1751+
foo = ;func
1752+
foo = ,func
1753+
1754+
(foo == %timeit a = b)
1755+
(foo := %timeit a = b)
1756+
def f(arg=%timeit a = b):
1757+
pass"
1758+
.trim();
1759+
let tokens = lex_jupyter_source(source);
1760+
// Only the absence of magic tokens is checked; the exact token stream is not pinned.
assert_no_jupyter_magic(&tokens);
1761+
}
1762+
16721763
#[test]
16731764
fn test_numbers() {
16741765
let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j";

0 commit comments

Comments
 (0)