From 757be6e5740ee75ce0daeb4be58f799ae56c0cdc Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 20 Jul 2023 08:06:17 +0530 Subject: [PATCH 1/5] Lex Jupyter Magic in assignment value position --- parser/src/lexer.rs | 54 ++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index eced33ce..ad8d8262 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -175,6 +175,8 @@ pub struct Lexer> { pending: Vec, // The current location. location: TextSize, + // The last emitted token. + last_emitted: Option, // Lexer mode. mode: Mode, } @@ -233,6 +235,7 @@ where pending: Vec::with_capacity(5), location: start, window: CharWindow::new(input), + last_emitted: None, mode, }; // Fill the window. @@ -945,15 +948,22 @@ where } } '%' => { - let tok_start = self.get_pos(); - self.next_char(); - if let Some('=') = self.window[0] { - self.next_char(); - let tok_end = self.get_pos(); - self.emit((Tok::PercentEqual, TextRange::new(tok_start, tok_end))); + if self.mode == Mode::Jupyter + && self.nesting == 0 + && matches!(self.last_emitted, Some(Tok::Equal)) + { + self.lex_and_emit_magic_command(); } else { - let tok_end = self.get_pos(); - self.emit((Tok::Percent, TextRange::new(tok_start, tok_end))); + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.window[0] { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((Tok::PercentEqual, TextRange::new(tok_start, tok_end))); + } else { + let tok_end = self.get_pos(); + self.emit((Tok::Percent, TextRange::new(tok_start, tok_end))); + } } } '|' => { @@ -1025,17 +1035,24 @@ where } } '!' => { - let tok_start = self.get_pos(); - self.next_char(); - if let Some('=') = self.window[0] { - self.next_char(); - let tok_end = self.get_pos(); - self.emit((Tok::NotEqual, TextRange::new(tok_start, tok_end))); + if self.mode == Mode::Jupyter + && self.nesting == 0 + && matches!(self.last_emitted, Some(Tok::Equal)) + { + self.lex_and_emit_magic_command(); } else { - return Err(LexicalError { - error: LexicalErrorType::UnrecognizedToken { tok: '!' }, - location: tok_start, - }); + let tok_start = self.get_pos(); + self.next_char(); + if let Some('=') = self.window[0] { + self.next_char(); + let tok_end = self.get_pos(); + self.emit((Tok::NotEqual, TextRange::new(tok_start, tok_end))); + } else { + return Err(LexicalError { + error: LexicalErrorType::UnrecognizedToken { tok: '!' }, + location: tok_start, + }); + } } } '~' => { @@ -1292,6 +1309,7 @@ where // Helper function to emit a lexed token to the queue of tokens. fn emit(&mut self, spanned: Spanned) { + self.last_emitted = Some(spanned.0.clone()); self.pending.push(spanned); } } From 1f98fe2286a13e2ffe3a2e41a0754af8b28e7e0a Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 20 Jul 2023 10:49:17 +0530 Subject: [PATCH 2/5] Emit `Newline` and handle indentation for Magic commands --- parser/src/lexer.rs | 119 ++++++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 54 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index ad8d8262..36532897 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -655,6 +655,12 @@ where // Detect indentation levels if self.at_begin_of_line { self.handle_indentations()?; + if self.mode == Mode::Jupyter + // https://github.com/ipython/ipython/blob/635815e8f1ded5b764d66cacc80bbe25e9e2587f/IPython/core/inputtransformer2.py#L345 + && matches!(self.window[0], Some('%' | '!' | '?' | '/' | ';' | ',')) + { + self.lex_and_emit_magic_command(); + } } self.consume_normal()?; @@ -703,10 +709,6 @@ where spaces = 0; tabs = 0; } - // https://github.com/ipython/ipython/blob/635815e8f1ded5b764d66cacc80bbe25e9e2587f/IPython/core/inputtransformer2.py#L345 - Some('%' | '!' | '?' | '/' | ';' | ',') if self.mode == Mode::Jupyter => { - self.lex_and_emit_magic_command(); - } Some('\x0C') => { // Form feed character! // Reset indentation for the Emacs user. @@ -1485,10 +1487,13 @@ mod tests { let tokens = lex_jupyter_source(&source); assert_eq!( tokens, - vec![Tok::MagicCommand { - value: "matplotlib --inline".to_string(), - kind: MagicKind::Magic - },] + vec![ + Tok::MagicCommand { + value: "matplotlib --inline".to_string(), + kind: MagicKind::Magic + }, + Tok::Newline + ] ) } @@ -1512,10 +1517,13 @@ mod tests { let tokens = lex_jupyter_source(&source); assert_eq!( tokens, - vec![Tok::MagicCommand { - value: "matplotlib ".to_string(), - kind: MagicKind::Magic - },] + vec![ + Tok::MagicCommand { + value: "matplotlib ".to_string(), + kind: MagicKind::Magic + }, + Tok::Newline + ] ) } @@ -1545,54 +1553,47 @@ mod tests { value: "".to_string(), kind: MagicKind::Magic, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Magic2, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Shell, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::ShCap, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Help, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Help2, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Paren, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Quote, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Quote2, }, + Tok::Newline, ] ) } @@ -1611,10 +1612,8 @@ mod tests { !!cd /Users/foo/Library/Application\ Support/ /foo 1 2 ,foo 1 2 -;foo 1 2 - !ls -" - .trim(); +;foo 1 2" + .trim(); let tokens = lex_jupyter_source(source); assert_eq!( tokens, @@ -1623,66 +1622,78 @@ mod tests { value: "foo".to_string(), kind: MagicKind::Help, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "foo".to_string(), kind: MagicKind::Help2, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "timeit a = b".to_string(), kind: MagicKind::Magic, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "timeit a % 3".to_string(), kind: MagicKind::Magic, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "matplotlib --inline".to_string(), kind: MagicKind::Magic, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "pwd && ls -a | sed 's/^/\\\\ /'".to_string(), kind: MagicKind::Shell, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "cd /Users/foo/Library/Application\\ Support/".to_string(), kind: MagicKind::ShCap, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "foo 1 2".to_string(), kind: MagicKind::Paren, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "foo 1 2".to_string(), kind: MagicKind::Quote, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, Tok::MagicCommand { value: "foo 1 2".to_string(), kind: MagicKind::Quote2, }, - #[cfg(feature = "full-lexer")] - Tok::NonLogicalNewline, + Tok::Newline, + ] + ) + } + + #[test] + fn test_jupyter_magic_indentation() { + let source = r" +if True: + %matplotlib \ + --inline" + .trim(); + let tokens = lex_jupyter_source(source); + assert_eq!( + tokens, + vec![ + Tok::If, + Tok::True, + Tok::Colon, + Tok::Newline, + Tok::Indent, Tok::MagicCommand { - value: "ls".to_string(), - kind: MagicKind::Shell, + value: "matplotlib --inline".to_string(), + kind: MagicKind::Magic, }, + Tok::Newline, + Tok::Dedent, ] ) } From 71c2aa9ce90829afc4287b53935c79fb7206d5ae Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 20 Jul 2023 10:55:41 +0530 Subject: [PATCH 3/5] Test Magic command token in assignment value --- parser/src/lexer.rs | 79 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 36532897..0a778077 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -1698,6 +1698,85 @@ if True: ) } + #[test] + fn test_jupyter_magic_assignment() { + let source = r" +pwd = !pwd +foo = %timeit a = b +bar = %timeit a % 3 +baz = %matplotlib \ + inline" + .trim(); + let tokens = lex_jupyter_source(source); + assert_eq!( + tokens, + vec![ + Tok::Name { + name: "pwd".to_string() + }, + Tok::Equal, + Tok::MagicCommand { + value: "pwd".to_string(), + kind: MagicKind::Shell, + }, + Tok::Newline, + Tok::Name { + name: "foo".to_string() + }, + Tok::Equal, + Tok::MagicCommand { + value: "timeit a = b".to_string(), + kind: MagicKind::Magic, + }, + Tok::Newline, + Tok::Name { + name: "bar".to_string() + }, + Tok::Equal, + Tok::MagicCommand { + value: "timeit a % 3".to_string(), + kind: MagicKind::Magic, + }, + Tok::Newline, + Tok::Name { + name: "baz".to_string() + }, + Tok::Equal, + Tok::MagicCommand { + value: "matplotlib inline".to_string(), + kind: MagicKind::Magic, + }, + Tok::Newline, + ] + ) + } + + fn assert_no_jupyter_magic(tokens: &[Tok]) { + for tok in tokens { + match tok { + Tok::MagicCommand { .. } => panic!("Unexpected magic command token: {:?}", tok), + _ => {} + } + } + } + + #[test] + fn test_jupyter_magic_not_an_assignment() { + let source = r" +# Other magic kinds are not valid here (can't test `foo = ?str` because '?' is not a valid token) +foo = /func +foo = ;func +foo = ,func + +(foo == %timeit a = b) +(foo := %timeit a = b) +def f(arg=%timeit a = b): + pass" + .trim(); + let tokens = lex_jupyter_source(source); + assert_no_jupyter_magic(&tokens); + } + #[test] fn test_numbers() { let source = "0x2f 0o12 0b1101 0 123 123_45_67_890 0.2 1e+2 2.1e3 2j 2.2j"; From 7a222ad8179df990fb3c3d91bab725a3c5eba0ce Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 20 Jul 2023 11:04:56 +0530 Subject: [PATCH 4/5] Revert "Emit `Newline` and handle indentation for Magic commands" This reverts commit 0a909cc20de74b74d09b0504f907594ecc75d30a. This will be added in the next PR. --- parser/src/lexer.rs | 119 ++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 65 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index 0a778077..fc6c9910 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -655,12 +655,6 @@ where // Detect indentation levels if self.at_begin_of_line { self.handle_indentations()?; - if self.mode == Mode::Jupyter - // https://github.com/ipython/ipython/blob/635815e8f1ded5b764d66cacc80bbe25e9e2587f/IPython/core/inputtransformer2.py#L345 - && matches!(self.window[0], Some('%' | '!' | '?' | '/' | ';' | ',')) - { - self.lex_and_emit_magic_command(); - } } self.consume_normal()?; @@ -709,6 +703,10 @@ where spaces = 0; tabs = 0; } + // https://github.com/ipython/ipython/blob/635815e8f1ded5b764d66cacc80bbe25e9e2587f/IPython/core/inputtransformer2.py#L345 + Some('%' | '!' | '?' | '/' | ';' | ',') if self.mode == Mode::Jupyter => { + self.lex_and_emit_magic_command(); + } Some('\x0C') => { // Form feed character! // Reset indentation for the Emacs user. @@ -1487,13 +1485,10 @@ mod tests { let tokens = lex_jupyter_source(&source); assert_eq!( tokens, - vec![ - Tok::MagicCommand { - value: "matplotlib --inline".to_string(), - kind: MagicKind::Magic - }, - Tok::Newline - ] + vec![Tok::MagicCommand { + value: "matplotlib --inline".to_string(), + kind: MagicKind::Magic + },] ) } @@ -1517,13 +1512,10 @@ mod tests { let tokens = lex_jupyter_source(&source); assert_eq!( tokens, - vec![ - Tok::MagicCommand { - value: "matplotlib ".to_string(), - kind: MagicKind::Magic - }, - Tok::Newline - ] + vec![Tok::MagicCommand { + value: "matplotlib ".to_string(), + kind: MagicKind::Magic + },] ) } @@ -1553,47 +1545,54 @@ mod tests { value: "".to_string(), kind: MagicKind::Magic, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Magic2, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Shell, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::ShCap, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Help, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Help2, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Paren, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Quote, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "".to_string(), kind: MagicKind::Quote2, }, - Tok::Newline, ] ) } @@ -1612,8 +1611,10 @@ mod tests { !!cd /Users/foo/Library/Application\ Support/ /foo 1 2 ,foo 1 2 -;foo 1 2" - .trim(); +;foo 1 2 + !ls +" + .trim(); let tokens = lex_jupyter_source(source); assert_eq!( tokens, @@ -1622,78 +1623,66 @@ mod tests { value: "foo".to_string(), kind: MagicKind::Help, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "foo".to_string(), kind: MagicKind::Help2, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "timeit a = b".to_string(), kind: MagicKind::Magic, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "timeit a % 3".to_string(), kind: MagicKind::Magic, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "matplotlib --inline".to_string(), kind: MagicKind::Magic, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "pwd && ls -a | sed 's/^/\\\\ /'".to_string(), kind: MagicKind::Shell, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "cd /Users/foo/Library/Application\\ Support/".to_string(), kind: MagicKind::ShCap, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "foo 1 2".to_string(), kind: MagicKind::Paren, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "foo 1 2".to_string(), kind: MagicKind::Quote, }, - Tok::Newline, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { value: "foo 1 2".to_string(), kind: MagicKind::Quote2, }, - Tok::Newline, - ] - ) - } - - #[test] - fn test_jupyter_magic_indentation() { - let source = r" -if True: - %matplotlib \ - --inline" - .trim(); - let tokens = lex_jupyter_source(source); - assert_eq!( - tokens, - vec![ - Tok::If, - Tok::True, - Tok::Colon, - Tok::Newline, - Tok::Indent, + #[cfg(feature = "full-lexer")] + Tok::NonLogicalNewline, Tok::MagicCommand { - value: "matplotlib --inline".to_string(), - kind: MagicKind::Magic, + value: "ls".to_string(), + kind: MagicKind::Shell, }, - Tok::Newline, - Tok::Dedent, ] ) } From 3dfb8f48eab05b84286ef854c97a011f864937ec Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 20 Jul 2023 18:49:43 +0530 Subject: [PATCH 5/5] Use a boolean flag for "Is last token an `Equal`"? Cloning is expensive especially for every token. --- parser/src/lexer.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index fc6c9910..49c8b60d 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -175,8 +175,8 @@ pub struct Lexer> { pending: Vec, // The current location. location: TextSize, - // The last emitted token. - last_emitted: Option, + // Is the last token an equal sign? + last_token_is_equal: bool, // Lexer mode. mode: Mode, } @@ -235,7 +235,7 @@ where pending: Vec::with_capacity(5), location: start, window: CharWindow::new(input), - last_emitted: None, + last_token_is_equal: false, mode, }; // Fill the window. @@ -948,10 +948,7 @@ where } } '%' => { - if self.mode == Mode::Jupyter - && self.nesting == 0 - && matches!(self.last_emitted, Some(Tok::Equal)) - { + if self.mode == Mode::Jupyter && self.nesting == 0 && self.last_token_is_equal { self.lex_and_emit_magic_command(); } else { let tok_start = self.get_pos(); @@ -1035,10 +1032,7 @@ where } } '!' => { - if self.mode == Mode::Jupyter - && self.nesting == 0 - && matches!(self.last_emitted, Some(Tok::Equal)) - { + if self.mode == Mode::Jupyter && self.nesting == 0 && self.last_token_is_equal { self.lex_and_emit_magic_command(); } else { let tok_start = self.get_pos(); @@ -1309,7 +1303,7 @@ where // Helper function to emit a lexed token to the queue of tokens. fn emit(&mut self, spanned: Spanned) { - self.last_emitted = Some(spanned.0.clone()); + self.last_token_is_equal = matches!(spanned.0, Tok::Equal); self.pending.push(spanned); } }