diff --git a/TwigCS/src/Token/Tokenizer.php b/TwigCS/src/Token/Tokenizer.php index 1898418..d4ce9f2 100644 --- a/TwigCS/src/Token/Tokenizer.php +++ b/TwigCS/src/Token/Tokenizer.php @@ -10,6 +10,9 @@ /** * An override of Twig's Lexer to add whitespace and new line detection. + * + * Since the regexes use byte offsets as positions, mb_ methods are deliberately not used. + * phpcs:disable SymfonyCustom.PHP.EncourageMultiBytes */ class Tokenizer { @@ -193,7 +196,7 @@ protected function resetState(Source $source): void $this->bracketsAndTernary = []; $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode()); - $this->end = mb_strlen($this->code); + $this->end = strlen($this->code); $this->filename = $source->getName(); } @@ -310,8 +313,8 @@ protected function moveCurrentPosition(int $value = 1): void */ protected function moveCursor(string $value): void { - $this->cursor += mb_strlen($value); - $this->line += mb_substr_count($value, "\n"); + $this->cursor += strlen($value); + $this->line += substr_count($value, "\n"); } /** @@ -322,7 +325,7 @@ protected function moveCursor(string $value): void */ protected function pushToken(int $type, string $value = null): void { - $tokenPositionInLine = $this->cursor - mb_strrpos(mb_substr($this->code, 0, $this->cursor), PHP_EOL); + $tokenPositionInLine = $this->cursor - strrpos(substr($this->code, 0, $this->cursor), PHP_EOL); $this->tokens[] = new Token($type, $this->line, $tokenPositionInLine, $this->filename, $value); } @@ -350,7 +353,7 @@ protected function lexExpression(): void $this->lexName($match[0]); } elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) { $this->lexNumber($match[0]); - } elseif (false !== mb_strpos(self::PUNCTUATION, $this->code[$this->cursor])) { + } elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) { $this->lexPunctuation(); } elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) { $this->lexString($match[0]); @@ -436,7 +439,7 @@ 
protected function lexDqString(): void $this->lexStartInterpolation(); } elseif ( preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) - && mb_strlen($match[0]) > 0 + && strlen($match[0]) > 0 ) { $this->pushToken(Token::STRING_TYPE, $match[0]); $this->moveCursor($match[0]); @@ -500,8 +503,8 @@ protected function lexData(int $limit = 0): void $value = $match[0]; // Stop if cursor reaches the next token start. - if (0 !== $limit && $limit <= ($this->cursor + mb_strlen($value))) { - $value = mb_substr($value, 0, $limit - $this->cursor); + if (0 !== $limit && $limit <= ($this->cursor + strlen($value))) { + $value = substr($value, 0, $limit - $this->cursor); } // Fixing token start among expressions and comments. @@ -702,7 +705,7 @@ protected function lexPunctuation(): void return; } - if (false !== mb_strpos(',)]}', $currentToken)) { + if (false !== strpos(',)]}', $currentToken)) { // Because {{ foo ? 'yes' }} is the same as {{ foo ? 'yes' : '' }} do { array_pop($this->bracketsAndTernary); @@ -716,9 +719,9 @@ protected function lexPunctuation(): void } } - if (false !== mb_strpos('([{', $currentToken)) { + if (false !== strpos('([{', $currentToken)) { $this->bracketsAndTernary[] = [$currentToken, $this->line]; - } elseif (false !== mb_strpos(')]}', $currentToken)) { + } elseif (false !== strpos(')]}', $currentToken)) { if (0 === count($this->bracketsAndTernary)) { throw new Exception(sprintf('Unexpected "%s"', $currentToken)); } diff --git a/TwigCS/tests/Token/Tokenizer/TokenizerTest.php b/TwigCS/tests/Token/Tokenizer/TokenizerTest.php index 5b4e89b..56e1f6a 100644 --- a/TwigCS/tests/Token/Tokenizer/TokenizerTest.php +++ b/TwigCS/tests/Token/Tokenizer/TokenizerTest.php @@ -276,6 +276,26 @@ public function tokenizeDataProvider(): array 44 => Token::EOF_TYPE, ], ], + [ + __DIR__.'/TokenizerTest6.twig', + [ + 0 => Token::BLOCK_START_TYPE, + 1 => Token::WHITESPACE_TYPE, + 2 => Token::BLOCK_TAG_TYPE, + 3 => Token::WHITESPACE_TYPE, + 4 => 
Token::NAME_TYPE, + 5 => Token::WHITESPACE_TYPE, + 6 => Token::BLOCK_END_TYPE, + 7 => Token::TEXT_TYPE, + 8 => Token::BLOCK_START_TYPE, + 9 => Token::WHITESPACE_TYPE, + 10 => Token::BLOCK_TAG_TYPE, + 11 => Token::WHITESPACE_TYPE, + 12 => Token::BLOCK_END_TYPE, + 13 => Token::EOL_TYPE, + 14 => Token::EOF_TYPE, + ], + ], ]; } } diff --git a/TwigCS/tests/Token/Tokenizer/TokenizerTest6.twig b/TwigCS/tests/Token/Tokenizer/TokenizerTest6.twig new file mode 100644 index 0000000..d5af71b --- /dev/null +++ b/TwigCS/tests/Token/Tokenizer/TokenizerTest6.twig @@ -0,0 +1 @@ +{% block try %}⇨{% endblock %}