Skip to content

🐛 Fix issue with multi byte #136

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions TwigCS/src/Token/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

/**
* An override of Twig's Lexer to add whitespace and new line detection.
*
* Since the regexes work with byte offsets as positions, the mb_ functions are intentionally not used.
* phpcs:disable SymfonyCustom.PHP.EncourageMultiBytes
*/
class Tokenizer
{
Expand Down Expand Up @@ -193,7 +196,7 @@ protected function resetState(Source $source): void
$this->bracketsAndTernary = [];

$this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
$this->end = mb_strlen($this->code);
$this->end = strlen($this->code);
$this->filename = $source->getName();
}

Expand Down Expand Up @@ -310,8 +313,8 @@ protected function moveCurrentPosition(int $value = 1): void
*/
protected function moveCursor(string $value): void
{
$this->cursor += mb_strlen($value);
$this->line += mb_substr_count($value, "\n");
$this->cursor += strlen($value);
$this->line += substr_count($value, "\n");
}

/**
Expand All @@ -322,7 +325,7 @@ protected function moveCursor(string $value): void
*/
protected function pushToken(int $type, string $value = null): void
{
$tokenPositionInLine = $this->cursor - mb_strrpos(mb_substr($this->code, 0, $this->cursor), PHP_EOL);
$tokenPositionInLine = $this->cursor - strrpos(substr($this->code, 0, $this->cursor), PHP_EOL);
$this->tokens[] = new Token($type, $this->line, $tokenPositionInLine, $this->filename, $value);
}

Expand Down Expand Up @@ -350,7 +353,7 @@ protected function lexExpression(): void
$this->lexName($match[0]);
} elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
$this->lexNumber($match[0]);
} elseif (false !== mb_strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
} elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
$this->lexPunctuation();
} elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
$this->lexString($match[0]);
Expand Down Expand Up @@ -436,7 +439,7 @@ protected function lexDqString(): void
$this->lexStartInterpolation();
} elseif (
preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor)
&& mb_strlen($match[0]) > 0
&& strlen($match[0]) > 0
) {
$this->pushToken(Token::STRING_TYPE, $match[0]);
$this->moveCursor($match[0]);
Expand Down Expand Up @@ -500,8 +503,8 @@ protected function lexData(int $limit = 0): void
$value = $match[0];

// Stop if cursor reaches the next token start.
if (0 !== $limit && $limit <= ($this->cursor + mb_strlen($value))) {
$value = mb_substr($value, 0, $limit - $this->cursor);
if (0 !== $limit && $limit <= ($this->cursor + strlen($value))) {
$value = substr($value, 0, $limit - $this->cursor);
}

// Fixing token start among expressions and comments.
Expand Down Expand Up @@ -702,7 +705,7 @@ protected function lexPunctuation(): void

return;
}
if (false !== mb_strpos(',)]}', $currentToken)) {
if (false !== strpos(',)]}', $currentToken)) {
// Because {{ foo ? 'yes' }} is the same as {{ foo ? 'yes' : '' }}
do {
array_pop($this->bracketsAndTernary);
Expand All @@ -716,9 +719,9 @@ protected function lexPunctuation(): void
}
}

if (false !== mb_strpos('([{', $currentToken)) {
if (false !== strpos('([{', $currentToken)) {
$this->bracketsAndTernary[] = [$currentToken, $this->line];
} elseif (false !== mb_strpos(')]}', $currentToken)) {
} elseif (false !== strpos(')]}', $currentToken)) {
if (0 === count($this->bracketsAndTernary)) {
throw new Exception(sprintf('Unexpected "%s"', $currentToken));
}
Expand Down
20 changes: 20 additions & 0 deletions TwigCS/tests/Token/Tokenizer/TokenizerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,26 @@ public function tokenizeDataProvider(): array
44 => Token::EOF_TYPE,
],
],
[
__DIR__.'/TokenizerTest6.twig',
[
0 => Token::BLOCK_START_TYPE,
1 => Token::WHITESPACE_TYPE,
2 => Token::BLOCK_TAG_TYPE,
3 => Token::WHITESPACE_TYPE,
4 => Token::NAME_TYPE,
5 => Token::WHITESPACE_TYPE,
6 => Token::BLOCK_END_TYPE,
7 => Token::TEXT_TYPE,
8 => Token::BLOCK_START_TYPE,
9 => Token::WHITESPACE_TYPE,
10 => Token::BLOCK_TAG_TYPE,
11 => Token::WHITESPACE_TYPE,
12 => Token::BLOCK_END_TYPE,
13 => Token::EOL_TYPE,
14 => Token::EOF_TYPE,
],
],
];
}
}
1 change: 1 addition & 0 deletions TwigCS/tests/Token/Tokenizer/TokenizerTest6.twig
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{% block try %}⇨{% endblock %}