9
9
/**
10
10
* An override of Twig's Lexer to add whitespace and new line detection.
11
11
*/
12
- class Tokenizer implements TokenizerInterface
12
+ class Tokenizer
13
13
{
14
14
const STATE_DATA = Lexer::STATE_DATA ;
15
15
const STATE_BLOCK = Lexer::STATE_BLOCK ;
@@ -92,27 +92,20 @@ class Tokenizer implements TokenizerInterface
92
92
public function __construct (Environment $ env , array $ options = [])
93
93
{
94
94
$ this ->options = array_merge ([
95
- 'tag_comment ' => ['{# ' , '#} ' ],
96
- 'tag_block ' => ['{% ' , '%} ' ],
97
- 'tag_variable ' => ['{{ ' , '}} ' ],
98
- 'whitespace_trim ' => '- ' ,
99
- 'whitespace_line_trim ' => '~ ' ,
100
- 'whitespace_line_chars ' => ' \t\0\x0B ' ,
101
- 'interpolation ' => ['#{ ' , '} ' ],
95
+ 'tag_comment ' => ['{# ' , '#} ' ],
96
+ 'tag_block ' => ['{% ' , '%} ' ],
97
+ 'tag_variable ' => ['{{ ' , '}} ' ],
98
+ 'whitespace_trim ' => '- ' ,
99
+ 'interpolation ' => ['#{ ' , '} ' ],
102
100
], $ options );
103
101
104
102
$ tokenizerHelper = new TokenizerHelper ($ env , $ this ->options );
105
103
$ this ->regexes = [
106
- 'lex_var ' => $ tokenizerHelper ->getVarRegex (),
107
- 'lex_block ' => $ tokenizerHelper ->getBlockRegex (),
108
- 'lex_raw_data ' => $ tokenizerHelper ->getRawDataRegex (),
109
- 'operator ' => $ tokenizerHelper ->getOperatorRegex (),
110
- 'lex_comment ' => $ tokenizerHelper ->getCommentRegex (),
111
- 'lex_block_raw ' => $ tokenizerHelper ->getBlockRawRegex (),
112
- 'lex_block_line ' => $ tokenizerHelper ->getBlockLineRegex (),
113
- 'lex_tokens_start ' => $ tokenizerHelper ->getTokensStartRegex (),
114
- 'interpolation_start ' => $ tokenizerHelper ->getInterpolationStartRegex (),
115
- 'interpolation_end ' => $ tokenizerHelper ->getInterpolationEndRegex (),
104
+ 'lex_block ' => $ tokenizerHelper ->getBlockRegex (),
105
+ 'lex_comment ' => $ tokenizerHelper ->getCommentRegex (),
106
+ 'lex_variable ' => $ tokenizerHelper ->getVariableRegex (),
107
+ 'operator ' => $ tokenizerHelper ->getOperatorRegex (),
108
+ 'lex_tokens_start ' => $ tokenizerHelper ->getTokensStartRegex (),
116
109
];
117
110
}
118
111
@@ -127,7 +120,14 @@ public function tokenize(Source $source)
127
120
$ this ->preflightSource ($ this ->code );
128
121
129
122
while ($ this ->cursor < $ this ->end ) {
130
- $ nextToken = $ this ->getTokenPosition ();
123
+ $ lastToken = $ this ->getTokenPosition ();
124
+ $ nextToken = $ this ->getTokenPosition (1 );
125
+
126
+ while (null !== $ nextToken && $ nextToken ['position ' ] < $ this ->cursor ) {
127
+ $ this ->moveCurrentPosition ();
128
+ $ lastToken = $ nextToken ;
129
+ $ nextToken = $ this ->getTokenPosition (1 );
130
+ }
131
131
132
132
switch ($ this ->getState ()) {
133
133
case self ::STATE_BLOCK :
@@ -139,14 +139,8 @@ public function tokenize(Source $source)
139
139
case self ::STATE_COMMENT :
140
140
$ this ->lexComment ();
141
141
break ;
142
- // case self::STATE_STRING:
143
- // $this->lexString();
144
- // break;
145
- // case self::STATE_INTERPOLATION:
146
- // $this->lexInterpolation();
147
- // break;
148
142
case self ::STATE_DATA :
149
- if ($ this ->cursor === $ nextToken ['position ' ]) {
143
+ if (null !== $ lastToken && $ this ->cursor === $ lastToken ['position ' ]) {
150
144
$ this ->lexStart ();
151
145
} else {
152
146
$ this ->lexData ();
@@ -230,15 +224,19 @@ protected function preflightSource($code)
230
224
}
231
225
232
226
/**
227
+ * @param int $offset
228
+ *
233
229
* @return array|null
234
230
*/
235
- protected function getTokenPosition ()
231
+ protected function getTokenPosition ($ offset = 0 )
236
232
{
237
- if (empty ($ this ->tokenPositions ) || !isset ($ this ->tokenPositions [$ this ->currentPosition ])) {
233
+ if (empty ($ this ->tokenPositions )
234
+ || !isset ($ this ->tokenPositions [$ this ->currentPosition + $ offset ])
235
+ ) {
238
236
return null ;
239
237
}
240
238
241
- return $ this ->tokenPositions [$ this ->currentPosition ];
239
+ return $ this ->tokenPositions [$ this ->currentPosition + $ offset ];
242
240
}
243
241
244
242
/**
@@ -277,23 +275,20 @@ protected function pushToken($type, $value = null)
277
275
protected function lex ($ endType , $ endRegex )
278
276
{
279
277
preg_match ($ endRegex , $ this ->code , $ match , PREG_OFFSET_CAPTURE , $ this ->cursor );
278
+
280
279
if (!isset ($ match [0 ])) {
281
- // Should not happen, but in case it is;
282
- throw new \Exception (sprintf ('Unclosed "%s" in "%s" at line %d ' , $ endType , $ this ->filename , $ this ->line ));
283
- }
284
- if ($ match [0 ][1 ] === $ this ->cursor ) {
280
+ $ this ->lexExpression ();
281
+ } elseif ($ match [0 ][1 ] === $ this ->cursor ) {
285
282
$ this ->pushToken ($ endType , $ match [0 ][0 ]);
286
283
$ this ->moveCursor ($ match [0 ][0 ]);
287
284
$ this ->moveCurrentPosition ();
288
285
$ this ->popState ();
286
+ } elseif ($ this ->getState () === self ::STATE_COMMENT ) {
287
+ // Parse as text until the end position.
288
+ $ this ->lexData ($ match [0 ][1 ]);
289
289
} else {
290
- if ($ this ->getState () === self ::STATE_COMMENT ) {
291
- // Parse as text until the end position.
292
- $ this ->lexData ($ match [0 ][1 ]);
293
- } else {
294
- while ($ this ->cursor < $ match [0 ][1 ]) {
295
- $ this ->lexExpression ();
296
- }
290
+ while ($ this ->cursor < $ match [0 ][1 ]) {
291
+ $ this ->lexExpression ();
297
292
}
298
293
}
299
294
}
@@ -304,7 +299,9 @@ protected function lex($endType, $endRegex)
304
299
protected function lexExpression ()
305
300
{
306
301
$ currentToken = $ this ->code [$ this ->cursor ];
307
- if (' ' === $ currentToken ) {
302
+ if (preg_match ('/\t/ ' , $ currentToken )) {
303
+ $ this ->lexTab ();
304
+ } elseif (' ' === $ currentToken ) {
308
305
$ this ->lexWhitespace ();
309
306
} elseif (PHP_EOL === $ currentToken ) {
310
307
$ this ->lexEOL ();
@@ -332,11 +329,11 @@ protected function lexExpression()
332
329
} elseif (false !== strpos (')]} ' , $ this ->code [$ this ->cursor ])) {
333
330
// closing bracket
334
331
if (empty ($ this ->brackets )) {
335
- throw new \Exception (sprintf ('Unexpected "%s". ' , $ this ->code [$ this ->cursor ]));
332
+ throw new \Exception (sprintf ('Unexpected "%s" ' , $ this ->code [$ this ->cursor ]));
336
333
}
337
334
$ expect = array_pop ($ this ->brackets )[0 ];
338
335
if (strtr ($ expect , '([{ ' , ')]} ' ) !== $ this ->code [$ this ->cursor ]) {
339
- throw new \Exception (sprintf ('Unclosed "%s". ' , $ expect ));
336
+ throw new \Exception (sprintf ('Unclosed "%s" ' , $ expect ));
340
337
}
341
338
}
342
339
$ this ->pushToken (Token::PUNCTUATION_TYPE , $ this ->code [$ this ->cursor ]);
@@ -347,7 +344,7 @@ protected function lexExpression()
347
344
$ this ->moveCursor ($ match [0 ]);
348
345
} else {
349
346
// unlexable
350
- throw new \Exception (sprintf ('Unexpected character "%s". ' , $ this -> code [ $ this -> cursor ] ));
347
+ throw new \Exception (sprintf ('Unexpected character "%s" ' , $ currentToken ));
351
348
}
352
349
}
353
350
@@ -384,6 +381,7 @@ protected function lexData($limit = 0)
384
381
if (0 === $ limit && null !== $ nextToken ) {
385
382
$ limit = $ nextToken ['position ' ];
386
383
}
384
+
387
385
$ currentToken = $ this ->code [$ this ->cursor ];
388
386
if (preg_match ('/\t/ ' , $ currentToken )) {
389
387
$ this ->lexTab ();
0 commit comments