@@ -16,7 +16,7 @@ class Tokenizer
16
16
private const STATE_DATA = 0 ;
17
17
private const STATE_BLOCK = 1 ;
18
18
private const STATE_VAR = 2 ;
19
- private const STATE_STRING = 3 ;
19
+ private const STATE_DQ_STRING = 3 ;
20
20
private const STATE_INTERPOLATION = 4 ;
21
21
private const STATE_COMMENT = 5 ;
22
22
@@ -103,11 +103,13 @@ public function __construct(Environment $env, array $options = [])
103
103
104
104
$ tokenizerHelper = new TokenizerHelper ($ env , $ this ->options );
105
105
$ this ->regexes = [
106
- 'lex_block ' => $ tokenizerHelper ->getBlockRegex (),
107
- 'lex_comment ' => $ tokenizerHelper ->getCommentRegex (),
108
- 'lex_variable ' => $ tokenizerHelper ->getVariableRegex (),
109
- 'operator ' => $ tokenizerHelper ->getOperatorRegex (),
110
- 'lex_tokens_start ' => $ tokenizerHelper ->getTokensStartRegex (),
106
+ 'lex_block ' => $ tokenizerHelper ->getBlockRegex (),
107
+ 'lex_comment ' => $ tokenizerHelper ->getCommentRegex (),
108
+ 'lex_variable ' => $ tokenizerHelper ->getVariableRegex (),
109
+ 'operator ' => $ tokenizerHelper ->getOperatorRegex (),
110
+ 'lex_tokens_start ' => $ tokenizerHelper ->getTokensStartRegex (),
111
+ 'interpolation_start ' => $ tokenizerHelper ->getInterpolationStartRegex (),
112
+ 'interpolation_end ' => $ tokenizerHelper ->getInterpolationEndRegex (),
111
113
];
112
114
}
113
115
@@ -150,6 +152,12 @@ public function tokenize(Source $source): array
150
152
$ this ->lexData ();
151
153
}
152
154
break ;
155
+ case self ::STATE_DQ_STRING :
156
+ $ this ->lexDqString ();
157
+ break ;
158
+ case self ::STATE_INTERPOLATION :
159
+ $ this ->lexInterpolation ();
160
+ break ;
153
161
default :
154
162
throw new Exception ('Unhandled state in tokenize ' , 1 );
155
163
}
@@ -277,35 +285,29 @@ protected function pushToken(int $type, string $value = null): void
277
285
/**
 * Lexes one element of an expression, starting at the current cursor position.
 *
 * The alternatives are tried in a fixed order (tab, space, newline, arrow,
 * operator, name, number, punctuation, quoted string, opening of a
 * double-quoted string) and the first one that matches is delegated to the
 * corresponding lex*() method.
 *
 * @throws Exception when nothing matches at the cursor
 */
protected function lexExpression(): void
{
    $currentToken = $this->code[$this->cursor];
    // Null when the cursor is on the last character of the code.
    $nextToken = $this->code[$this->cursor + 1] ?? null;

    if ("\t" === $currentToken) {
        $this->lexTab();
    } elseif (' ' === $currentToken) {
        $this->lexWhitespace();
    } elseif ("\n" === $currentToken) {
        // Compare against "\n" rather than PHP_EOL: $currentToken is a single
        // character, and PHP_EOL is the two-character "\r\n" on Windows, so
        // the comparison could never succeed there.
        $this->lexEOL();
    } elseif ('=' === $currentToken && '>' === $nextToken) {
        // Checked ahead of the operator regex so that "=>" is emitted as one
        // arrow token (presumably the operator regex would otherwise consume
        // the "=" on its own - confirm against TokenizerHelper).
        $this->lexArrowFunction();
    } elseif (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
        $this->lexOperator($match[0]);
    } elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
        $this->lexName($match[0]);
    } elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
        $this->lexNumber($match[0]);
    } elseif (false !== strpos(self::PUNCTUATION, $currentToken)) {
        $this->lexPunctuation();
    } elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
        $this->lexString($match[0]);
    } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
        // Only reached when REGEX_STRING did not match at the cursor.
        $this->lexStartDqString();
    } else {
        throw new Exception(sprintf('Unexpected character "%s"', $currentToken));
    }
}
@@ -368,6 +370,51 @@ protected function lexComment(): void
368
370
}
369
371
}
370
372
373
/**
 * Lexes the next piece of a double-quoted string at the cursor.
 *
 * Alternatives are tried in this order: the opening of an interpolation, a
 * non-empty run of literal string content, then the string delimiter. On the
 * delimiter the innermost entry is removed from the bracket stack and the
 * current state is popped.
 *
 * @throws Exception when the delimiter found is not a double quote, or when
 *                   none of the alternatives matches at the cursor
 */
protected function lexDqString(): void
{
    if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
        $this->lexStartInterpolation();

        return;
    }

    $foundPart = preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor);
    if ($foundPart && '' !== $match[0]) {
        // Literal content: the token value has its C-style escapes resolved,
        // while moveCursor() receives the raw text.
        $literal = $match[0];
        $this->pushToken(Token::STRING_TYPE, stripcslashes($literal));
        $this->moveCursor($literal);

        return;
    }

    if (!preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
        throw new Exception(sprintf('Unexpected character "%s"', $this->code[$this->cursor]));
    }

    // The stack entry is removed before the sanity check, so it is gone even
    // when the exception below is thrown.
    $opening = array_pop($this->bracketsAndTernary);
    if ('"' !== $this->code[$this->cursor]) {
        throw new Exception(sprintf('Unclosed "%s"', $opening[0]));
    }

    $this->popState();
    $this->moveCursor('"');
}
398
+
399
/**
 * Lexes the content of a string interpolation.
 *
 * When the innermost entry of the bracket stack is the interpolation opening
 * and the interpolation end pattern matches at the cursor, the interpolation
 * is closed; anything else is lexed as a regular expression element.
 *
 * @throws Exception
 */
protected function lexInterpolation(): void
{
    $innermost = end($this->bracketsAndTernary);

    $closesInterpolation = $this->options['interpolation'][0] === $innermost[0]
        && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor);

    if (!$closesInterpolation) {
        $this->lexExpression();

        return;
    }

    array_pop($this->bracketsAndTernary);
    $this->pushToken(Token::INTERPOLATION_END_TYPE);
    $this->moveCursor($match[0]);
    $this->popState();
}
417
+
371
418
/**
372
419
* @param int $limit
373
420
*/
@@ -387,10 +434,12 @@ protected function lexData(int $limit = 0): void
387
434
$ this ->lexEOL ();
388
435
} elseif (preg_match ('/\S+/ ' , $ this ->code , $ match , 0 , $ this ->cursor )) {
389
436
$ value = $ match [0 ];
437
+
390
438
// Stop if cursor reaches the next token start.
391
439
if (0 !== $ limit && $ limit <= ($ this ->cursor + strlen ($ value ))) {
392
440
$ value = substr ($ value , 0 , $ limit - $ this ->cursor );
393
441
}
442
+
394
443
// Fixing token start among expressions and comments.
395
444
$ nbTokenStart = preg_match_all ($ this ->regexes ['lex_tokens_start ' ], $ value , $ matches );
396
445
if ($ nbTokenStart ) {
@@ -431,6 +480,21 @@ protected function lexStart(): void
431
480
$ this ->moveCursor ($ tokenStart ['fullMatch ' ]);
432
481
}
433
482
483
/**
 * Enters a double-quoted string: records the opening quote together with the
 * current line on the bracket stack, pushes the double-quoted string state
 * and passes the quote to moveCursor().
 */
protected function lexStartDqString(): void
{
    $openingQuote = '"';

    $this->bracketsAndTernary[] = [$openingQuote, $this->line];
    $this->pushState(self::STATE_DQ_STRING);
    $this->moveCursor($openingQuote);
}
489
+
490
/**
 * Enters a string interpolation: records the configured interpolation opening
 * together with the current line on the bracket stack, emits an
 * INTERPOLATION_START_TYPE token (without a value), pushes the interpolation
 * state and passes the opening to moveCursor().
 */
protected function lexStartInterpolation(): void
{
    $opener = $this->options['interpolation'][0];

    $this->bracketsAndTernary[] = [$opener, $this->line];
    $this->pushToken(Token::INTERPOLATION_START_TYPE);
    $this->pushState(self::STATE_INTERPOLATION);
    $this->moveCursor($opener);
}
497
+
434
498
protected function lexTab (): void
435
499
{
436
500
$ currentToken = $ this ->code [$ this ->cursor ];
@@ -478,10 +542,16 @@ protected function lexEOL(): void
478
542
$ this ->moveCursor ($ this ->code [$ this ->cursor ]);
479
543
}
480
544
545
/**
 * Emits an ARROW_TYPE token for "=>" and passes the arrow to moveCursor().
 */
protected function lexArrowFunction(): void
{
    $arrow = '=>';

    $this->pushToken(Token::ARROW_TYPE, $arrow);
    $this->moveCursor($arrow);
}
550
+
481
551
/**
482
552
* @param string $operator
483
553
*/
484
- protected function lexOperator ($ operator ): void
554
+ protected function lexOperator (string $ operator ): void
485
555
{
486
556
if ('? ' === $ operator ) {
487
557
$ this ->bracketsAndTernary [] = [$ operator , $ this ->line ];
@@ -492,6 +562,29 @@ protected function lexOperator($operator): void
492
562
$ this ->moveCursor ($ operator );
493
563
}
494
564
565
/**
 * Emits a NAME_TYPE token for a name and passes that name to moveCursor().
 *
 * @param string $name the name as matched at the cursor
 */
protected function lexName(string $name): void
{
    $this->pushToken(Token::NAME_TYPE, $name);

    $this->moveCursor($name);
}
573
+
574
/**
 * Lexes a numeric literal: emits a NUMBER_TYPE token whose value is the
 * string form of the parsed number, then passes the raw literal to
 * moveCursor().
 *
 * Digit-only literals are stored as integers when they fit in a native int;
 * everything else (decimals, or integers too large for an int) is stored as
 * a float.
 *
 * @param string $numberAsString the number exactly as matched at the cursor
 */
protected function lexNumber(string $numberAsString): void
{
    $number = (float) $numberAsString; // floats

    if (ctype_digit($numberAsString)) {
        // A float comparison against PHP_INT_MAX is not reliable: on 64-bit
        // platforms PHP_INT_MAX is not exactly representable as a float, so a
        // literal just above it would pass the check and then be altered by
        // the (int) cast. Instead, accept the integer only when it converts
        // back to the same digits (ignoring leading zeros).
        $asInt = (int) $numberAsString;
        $digits = ltrim($numberAsString, '0');
        if ('' === $digits) {
            $digits = '0';
        }

        if ((string) $asInt === $digits) {
            $number = $asInt; // integers that fit in a native int
        }
    }

    $this->pushToken(Token::NUMBER_TYPE, (string) $number);
    $this->moveCursor($numberAsString);
}
587
+
495
588
/**
496
589
* @throws Exception
497
590
*/
@@ -515,16 +608,26 @@ protected function lexPunctuation(): void
515
608
throw new Exception (sprintf ('Unexpected "%s" ' , $ currentToken ));
516
609
}
517
610
518
- $ expect = array_pop ($ this ->bracketsAndTernary )[0 ];
519
- if ('? ' === $ expect ) {
520
- throw new Exception ('Unclosed ternary ' );
611
+ $ bracket = array_pop ($ this ->bracketsAndTernary );
612
+ if ('? ' === $ bracket [0 ]) {
613
+ // Because {{ foo ? 'yes' }} is the same as {{ foo ? 'yes' : '' }}
614
+ $ bracket = array_pop ($ this ->bracketsAndTernary );
521
615
}
522
- if (strtr ($ expect , '([{ ' , ')]} ' ) !== $ currentToken ) {
523
- throw new Exception (sprintf ('Unclosed "%s" ' , $ expect ));
616
+ if (strtr ($ bracket [ 0 ] , '([{ ' , ')]} ' ) !== $ currentToken ) {
617
+ throw new Exception (sprintf ('Unclosed "%s" ' , $ bracket [ 0 ] ));
524
618
}
525
619
}
526
620
527
621
$ this ->pushToken (Token::PUNCTUATION_TYPE , $ currentToken );
528
622
$ this ->moveCursor ($ currentToken );
529
623
}
624
+
625
/**
 * Emits a STRING_TYPE token for a quoted string and passes the raw string to
 * moveCursor().
 *
 * @param string $string the string as matched at the cursor
 */
protected function lexString(string $string): void
{
    // Resolve C-style escape sequences first, then escape backslashes again.
    $unescaped = stripcslashes($string);
    $tokenValue = addcslashes($unescaped, '\\');

    $this->pushToken(Token::STRING_TYPE, $tokenValue);
    $this->moveCursor($string);
}
530
633
}
0 commit comments