@@ -192,6 +192,7 @@ void startup_scanner(void)
192
192
CG (doc_comment) = NULL ;
193
193
CG (extra_fn_flags) = 0 ;
194
194
zend_stack_init (&SCNG (state_stack), sizeof (int ));
195
+ zend_stack_init (&SCNG (nest_location_stack), sizeof (zend_nest_location));
195
196
zend_ptr_stack_init (&SCNG (heredoc_label_stack));
196
197
SCNG (heredoc_scan_ahead) = 0 ;
197
198
}
@@ -205,6 +206,7 @@ void shutdown_scanner(void)
205
206
CG (parse_error) = 0 ;
206
207
RESET_DOC_COMMENT ();
207
208
zend_stack_destroy (&SCNG (state_stack));
209
+ zend_stack_destroy (&SCNG (nest_location_stack));
208
210
zend_ptr_stack_clean (&SCNG (heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1 );
209
211
zend_ptr_stack_destroy (&SCNG (heredoc_label_stack));
210
212
SCNG (heredoc_scan_ahead) = 0 ;
@@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
223
225
lex_state->state_stack = SCNG (state_stack);
224
226
zend_stack_init (&SCNG (state_stack), sizeof (int ));
225
227
228
+ lex_state->nest_location_stack = SCNG (nest_location_stack);
229
+ zend_stack_init (&SCNG (nest_location_stack), sizeof (zend_nest_location));
230
+
226
231
lex_state->heredoc_label_stack = SCNG (heredoc_label_stack);
227
232
zend_ptr_stack_init (&SCNG (heredoc_label_stack));
228
233
@@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
258
263
zend_stack_destroy (&SCNG (state_stack));
259
264
SCNG (state_stack) = lex_state->state_stack ;
260
265
266
+ zend_stack_destroy (&SCNG (nest_location_stack));
267
+ SCNG (nest_location_stack) = lex_state->nest_location_stack ;
268
+
261
269
zend_ptr_stack_clean (&SCNG (heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1 );
262
270
zend_ptr_stack_destroy (&SCNG (heredoc_label_stack));
263
271
SCNG (heredoc_label_stack) = lex_state->heredoc_label_stack ;
@@ -1250,6 +1258,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
1250
1258
zend_ptr_stack_push (&SCNG (heredoc_label_stack), (void *) new_heredoc_label);
1251
1259
}
1252
1260
1261
+ /* Check that { }, [ ], ( ) are nested correctly */
1262
+ static void report_bad_nesting (char opening, int opening_lineno, char closing)
1263
+ {
1264
+ char buf[256 ];
1265
+ size_t used = 0 ;
1266
+
1267
+ used = snprintf (buf, sizeof (buf), " Unclosed '%c'" , opening);
1268
+
1269
+ if (opening_lineno != CG (zend_lineno)) {
1270
+ used += snprintf (buf + used, sizeof (buf) - used, " on line %d" , opening_lineno);
1271
+ }
1272
+
1273
+ if (closing) { /* 'closing' will be 0 if at end of file */
1274
+ used += snprintf (buf + used, sizeof (buf) - used, " does not match '%c'" , closing);
1275
+ }
1276
+
1277
+ zend_throw_exception (zend_ce_parse_error, buf, 0 );
1278
+ }
1279
+
1280
+ static void enter_nesting (char opening)
1281
+ {
1282
+ zend_nest_location nest_loc = {opening, CG (zend_lineno)};
1283
+ zend_stack_push (&SCNG (nest_location_stack), &nest_loc);
1284
+ }
1285
+
1286
+ static int exit_nesting (char closing)
1287
+ {
1288
+ if (zend_stack_is_empty (&SCNG (nest_location_stack))) {
1289
+ zend_throw_exception_ex (zend_ce_parse_error, 0 , " Unmatched '%c'" , closing);
1290
+ return -1 ;
1291
+ }
1292
+
1293
+ zend_nest_location *nest_loc = zend_stack_top (&SCNG (nest_location_stack));
1294
+ char opening = nest_loc->text ;
1295
+
1296
+ if ((opening == ' {' && closing != ' }' ) ||
1297
+ (opening == ' [' && closing != ' ]' ) ||
1298
+ (opening == ' (' && closing != ' )' )) {
1299
+ report_bad_nesting (opening, nest_loc->lineno , closing);
1300
+ return -1 ;
1301
+ }
1302
+
1303
+ zend_stack_del_top (&SCNG (nest_location_stack));
1304
+ return 0 ;
1305
+ }
1306
+
1307
+ static int check_nesting_at_end ()
1308
+ {
1309
+ if (!zend_stack_is_empty (&SCNG (nest_location_stack))) {
1310
+ zend_nest_location *nest_loc = zend_stack_top (&SCNG (nest_location_stack));
1311
+ report_bad_nesting (nest_loc->text , nest_loc->lineno , 0 );
1312
+ return -1 ;
1313
+ }
1314
+
1315
+ return 0 ;
1316
+ }
1317
+
1253
1318
#define PARSER_MODE () \
1254
1319
EXPECTED (elem != NULL )
1255
1320
@@ -1277,6 +1342,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
1277
1342
goto emit_token; \
1278
1343
} while (0 )
1279
1344
1345
/*
 * Return a closing-bracket token after checking it against the nesting stack.
 * When exit_nesting() reports a failure and PARSER_MODE() holds, T_ERROR is
 * returned instead; otherwise the token itself is returned.
 * Note: _token is expanded twice, so the argument must have no side effects.
 */
#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
		if (exit_nesting(_token) && PARSER_MODE()) { \
			RETURN_TOKEN(T_ERROR); \
		} else { \
			RETURN_TOKEN(_token); \
		} \
	} while (0)
1352
+
1353
/*
 * Return the END token once input is exhausted. If check_nesting_at_end()
 * reports a failure and PARSER_MODE() holds, T_ERROR is returned instead.
 */
#define RETURN_END_TOKEN do { \
		if (!check_nesting_at_end() || !PARSER_MODE()) { \
			RETURN_TOKEN(END); \
		} else { \
			RETURN_TOKEN(T_ERROR); \
		} \
	} while (0)
1360
+
1280
1361
int ZEND_FASTCALL lex_scan (zval *zendlval, zend_parser_stack_elem *elem)
1281
1362
{
1282
1363
int token;
@@ -1297,7 +1378,7 @@ BNUM "0b"[01]+(_[01]+)*
1297
1378
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1298
1379
WHITESPACE [ \n\r\t]+
1299
1380
TABS_AND_SPACES [ \t]*
1300
- TOKENS [;:,.\[\]() |^&+-/*=%!~$<>?@]
1381
+ TOKENS [;:,.|^&+-/*=%!~$<>?@]
1301
1382
ANY_CHAR [^]
1302
1383
NEWLINE ("\r"|"\n"|"\r\n")
1303
1384
@@ -1770,29 +1851,40 @@ NEWLINE ("\r"|"\n"|"\r\n")
1770
1851
RETURN_TOKEN (T_SR);
1771
1852
}
1772
1853
1854
+ <ST_IN_SCRIPTING>" ]" |" )" {
1855
+ /* Check that ] and ) match up properly with a preceding [ or ( */
1856
+ RETURN_EXIT_NESTING_TOKEN (yytext[0 ]);
1857
+ }
1858
+
1859
+ <ST_IN_SCRIPTING>" [" |" (" {
1860
+ enter_nesting (yytext[0 ]);
1861
+ RETURN_TOKEN (yytext[0 ]);
1862
+ }
1863
+
1773
1864
<ST_IN_SCRIPTING>{TOKENS} {
1774
1865
RETURN_TOKEN (yytext[0 ]);
1775
1866
}
1776
1867
1777
1868
1778
1869
<ST_IN_SCRIPTING>" {" {
1779
1870
yy_push_state (ST_IN_SCRIPTING);
1871
+ enter_nesting (' {' );
1780
1872
RETURN_TOKEN (' {' );
1781
1873
}
1782
1874
1783
1875
1784
1876
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>" ${" {
1785
1877
yy_push_state (ST_LOOKING_FOR_VARNAME);
1878
+ enter_nesting (' {' );
1786
1879
RETURN_TOKEN (T_DOLLAR_OPEN_CURLY_BRACES);
1787
1880
}
1788
1881
1789
-
1790
1882
<ST_IN_SCRIPTING>" }" {
1791
1883
RESET_DOC_COMMENT ();
1792
1884
if (!zend_stack_is_empty (&SCNG (state_stack))) {
1793
1885
yy_pop_state ();
1794
1886
}
1795
- RETURN_TOKEN (' }' );
1887
+ RETURN_EXIT_NESTING_TOKEN (' }' );
1796
1888
}
1797
1889
1798
1890
@@ -2088,7 +2180,7 @@ string:
2088
2180
2089
2181
<INITIAL>{ANY_CHAR} {
2090
2182
if (YYCURSOR > YYLIMIT) {
2091
- RETURN_TOKEN (END) ;
2183
+ RETURN_END_TOKEN ;
2092
2184
}
2093
2185
2094
2186
inline_char_handler:
@@ -2165,7 +2257,7 @@ inline_char_handler:
2165
2257
RETURN_TOKEN (' ]' );
2166
2258
}
2167
2259
2168
- <ST_VAR_OFFSET>{TOKENS}|[{}" `] {
2260
+ <ST_VAR_OFFSET>{TOKENS}|[[() {}" `] {
2169
2261
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
2170
2262
RETURN_TOKEN(yytext[0]);
2171
2263
}
@@ -2569,6 +2661,7 @@ skip_escape_conversion:
2569
2661
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2570
2662
yy_push_state(ST_IN_SCRIPTING);
2571
2663
yyless(1);
2664
+ enter_nesting(' {' );
2572
2665
RETURN_TOKEN(T_CURLY_OPEN);
2573
2666
}
2574
2667
@@ -2593,7 +2686,7 @@ skip_escape_conversion:
2593
2686
}
2594
2687
2595
2688
if (YYCURSOR > YYLIMIT) {
2596
- RETURN_TOKEN(END) ;
2689
+ RETURN_END_TOKEN ;
2597
2690
}
2598
2691
if (yytext[0] == '\\ ' && YYCURSOR < YYLIMIT) {
2599
2692
YYCURSOR++;
@@ -2640,7 +2733,7 @@ double_quotes_scan_done:
2640
2733
2641
2734
<ST_BACKQUOTE>{ANY_CHAR} {
2642
2735
if (YYCURSOR > YYLIMIT) {
2643
- RETURN_TOKEN(END) ;
2736
+ RETURN_END_TOKEN ;
2644
2737
}
2645
2738
if (yytext[0] == '\\ ' && YYCURSOR < YYLIMIT) {
2646
2739
YYCURSOR++;
@@ -2689,7 +2782,7 @@ double_quotes_scan_done:
2689
2782
int newline = 0, indentation = 0, spacing = 0;
2690
2783
2691
2784
if (YYCURSOR > YYLIMIT) {
2692
- RETURN_TOKEN(END) ;
2785
+ RETURN_END_TOKEN ;
2693
2786
}
2694
2787
2695
2788
YYCURSOR--;
@@ -2813,7 +2906,7 @@ heredoc_scan_done:
2813
2906
int newline = 0, indentation = 0, spacing = -1;
2814
2907
2815
2908
if (YYCURSOR > YYLIMIT) {
2816
- RETURN_TOKEN(END) ;
2909
+ RETURN_END_TOKEN ;
2817
2910
}
2818
2911
2819
2912
YYCURSOR--;
@@ -2901,7 +2994,7 @@ nowdoc_scan_done:
2901
2994
2902
2995
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2903
2996
if (YYCURSOR > YYLIMIT) {
2904
- RETURN_TOKEN(END) ;
2997
+ RETURN_END_TOKEN ;
2905
2998
}
2906
2999
2907
3000
RETURN_TOKEN(T_BAD_CHARACTER);
0 commit comments