Skip to content

Commit 47cf18b

Browse files
committed
Don't include trailing newline in comment token
Don't include a trailing newline in T_COMMENT tokens; instead, leave it for a following T_WHITESPACE token. The newline does not logically belong to the comment, and including it creates an ugly special case, since other tokens do not include trailing newlines. Whitespace-sensitive tooling will want to emulate this change either forward or backward. Closes GH-5182.
1 parent e12b9df commit 47cf18b

File tree

4 files changed

+116
-94
lines changed

4 files changed

+116
-94
lines changed

UPGRADING

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,12 @@ PHP 8.0 UPGRADE NOTES
470470
. The $use_include_path parameter, which was not used internally, has been
471471
removed from tidy_repair_string().
472472

473+
- Tokenizer:
474+
. T_COMMENT tokens will no longer include a trailing newline. The newline will
475+
instead be part of a following T_WHITESPACE token. It should be noted that
476+
T_COMMENT is not always followed by whitespace; it may also be followed by
477+
T_CLOSE_TAG or end-of-file.
478+
473479
- XML:
474480
. xml_parser_create(_ns) will now return an XmlParser object rather than a
475481
resource. Return value checks using is_resource() should be replaced with

Zend/zend_language_scanner.l

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2293,12 +2293,8 @@ inline_char_handler:
22932293
while (YYCURSOR < YYLIMIT) {
22942294
switch (*YYCURSOR++) {
22952295
case '\r':
2296-
if (*YYCURSOR == '\n') {
2297-
YYCURSOR++;
2298-
}
2299-
/* fall through */
23002296
case '\n':
2301-
CG(zend_lineno)++;
2297+
YYCURSOR--;
23022298
break;
23032299
case '?':
23042300
if (*YYCURSOR == '>') {

ext/tokenizer/tests/PhpToken_methods.phpt

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -22,50 +22,51 @@ foreach ($tokens as $i => $token) {
2222
}
2323

2424
// is() variations
25+
$token = $tokens[5];
2526

2627
echo "\nSuccess:\n";
27-
var_dump($tokens[4]->is(T_FUNCTION));
28-
var_dump($tokens[4]->is('function'));
29-
var_dump($tokens[4]->is(['class', T_FUNCTION]));
30-
var_dump($tokens[4]->is([T_CLASS, 'function']));
28+
var_dump($token->is(T_FUNCTION));
29+
var_dump($token->is('function'));
30+
var_dump($token->is(['class', T_FUNCTION]));
31+
var_dump($token->is([T_CLASS, 'function']));
3132

3233
echo "\nFailure:\n";
33-
var_dump($tokens[4]->is(T_CLASS));
34-
var_dump($tokens[4]->is('class'));
35-
var_dump($tokens[4]->is(['class', T_TRAIT]));
36-
var_dump($tokens[4]->is([T_CLASS, 'trait']));
34+
var_dump($token->is(T_CLASS));
35+
var_dump($token->is('class'));
36+
var_dump($token->is(['class', T_TRAIT]));
37+
var_dump($token->is([T_CLASS, 'trait']));
3738

3839
echo "\nError:\n";
3940
try {
40-
$tokens[4]->is(3.141);
41+
$token->is(3.141);
4142
} catch (TypeError $e) {
4243
echo $e->getMessage(), "\n";
4344
}
4445
try {
45-
$tokens[4]->is([3.141]);
46+
$token->is([3.141]);
4647
} catch (TypeError $e) {
4748
echo $e->getMessage(), "\n";
4849
}
4950

50-
unset($tokens[4]->id);
51-
unset($tokens[4]->text);
51+
unset($token->id);
52+
unset($token->text);
5253
try {
53-
$tokens[4]->is(T_FUNCTION);
54+
$token->is(T_FUNCTION);
5455
} catch (Error $e) {
5556
echo $e->getMessage(), "\n";
5657
}
5758
try {
58-
$tokens[4]->is('function');
59+
$token->is('function');
5960
} catch (Error $e) {
6061
echo $e->getMessage(), "\n";
6162
}
6263
try {
63-
$tokens[4]->is([T_FUNCTION]);
64+
$token->is([T_FUNCTION]);
6465
} catch (Error $e) {
6566
echo $e->getMessage(), "\n";
6667
}
6768
try {
68-
$tokens[4]->is(['function']);
69+
$token->is(['function']);
6970
} catch (Error $e) {
7071
echo $e->getMessage(), "\n";
7172
}
@@ -78,22 +79,23 @@ var_dump($token->getTokenName());
7879
--EXPECT--
7980
[ 0] T_OPEN_TAG ignorable
8081
[ 1] T_COMMENT ignorable
81-
[ 2] T_DOC_COMMENT ignorable
82-
[ 3] T_WHITESPACE ignorable
83-
[ 4] T_FUNCTION meaningful
84-
[ 5] T_WHITESPACE ignorable
85-
[ 6] T_STRING meaningful
86-
[ 7] ( meaningful
87-
[ 8] ) meaningful
88-
[ 9] T_WHITESPACE ignorable
89-
[10] { meaningful
90-
[11] T_WHITESPACE ignorable
91-
[12] T_ECHO meaningful
92-
[13] T_WHITESPACE ignorable
93-
[14] T_CONSTANT_ENCAPSED_STRING meaningful
94-
[15] ; meaningful
95-
[16] T_WHITESPACE ignorable
96-
[17] } meaningful
82+
[ 2] T_WHITESPACE ignorable
83+
[ 3] T_DOC_COMMENT ignorable
84+
[ 4] T_WHITESPACE ignorable
85+
[ 5] T_FUNCTION meaningful
86+
[ 6] T_WHITESPACE ignorable
87+
[ 7] T_STRING meaningful
88+
[ 8] ( meaningful
89+
[ 9] ) meaningful
90+
[10] T_WHITESPACE ignorable
91+
[11] { meaningful
92+
[12] T_WHITESPACE ignorable
93+
[13] T_ECHO meaningful
94+
[14] T_WHITESPACE ignorable
95+
[15] T_CONSTANT_ENCAPSED_STRING meaningful
96+
[16] ; meaningful
97+
[17] T_WHITESPACE ignorable
98+
[18] } meaningful
9799

98100
Success:
99101
bool(true)

0 commit comments

Comments
 (0)