[Proposal] Warn about the loss of precision in binary literals

TysonAndre · TysonAndre · commit 302de7f33574 · 2020-08-01T18:06:27.000-04:00
Emit an E_COMPILE_WARNING when a hexadecimal, octal, or binary integer literal overflows to a float and loses precision. - E_COMPILE_WARNING is also what "Octal escape sequence overflow" emits, but that has been around long enough to consider changing it. See GH-4758. - Making this suddenly a ParseError in PHP 8.1 seems too soon. I expect this to behave the same on 32-bit and 64-bit builds (floats are 64 bits on both). For example, `0xffff_ffff_ffff_f400` overflows and becomes the **float** `0xffff_ffff_ffff_f000`. This PR will warn about that. Instead of using `0xffff_ffff_ffff_f400` with bitwise operators, most code should use the signed 64-bit int `~0xbff`. - This mostly affects, e.g., PHP code ported from cryptography algorithms or other C code doing bitwise operations.
diff --git a/Zend/tests/binary.phpt b/Zend/tests/binary.phpt
@@ -79,7 +79,16 @@ var_dump(-0b1111111111111111111111111111111111111111111111111111111111111111);
 var_dump(-0b111111111111111111111111111111111111111111111111111111111111111);
 var_dump(-0b11111111111111111111111111111111111111111111111111111111111111);
 var_dump(-0b1);
---EXPECT--
+--EXPECTF--
+Warning: Saw imprecise float binary literal - the last 11 non-zero bits were truncated in %sbinary.php on line 66
+
+Warning: Saw imprecise float binary literal - the last 11 non-zero bits were truncated in %sbinary.php on line 67
+
+Warning: Saw imprecise float binary literal - the last 12 non-zero bits were truncated in %sbinary.php on line 68
+
+Warning: Saw imprecise float binary literal - the last 12 non-zero bits were truncated in %sbinary.php on line 69
+
+Warning: Saw imprecise float binary literal - the last 11 non-zero bits were truncated in %sbinary.php on line 71
 int(1)
 int(3)
 int(7)
@@ -151,4 +160,4 @@ float(3.68934881474191E+19)
 float(-1.844674407370955E+19)
 int(-9223372036854775807)
 int(-4611686018427387903)
-int(-1)
+int(-1)
diff --git a/Zend/tests/binary_overflow_number.phpt b/Zend/tests/binary_overflow_number.phpt
@@ -0,0 +1,42 @@
+--TEST--
+Binary overflow in numeric literal warning
+--FILE--
+<?php
+// rounding down
+var_dump(eval('return 0b1011111111111111111111111111111111111111111111111111100000000000;'));
+var_dump(eval('return 0b1011111111111111111111111111111111111111111111111111100000000001;'));
+// rounding up
+var_dump(eval('return 0b1011111111111111111111111111111111111111111111111111111111111111;'));
+var_dump(eval('return 0b1011111111111111111111111111111111111111111111111111110000000000;'));
+var_dump(eval('return 0b1011111111111111111111111111111111111111111111111111111000000000;'));
+// don't count _ or leading 0s
+var_dump(eval('return     0b111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_000_000_000_0;'));
+var_dump(eval('return 0b000_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_000_000_000_0;'));
+var_dump(eval('return 0b1111111111111111111111111111111111111111111111111111111111111111;'));
+var_dump(eval('return 0b1000000000000000000000000000000000000000000000000000010000000000;'));
+--EXPECTF--
+float(1.383505805528216E+19)
+
+Warning: Saw imprecise float binary literal - the last 11 non-zero bits were truncated in %sbinary_overflow_number.php(4) : eval()'d code on line 1
+float(1.383505805528216E+19)
+
+Warning: Saw imprecise float binary literal - the last 11 non-zero bits were truncated in %sbinary_overflow_number.php(6) : eval()'d code on line 1
+float(1.3835058055282164E+19)
+
+Warning: Saw imprecise float binary literal - the last 1 non-zero bits were truncated in %sbinary_overflow_number.php(7) : eval()'d code on line 1
+float(1.3835058055282164E+19)
+
+Warning: Saw imprecise float binary literal - the last 2 non-zero bits were truncated in %sbinary_overflow_number.php(8) : eval()'d code on line 1
+float(1.3835058055282164E+19)
+
+Warning: Saw imprecise float binary literal - the last 1 non-zero bits were truncated in %sbinary_overflow_number.php(10) : eval()'d code on line 1
+float(1.844674407370955E+19)
+
+Warning: Saw imprecise float binary literal - the last 1 non-zero bits were truncated in %sbinary_overflow_number.php(11) : eval()'d code on line 1
+float(1.844674407370955E+19)
+
+Warning: Saw imprecise float binary literal - the last 11 non-zero bits were truncated in %sbinary_overflow_number.php(12) : eval()'d code on line 1
+float(1.844674407370955E+19)
+
+Warning: Saw imprecise float binary literal - the last 1 non-zero bits were truncated in %sbinary_overflow_number.php(13) : eval()'d code on line 1
+float(9.223372036854775E+18)
diff --git a/Zend/tests/hex_overflow_number.phpt b/Zend/tests/hex_overflow_number.phpt
@@ -0,0 +1,39 @@
+--TEST--
+Hex overflow in numeric literal warning
+--FILE--
+<?php
+var_dump(eval('return 0xffff_ffff_ffff_f800;'));
+var_dump(eval('return 0xffff_ffff_ffff_fa00;'));
+var_dump(eval('return 0xffff_ffff_ffff_fb00;'));
+var_dump(eval('return 0xffff_ffff_ffff_ffff;'));
+var_dump(eval('return 0x1_ffff_ffff_ffff_ffff;'));
+var_dump(eval('return 0x3_ffff_ffff_ffff_ffff;'));
+var_dump(eval('return 0x5_ffff_ffff_ffff_ffff;'));
+var_dump(eval('return 0x8_ffff_ffff_ffff_ffff;'));
+var_dump(eval('return 0x0008_ffff_ffff_ffff_ffff;'));
+--EXPECTF--
+float(1.844674407370955E+19)
+
+Warning: Saw imprecise float hex literal - the last 2 non-zero bits were truncated in %shex_overflow_number.php(3) : eval()'d code on line 1
+float(1.844674407370955E+19)
+
+Warning: Saw imprecise float hex literal - the last 3 non-zero bits were truncated in %shex_overflow_number.php(4) : eval()'d code on line 1
+float(1.844674407370955E+19)
+
+Warning: Saw imprecise float hex literal - the last 11 non-zero bits were truncated in %shex_overflow_number.php(5) : eval()'d code on line 1
+float(1.8446744073709552E+19)
+
+Warning: Saw imprecise float hex literal - the last 12 non-zero bits were truncated in %shex_overflow_number.php(6) : eval()'d code on line 1
+float(3.6893488147419103E+19)
+
+Warning: Saw imprecise float hex literal - the last 13 non-zero bits were truncated in %shex_overflow_number.php(7) : eval()'d code on line 1
+float(7.378697629483821E+19)
+
+Warning: Saw imprecise float hex literal - the last 14 non-zero bits were truncated in %shex_overflow_number.php(8) : eval()'d code on line 1
+float(1.1068046444225731E+20)
+
+Warning: Saw imprecise float hex literal - the last 15 non-zero bits were truncated in %shex_overflow_number.php(9) : eval()'d code on line 1
+float(1.6602069666338596E+20)
+
+Warning: Saw imprecise float hex literal - the last 15 non-zero bits were truncated in %shex_overflow_number.php(10) : eval()'d code on line 1
+float(1.6602069666338596E+20)
diff --git a/Zend/tests/octal_overflow_number.phpt b/Zend/tests/octal_overflow_number.phpt
@@ -0,0 +1,27 @@
+--TEST--
+Octal overflow in numeric literal warning
+--FILE--
+<?php
+var_dump(eval('return     01777777777777777770000;'));
+var_dump(eval('return 0_00_1777777777777777770000;'));
+var_dump(eval('return     01777777777777777772000;'));
+var_dump(eval('return     01777777777777777774000;'));
+var_dump(eval('return     02777777777777777774000;'));
+var_dump(eval('return     07777777777777777774000;'));
+var_dump(eval('return 04_777_777_7777777777774000;'));
+--EXPECTF--
+float(1.8446744073709548E+19)
+float(1.8446744073709548E+19)
+
+Warning: Saw imprecise float octal literal - the last 1 non-zero bits were truncated in %soctal_overflow_number.php(4) : eval()'d code on line 1
+float(1.8446744073709548E+19)
+float(1.8446744073709552E+19)
+
+Warning: Saw imprecise float octal literal - the last 1 non-zero bits were truncated in %soctal_overflow_number.php(6) : eval()'d code on line 1
+float(2.7670116110564327E+19)
+
+Warning: Saw imprecise float octal literal - the last 2 non-zero bits were truncated in %soctal_overflow_number.php(7) : eval()'d code on line 1
+float(7.378697629483821E+19)
+
+Warning: Saw imprecise float octal literal - the last 5 non-zero bits were truncated in %soctal_overflow_number.php(8) : eval()'d code on line 1
+float(3.68934881474191E+20)
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
@@ -135,6 +135,77 @@ static void strip_underscores(char *str, size_t *len)
 	*dest = '\0';
 }
 
+/* Get the number of bits spanned by a hex literal, from its highest set bit down to its lowest set bit. str/len are the hex digits only. Precondition: *str represents a non-zero number that overflowed an int. NOTE(review): leading zero digits are not skipped here, so presumably the caller strips them first - confirm at the call site. Returns size_t, like the octal counterpart, because callers report the result with %zu. */
+static size_t bits_in_hex_representation(const char *str, size_t len)
+{
+	size_t bits = len * 4; /* upper bound: 4 bits per hex digit */
+	const char *end = str + len - 1;
+	int last_digit;
+	while (*end == '0') { /* trailing zero digits contain no set bits */
+		bits -= 4;
+		end--;
+		ZEND_ASSERT(end >= str);
+	}
+	if ('0' <= *end && *end <= '9') {
+		last_digit = *end - '0';
+	} else if ('a' <= *end && *end <= 'f') {
+		last_digit = *end - 'a' + 10;
+	} else {
+		ZEND_ASSERT('A' <= *end && *end <= 'F');
+		last_digit = *end - 'A' + 10;
+	}
+	if ((last_digit & 1) == 0) { /* drop the zero bits below the lowest set bit of the last non-zero digit */
+		bits--;
+		if ((last_digit & 2) == 0) {
+			bits--;
+			if ((last_digit & 4) == 0) {
+				bits--;
+			}
+		}
+	}
+	/* Check how many bits the first character started with */
+	if (*str < '2') { /* digit 1: only 1 of the 4 bits is significant */
+		bits -= 3;
+	} else if (*str < '4') { /* digits 2-3: 2 significant bits */
+		bits -= 2;
+	} else if (*str < '8') { /* digits 4-7: 3 significant bits; 8-9 and a-f/A-F use all 4 */
+		bits -= 1;
+	}
+	return bits;
+}
+
+/*
+ * Count the bits spanned by an octal literal, from its highest set bit down to its lowest set bit.
+ * Precondition: *str represents a non-zero number that overflowed an int.
+ */
+static size_t bits_in_octal_representation(const char *str, size_t len)
+{
+	const char *tail = str + len - 1;
+	size_t bit_count = 3 * len;
+	int low_digit;
+	/* Leading zero digits contribute nothing: skip each one and give back its 3 bits. */
+	for (; *str == '0'; bit_count -= 3) {
+		str++;
+		ZEND_ASSERT(tail >= str);
+	}
+	/* Trailing zero digits hold no set bits either. */
+	for (; *tail == '0'; bit_count -= 3) {
+		tail--;
+		ZEND_ASSERT(tail >= str);
+	}
+	low_digit = *tail - '0';
+	/* Discard the zero bits below the lowest set bit of the last non-zero digit. */
+	if ((low_digit & 1) == 0) {
+		bit_count -= (low_digit & 2) ? 1 : 2;
+	}
+	/* Discard the unused high bits of the first non-zero digit (1 uses one bit, 2-3 use two). */
+	if (*str < '4') {
+		bit_count -= (*str < '2') ? 2 : 1;
+	}
+	return bit_count;
+}
+
+
 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
 {
 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
@@ -1961,9 +2032,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
 		}
 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 	} else {
+		const char* last_one_bit = bin + len - 1; /* walks back to the lowest set bit of the literal */
+		while (*last_one_bit == '0') {
+			last_one_bit--;
+			ZEND_ASSERT(last_one_bit >= bin); /* may stop on bin[0] itself, e.g. a single 1 followed only by zeros */
+		}
 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
 		ZEND_ASSERT(end == bin + len);
+		if (last_one_bit - bin + 1 > 53) { /* digits from bin[0] through the lowest set bit exceed the 53 bits this check allows */
+			zend_error(E_COMPILE_WARNING, "Saw imprecise float binary literal - the last %zu non-zero bits were truncated", (size_t)(last_one_bit - bin + 1 - 53));
+		}
 		if (contains_underscores) {
 			efree(bin);
 		}
@@ -1975,6 +2054,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
 	size_t len = yyleng;
 	char *end, *lnum = yytext;
 	zend_bool is_octal = lnum[0] == '0';
+	zend_bool is_truncated = 0;
 	zend_bool contains_underscores = (memchr(lnum, '_', len) != NULL);
 
 	if (contains_underscores) {
@@ -1998,6 +2078,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
 
 				/* Continue in order to determine if this is T_LNUMBER or T_DNUMBER. */
 				len = i;
+				is_truncated = 1;
 				break;
 			}
 		}
@@ -2016,6 +2097,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
 			errno = 0;
 			if (is_octal) { /* octal overflow */
 				ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
+				if (!is_truncated) {
+					size_t bits_in_representation = bits_in_octal_representation(lnum, len);
+					if (bits_in_representation > 53) {
+						zend_error(E_COMPILE_WARNING, "Saw imprecise float octal literal - the last %zu non-zero bits were truncated", bits_in_representation - 53);
+					}
+				}
 			} else {
 				ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
 			}
@@ -2066,9 +2153,13 @@ NEWLINE ("\r"|"\n"|"\r\n")
 		}
 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 	} else {
+		size_t bits_in_representation = bits_in_hex_representation(hex, len);
 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
 		ZEND_ASSERT(end == hex + len);
+		if (bits_in_representation > 53) {
+			zend_error(E_COMPILE_WARNING, "Saw imprecise float hex literal - the last %zu non-zero bits were truncated", bits_in_representation - 53);
+		}
 		if (contains_underscores) {
 			efree(hex);
 		}
diff --git a/ext/standard/tests/strings/pack64.phpt b/ext/standard/tests/strings/pack64.phpt
@@ -32,7 +32,14 @@ print_r(unpack("q", pack("q", 0x8000000000000002)));
 print_r(unpack("q", pack("q", -1)));
 print_r(unpack("q", pack("q", 0x8000000000000000)));
 ?>
---EXPECT--
+--EXPECTF--
+Warning: Saw imprecise float hex literal - the last 10 non-zero bits were truncated in %spack64.php on line 4
+
+Warning: Saw imprecise float hex literal - the last 10 non-zero bits were truncated in %spack64.php on line 10
+
+Warning: Saw imprecise float hex literal - the last 10 non-zero bits were truncated in %spack64.php on line 16
+
+Warning: Saw imprecise float hex literal - the last 10 non-zero bits were truncated in %spack64.php on line 22
 Array
 (
     [1] => 281474976710654
@@ -112,4 +119,4 @@ Array
 Array
 (
     [1] => -9223372036854775808
-)
+)
diff --git a/ext/tokenizer/tests/invalid_octal_dnumber.phpt b/ext/tokenizer/tests/invalid_octal_dnumber.phpt
@@ -4,7 +4,16 @@ Invalid octal number that overflows to double
 <?php if (!extension_loaded("tokenizer")) print "skip tokenizer extension not enabled"; ?>
 --FILE--
 <?php
-echo token_name(token_get_all('<?php 0177777777777777777777787')[1][0]), "\n";
+$token = token_get_all('<?php 0177777777777777777777787')[1];
+echo token_name($token[0]), "\n";
+echo $token[1], "\n";
+// The tokenizer should only warn about lost precision for octal literals when valid
+$token = token_get_all('<?php 0177777777777777777777777')[1];
+echo $token[1], "\n";
 ?>
---EXPECT--
+--EXPECTF--
 T_DNUMBER
+0177777777777777777777787
+
+Warning: Saw imprecise float octal literal - the last 17 non-zero bits were truncated in %sinvalid_octal_dnumber.php on line 6
+0177777777777777777777777