Skip to content

Commit bdeda95

Browse files
cor3ntin authored and AaronBallman committed
Make wide multi-character character literals ill-formed
This implements P2362, which has not yet been approved by the C++ committee, but because wide multi-character literals are implementation-defined, clang might not have to wait for WG21. This change is also being applied in C mode as the behavior is implementation-defined in C as well and there's no benefit to having different rules between the languages. The other part of P2362, making non-representable character literals ill-formed, is already implemented by clang.
1 parent c7aacce commit bdeda95

File tree

9 files changed

+26
-41
lines changed

9 files changed

+26
-41
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ Windows Support
100100
C Language Changes in Clang
101101
---------------------------
102102

103-
- ...
103+
- Wide multi-character literals such as ``L'ab'`` that would previously be interpreted as ``L'b'``
104+
are now ill-formed in all language modes. The motivation for this change is outlined in
105+
`P2362 <https://wg21.link/P2362>`_.
104106

105107
C++ Language Changes in Clang
106108
-----------------------------

clang/include/clang/Basic/DiagnosticLexKinds.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,12 +183,10 @@ def warn_c2x_compat_digit_separator : Warning<
183183
InGroup<CPre2xCompat>, DefaultIgnore;
184184
def err_digit_separator_not_between_digits : Error<
185185
"digit separator cannot appear at %select{start|end}0 of digit sequence">;
186-
def warn_extraneous_char_constant : Warning<
187-
"extraneous characters in character constant ignored">;
188186
def warn_char_constant_too_large : Warning<
189187
"character constant too long for its type">;
190-
def err_multichar_utf_character_literal : Error<
191-
"Unicode character literals may not contain multiple characters">;
188+
def err_multichar_character_literal : Error<
189+
"%select{wide|Unicode}0 character literals may not contain multiple characters">;
192190
def err_exponent_has_no_digits : Error<"exponent has no digits">;
193191
def err_hex_constant_requires : Error<
194192
"hexadecimal floating %select{constant|literal}0 requires "

clang/lib/Lex/LiteralSupport.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,14 +1390,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
13901390
unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
13911391

13921392
if (NumCharsSoFar > 1) {
1393-
if (isWide())
1394-
PP.Diag(Loc, diag::warn_extraneous_char_constant);
1395-
else if (isAscii() && NumCharsSoFar == 4)
1393+
if (isAscii() && NumCharsSoFar == 4)
13961394
PP.Diag(Loc, diag::warn_four_char_character_literal);
13971395
else if (isAscii())
13981396
PP.Diag(Loc, diag::warn_multichar_character_literal);
1399-
else
1400-
PP.Diag(Loc, diag::err_multichar_utf_character_literal);
1397+
else {
1398+
PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
1399+
HadError = true;
1400+
}
14011401
IsMultiChar = true;
14021402
} else {
14031403
IsMultiChar = false;

clang/test/CodeGen/char-literal.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
2-
// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
32
// RUN: %clang_cc1 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-CPP0X %s
43

54
#include <stddef.h>
@@ -33,11 +32,6 @@ int main() {
3332
// CHECK-CPP0X: store i32 97
3433
wchar_t wa = L'a';
3534

36-
// Should pick second character.
37-
// CHECK-C: store i32 98
38-
// CHECK-CPP0X: store i32 98
39-
wchar_t wb = L'ab';
40-
4135
#if __cplusplus >= 201103L
4236
// CHECK-CPP0X: store i16 97
4337
char16_t ua = u'a';
@@ -83,8 +77,4 @@ int main() {
8377
char32_t Ud = U'\U0010F00B';
8478
#endif
8579

86-
// Should pick second character.
87-
// CHECK-C: store i32 1110027
88-
// CHECK-CPP0X: store i32 1110027
89-
wchar_t we = L'\u1234\U0010F00B';
9080
}

clang/test/CodeGen/string-literal-short-wstring.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1-
// RUN: %clang_cc1 -x c++ -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
2-
// RUN: %clang_cc1 -x c++ -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
3-
// Runs in c++ mode so that wchar_t is available.
1+
// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
2+
// RUN: %clang_cc1 -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
3+
4+
// Run in C mode as wide multichar literals are not valid in C++
45

56
// XFAIL: hexagon
67
// Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which fails
78
// the first check line with "align 1".
89

10+
typedef __WCHAR_TYPE__ wchar_t;
11+
912
int main() {
1013
// This should convert to utf8.
1114
// CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
@@ -20,19 +23,13 @@ int main() {
2023
// MSABI: linkonce_odr dso_local unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0]
2124
const wchar_t *bar = L"\u1120\u0220\U00102030";
2225

23-
24-
2526
// Should pick second character.
2627
// CHECK: store i8 98
2728
char c = 'ab';
2829

2930
// CHECK: store i16 97
3031
wchar_t wa = L'a';
3132

32-
// Should pick second character.
33-
// CHECK: store i16 98
34-
wchar_t wb = L'ab';
35-
3633
// -4085 == 0xf00b
3734
// CHECK: store i16 -4085
3835
wchar_t wc = L'\uF00B';

clang/test/Lexer/char-literal.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constan
2121
char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
2222
char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}}
2323

24-
wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
24+
wchar_t i = L'ab'; // expected-error {{wide character literals may not contain multiple characters}}
25+
2526
wchar_t j = L'\U0010FFFD';
2627

2728
char32_t k = U'\U0010FFFD';

clang/test/Lexer/wchar.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,8 @@
33
void f() {
44
(void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning
55

6-
(void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}}
6+
(void)L'ab'; // expected-error {{wide character literals may not contain multiple characters}}
77

8-
(void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
9-
10-
(void)L'a\u1000'; // expected-warning {{extraneous characters in character constant ignored}}
8+
(void)L'a\u1000'; // expected-error {{wide character literals may not contain multiple characters}}
119
}
1210

clang/test/Misc/warning-flags.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ This test serves two purposes:
1818

1919
The list of warnings below should NEVER grow. It should gradually shrink to 0.
2020

21-
CHECK: Warnings without flags (68):
21+
CHECK: Warnings without flags (67):
2222

2323
CHECK-NEXT: ext_expected_semi_decl_list
2424
CHECK-NEXT: ext_explicit_specialization_storage_class
@@ -50,7 +50,6 @@ CHECK-NEXT: warn_drv_pch_not_first_include
5050
CHECK-NEXT: warn_dup_category_def
5151
CHECK-NEXT: warn_enum_value_overflow
5252
CHECK-NEXT: warn_expected_qualified_after_typename
53-
CHECK-NEXT: warn_extraneous_char_constant
5453
CHECK-NEXT: warn_fe_backend_unsupported
5554
CHECK-NEXT: warn_fe_cc_log_diagnostics_failure
5655
CHECK-NEXT: warn_fe_cc_print_header_failure

clang/test/Preprocessor/Weverything_pragma.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,21 @@ void foo(void) // expected-warning {{no previous prototype for function}}
1010
// expected-note@-1{{declare 'static' if the function is not intended to be used outside of this translation unit}}
1111
{
1212
// A diagnostic without DefaultIgnore, and not part of a group.
13-
(void) L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
13+
(void) 'ab'; // expected-warning {{multi-character character constant}}
1414

1515
#pragma clang diagnostic warning "-Weverything" // Should not change anything.
1616
#define UNUSED_MACRO2 1 // expected-warning{{macro is not used}}
17-
(void) L'cd'; // expected-warning {{extraneous characters in character constant ignored}}
17+
(void) 'cd'; // expected-warning {{multi-character character constant}}
1818

1919
#pragma clang diagnostic ignored "-Weverything" // Ignore warnings now.
2020
#define UNUSED_MACRO2 1 // no warning
21-
(void) L'ef'; // no warning here
21+
(void) 'ef'; // no warning here
2222

2323
#pragma clang diagnostic warning "-Weverything" // Revert back to warnings.
2424
#define UNUSED_MACRO3 1 // expected-warning{{macro is not used}}
25-
(void) L'gh'; // expected-warning {{extraneous characters in character constant ignored}}
25+
(void) 'gh'; // expected-warning {{multi-character character constant}}
2626

2727
#pragma clang diagnostic error "-Weverything" // Give errors now.
2828
#define UNUSED_MACRO4 1 // expected-error{{macro is not used}}
29-
(void) L'ij'; // expected-error {{extraneous characters in character constant ignored}}
29+
(void) 'ij'; // expected-error {{multi-character character constant}}
3030
}

0 commit comments

Comments
 (0)