Skip to content

Commit 2ace7bd

Browse files
authored
[Clang] allow `@`, `$`, and the backtick character in raw string delimiters in C++26 (#93216)
And as an extension in older language modes. Per https://eel.is/c++draft/lex.string#nt:d-char Fixes #93130
1 parent 8995ccc commit 2ace7bd

File tree

6 files changed

+49
-18
lines changed

6 files changed

+49
-18
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,7 @@ Bug Fixes to C++ Support
802802
- Fixed a regression introduced in Clang 18 causing a static function overloading a non-static function
803803
with the same parameters not to be diagnosed. (Fixes #GH93456).
804804
- Clang now diagnoses unexpanded parameter packs in attributes. (Fixes #GH93269).
805+
- Clang now allows ``@``, ``$``, and the backtick character in raw string literal delimiters. (Fixes #GH93130).
805806

806807
Bug Fixes to AST Handling
807808
^^^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/CharInfo.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ namespace charinfo {
2828
CHAR_LOWER = 0x0040, // a-z
2929
CHAR_UNDER = 0x0080, // _
3030
CHAR_PERIOD = 0x0100, // .
31-
CHAR_RAWDEL = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"'
32-
CHAR_PUNCT = 0x0400 // `$@()
31+
CHAR_PUNCT = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"'`$@()
3332
};
3433

3534
enum {
@@ -152,16 +151,17 @@ LLVM_READONLY inline bool isHexDigit(unsigned char c) {
152151
/// Note that '_' is both a punctuation character and an identifier character!
153152
LLVM_READONLY inline bool isPunctuation(unsigned char c) {
  using namespace charinfo;
  // General punctuation shares the single CHAR_PUNCT bit; '_' and '.' carry
  // their own dedicated bits and therefore have to be tested explicitly.
  return (InfoTable[c] & (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT)) != 0;
}
157157

158158
/// Return true if this character is an ASCII printable character; that is, a
159159
/// character that should take exactly one column to print in a fixed-width
160160
/// terminal.
161161
LLVM_READONLY inline bool isPrintable(unsigned char c) {
162162
using namespace charinfo;
163-
return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT|
164-
CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0;
163+
return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT |
164+
CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0;
165165
}
166166

167167
/// Return true if this is the body character of a C preprocessing number,
@@ -175,8 +175,9 @@ LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) {
175175
/// Return true if this is the body character of a C++ raw string delimiter.
176176
LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) {
177177
using namespace charinfo;
178-
return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|
179-
CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0;
178+
return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT |
179+
CHAR_UNDER | CHAR_PUNCT)) != 0 &&
180+
c != '(' && c != ')';
180181
}
181182

182183
enum class EscapeChar {

clang/include/clang/Basic/DiagnosticLexKinds.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,14 @@ def warn_cxx98_compat_raw_string_literal : Warning<
111111
"raw string literals are incompatible with C++98">,
112112
InGroup<CXX98Compat>, DefaultIgnore;
113113

114+
// Off-by-default compatibility warning for a raw string literal delimiter
// character ('%0') that standards before C++2c did not permit.
def warn_cxx26_compat_raw_string_literal_character_set : Warning<
  "'%0' in a raw string literal delimiter is incompatible "
  "with standards before C++2c">,
  InGroup<CXXPre26Compat>, DefaultIgnore;
118+
// Extension diagnostic for a raw string literal delimiter character ('%0')
// that is only permitted as of C++2c.
def ext_cxx26_raw_string_literal_character_set : Extension<
  "'%0' in a raw string literal delimiter is a C++2c extension">,
  InGroup<CXX26>, DefaultIgnore;
121+
114122
def warn_multichar_character_literal : Warning<
115123
"multi-character character constant">, InGroup<MultiChar>;
116124
def warn_four_char_character_literal : Warning<

clang/lib/Basic/CharInfo.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,20 @@ const uint16_t clang::charinfo::InfoTable[256] = {
3131
0 , 0 , 0 , 0 ,
3232
//32 SP 33 ! 34 " 35 #
3333
//36 $ 37 % 38 & 39 '
34-
CHAR_SPACE , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
35-
CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
34+
CHAR_SPACE , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
35+
CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
3636
//40 ( 41 ) 42 * 43 +
3737
//44 , 45 - 46 . 47 /
38-
CHAR_PUNCT , CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL ,
39-
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
38+
CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
39+
CHAR_PUNCT , CHAR_PUNCT , CHAR_PERIOD , CHAR_PUNCT ,
4040
//48 0 49 1 50 2 51 3
4141
//52 4 53 5 54 6 55 7
4242
CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
4343
CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT ,
4444
//56 8 57 9 58 : 59 ;
4545
//60 < 61 = 62 > 63 ?
46-
CHAR_DIGIT , CHAR_DIGIT , CHAR_RAWDEL , CHAR_RAWDEL ,
47-
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
46+
CHAR_DIGIT , CHAR_DIGIT , CHAR_PUNCT , CHAR_PUNCT ,
47+
CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT ,
4848
//64 @ 65 A 66 B 67 C
4949
//68 D 69 E 70 F 71 G
5050
CHAR_PUNCT , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER ,
@@ -59,8 +59,8 @@ const uint16_t clang::charinfo::InfoTable[256] = {
5959
CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER ,
6060
//88 X 89 Y 90 Z 91 [
6161
//92 \ 93 ] 94 ^ 95 _
62-
CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_RAWDEL ,
63-
CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,
62+
CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_PUNCT ,
63+
CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_UNDER ,
6464
//96 ` 97 a 98 b 99 c
6565
//100 d 101 e 102 f 103 g
6666
CHAR_PUNCT , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER ,
@@ -75,6 +75,6 @@ const uint16_t clang::charinfo::InfoTable[256] = {
7575
CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER ,
7676
//120 x 121 y 122 z 123 {
7777
//124 | 125 } 126 ~ 127 DEL
78-
CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_RAWDEL ,
79-
CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
78+
CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_PUNCT ,
79+
CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , 0
8080
};

clang/lib/Lex/Lexer.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2261,8 +2261,17 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
22612261

22622262
unsigned PrefixLen = 0;
22632263

2264-
while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
2264+
// Scan the raw string delimiter, which may be at most 16 characters long.
while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) {
  // Diagnose the character that was just accepted as part of the delimiter
  // *before* advancing. Checking after the increment would inspect the
  // following character instead: a leading '$', '@' or '`' would go
  // undiagnosed, and a character past the end of the delimiter could be
  // flagged by mistake.
  if (!isLexingRawMode() &&
      llvm::is_contained({'$', '@', '`'}, CurPtr[PrefixLen])) {
    const char *Pos = &CurPtr[PrefixLen];
    Diag(Pos, LangOpts.CPlusPlus26
                  ? diag::warn_cxx26_compat_raw_string_literal_character_set
                  : diag::ext_cxx26_raw_string_literal_character_set)
        << StringRef(Pos, 1);
  }
  ++PrefixLen;
}
22662275

22672276
// If the last character was not a '(', then we didn't lex a valid delimiter.
22682277
if (CurPtr[PrefixLen] != '(') {
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wc++26-extensions %s
2+
// RUN: %clang_cc1 -std=c++2c -fsyntax-only -verify=cxx26 -Wpre-c++26-compat %s
3+
4+
int main() {
5+
(void) R"abc`@$(foobar)abc`@$";
6+
//expected-warning@-1 {{'`' in a raw string literal delimiter is a C++2c extension}}
7+
//expected-warning@-2 {{'@' in a raw string literal delimiter is a C++2c extension}}
8+
//expected-warning@-3 {{'$' in a raw string literal delimiter is a C++2c extension}}
9+
//cxx26-warning@-4 {{'`' in a raw string literal delimiter is incompatible with standards before C++2c}}
10+
//cxx26-warning@-5 {{'@' in a raw string literal delimiter is incompatible with standards before C++2c}}
11+
//cxx26-warning@-6 {{'$' in a raw string literal delimiter is incompatible with standards before C++2c}}
12+
}

0 commit comments

Comments
 (0)