Skip to content

Commit 89aad1e

Browse files
committed
Reland [clang-format] Add an option to format integer literal separators
Previously committed in 46c94e5 which was reverted in f0756e0 due to a memory bug. Closes #58949. Differential Revision: https://reviews.llvm.org/D140543
1 parent 7a8cb6c commit 89aad1e

File tree

9 files changed

+551
-0
lines changed

9 files changed

+551
-0
lines changed

clang/docs/ClangFormatStyleOptions.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3159,6 +3159,37 @@ the configuration (without a prefix: ``Auto``).
31593159

31603160

31613161

3162+
**IntegerLiteralSeparator** (``IntegerLiteralSeparatorStyle``) :versionbadge:`clang-format 16`
3163+
Format integer literal separators (``'`` for C++ and ``_`` for C#, Java,
3164+
and JavaScript).
3165+
3166+
Nested configuration flags:
3167+
3168+
Separator format of integer literals of different bases.
3169+
<0: Remove separators.
3170+
0: Leave the literal as is.
3171+
>0: Insert separators between digits, starting from the rightmost digit.
3172+
3173+
* ``int8_t Binary`` Format separators in binary literals.

  .. code-block:: c++
3174+
3175+
-1: 0b100111101101
3176+
0: 0b10011'11'0110'1
3177+
3: 0b100'111'101'101
3178+
4: 0b1001'1110'1101
3179+
3180+
* ``int8_t Decimal`` Format separators in decimal literals.

  .. code-block:: c++
3181+
3182+
-1: 18446744073709550592ull
3183+
0: 184467'440737'0'95505'92ull
3184+
3: 18'446'744'073'709'550'592ull
3185+
3186+
* ``int8_t Hex`` Format separators in hexadecimal literals.

  .. code-block:: c++
3187+
3188+
-1: 0xDEADBEEFDEADBEEFuz
3189+
0: 0xDEAD'BEEF'DE'AD'BEE'Fuz
3190+
2: 0xDE'AD'BE'EF'DE'AD'BE'EFuz
3191+
3192+
31623193
**JavaImportGroups** (``List of Strings``) :versionbadge:`clang-format 8`
31633194
A vector of prefixes ordered by the desired groups for Java imports.
31643195

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,8 @@ clang-format
867867
- Add ``RequiresExpressionIndentation`` option for configuring the alignment of requires-expressions.
868868
The default value of this option is ``OuterScope``, which differs in behavior from clang-format 15.
869869
To match the default behavior of clang-format 15, use the ``Keyword`` value.
870+
- Add ``IntegerLiteralSeparator`` option for fixing integer literal separators
871+
in C++, C#, Java, and JavaScript.
870872

871873
clang-extdef-mapping
872874
--------------------

clang/include/clang/Format/Format.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2450,6 +2450,37 @@ struct FormatStyle {
24502450
/// \version 11
24512451
TrailingCommaStyle InsertTrailingCommas;
24522452

2453+
/// Separator format of integer literals of different bases.
2454+
/// <0: Remove separators.
2455+
/// 0: Leave the literal as is.
2456+
/// >0: Insert separators between digits, starting from the rightmost digit.
2457+
struct IntegerLiteralSeparatorStyle {
2458+
/// Format separators in binary literals: the number of digits per group
/// (negative removes separators, 0 leaves the literal untouched).
/// \code
2459+
/// -1: 0b100111101101
2460+
/// 0: 0b10011'11'0110'1
2461+
/// 3: 0b100'111'101'101
2462+
/// 4: 0b1001'1110'1101
2463+
/// \endcode
2464+
int8_t Binary;
2465+
/// Format separators in decimal literals: the number of digits per group
/// (negative removes separators, 0 leaves the literal untouched).
/// \code
2466+
/// -1: 18446744073709550592ull
2467+
/// 0: 184467'440737'0'95505'92ull
2468+
/// 3: 18'446'744'073'709'550'592ull
2469+
/// \endcode
2470+
int8_t Decimal;
2471+
/// Format separators in hexadecimal literals: the number of digits per group
/// (negative removes separators, 0 leaves the literal untouched).
/// \code
2472+
/// -1: 0xDEADBEEFDEADBEEFuz
2473+
/// 0: 0xDEAD'BEEF'DE'AD'BEE'Fuz
2474+
/// 2: 0xDE'AD'BE'EF'DE'AD'BE'EFuz
2475+
/// \endcode
2476+
int8_t Hex;
2477+
};
2478+
2479+
/// Format integer literal separators (``'`` for C++ and ``_`` for C#, Java,
2480+
/// and JavaScript).
2481+
/// \version 16
2482+
IntegerLiteralSeparatorStyle IntegerLiteralSeparator;
2483+
24532484
/// A vector of prefixes ordered by the desired groups for Java imports.
24542485
///
24552486
/// One group's prefix can be a subset of another - the longest prefix is
@@ -4089,6 +4120,10 @@ struct FormatStyle {
40894120
IndentWidth == R.IndentWidth &&
40904121
IndentWrappedFunctionNames == R.IndentWrappedFunctionNames &&
40914122
InsertBraces == R.InsertBraces &&
4123+
IntegerLiteralSeparator.Binary == R.IntegerLiteralSeparator.Binary &&
4124+
IntegerLiteralSeparator.Decimal ==
4125+
R.IntegerLiteralSeparator.Decimal &&
4126+
IntegerLiteralSeparator.Hex == R.IntegerLiteralSeparator.Hex &&
40924127
JavaImportGroups == R.JavaImportGroups &&
40934128
JavaScriptQuotes == R.JavaScriptQuotes &&
40944129
JavaScriptWrapImports == R.JavaScriptWrapImports &&

clang/lib/Format/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ add_clang_library(clangFormat
88
Format.cpp
99
FormatToken.cpp
1010
FormatTokenLexer.cpp
11+
IntegerLiteralSeparatorFixer.cpp
1112
MacroCallReconstructor.cpp
1213
MacroExpander.cpp
1314
NamespaceEndCommentsFixer.cpp

clang/lib/Format/Format.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "FormatInternal.h"
2121
#include "FormatToken.h"
2222
#include "FormatTokenLexer.h"
23+
#include "IntegerLiteralSeparatorFixer.h"
2324
#include "NamespaceEndCommentsFixer.h"
2425
#include "QualifierAlignmentFixer.h"
2526
#include "SortJavaScriptImports.h"
@@ -335,6 +336,14 @@ struct ScalarEnumerationTraits<FormatStyle::IndentExternBlockStyle> {
335336
}
336337
};
337338

339+
template <> struct MappingTraits<FormatStyle::IntegerLiteralSeparatorStyle> {
340+
static void mapping(IO &IO, FormatStyle::IntegerLiteralSeparatorStyle &Base) {
341+
IO.mapOptional("Binary", Base.Binary);
342+
IO.mapOptional("Decimal", Base.Decimal);
343+
IO.mapOptional("Hex", Base.Hex);
344+
}
345+
};
346+
338347
template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
339348
static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
340349
IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
@@ -881,6 +890,7 @@ template <> struct MappingTraits<FormatStyle> {
881890
Style.IndentWrappedFunctionNames);
882891
IO.mapOptional("InsertBraces", Style.InsertBraces);
883892
IO.mapOptional("InsertTrailingCommas", Style.InsertTrailingCommas);
893+
IO.mapOptional("IntegerLiteralSeparator", Style.IntegerLiteralSeparator);
884894
IO.mapOptional("JavaImportGroups", Style.JavaImportGroups);
885895
IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
886896
IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports);
@@ -1335,6 +1345,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
13351345
LLVMStyle.IndentWrappedFunctionNames = false;
13361346
LLVMStyle.InsertBraces = false;
13371347
LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None;
1348+
LLVMStyle.IntegerLiteralSeparator = {/*Binary=*/0, /*Decimal=*/0, /*Hex=*/0};
13381349
LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
13391350
LLVMStyle.JavaScriptWrapImports = true;
13401351
LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
@@ -3391,6 +3402,10 @@ reformat(const FormatStyle &Style, StringRef Code,
33913402
AnalyzerPass;
33923403
SmallVector<AnalyzerPass, 8> Passes;
33933404

3405+
Passes.emplace_back([&](const Environment &Env) {
3406+
return IntegerLiteralSeparatorFixer().process(Env, Expanded);
3407+
});
3408+
33943409
if (Style.isCpp()) {
33953410
if (Style.QualifierAlignment != FormatStyle::QAS_Leave) {
33963411
Passes.emplace_back([&](const Environment &Env) {
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file implements IntegerLiteralSeparatorFixer that fixes integer
11+
/// literal separators in C++, C#, Java, and JavaScript.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "IntegerLiteralSeparatorFixer.h"
16+
17+
namespace clang {
18+
namespace format {
19+
20+
enum class Base { Binary, Decimal, Hex, Other };
21+
22+
static Base getBase(const StringRef IntegerLiteral) {
23+
assert(IntegerLiteral.size() > 1);
24+
25+
if (IntegerLiteral[0] > '0') {
26+
assert(IntegerLiteral[0] <= '9');
27+
return Base::Decimal;
28+
}
29+
30+
assert(IntegerLiteral[0] == '0');
31+
32+
switch (IntegerLiteral[1]) {
33+
case 'b':
34+
case 'B':
35+
return Base::Binary;
36+
case 'x':
37+
case 'X':
38+
return Base::Hex;
39+
default:
40+
return Base::Other;
41+
}
42+
}
43+
44+
std::pair<tooling::Replacements, unsigned>
45+
IntegerLiteralSeparatorFixer::process(const Environment &Env,
46+
const FormatStyle &Style) {
47+
switch (Style.Language) {
48+
case FormatStyle::LK_Cpp:
49+
case FormatStyle::LK_ObjC:
50+
Separator = '\'';
51+
break;
52+
case FormatStyle::LK_CSharp:
53+
case FormatStyle::LK_Java:
54+
case FormatStyle::LK_JavaScript:
55+
Separator = '_';
56+
break;
57+
default:
58+
return {};
59+
}
60+
61+
const auto &Option = Style.IntegerLiteralSeparator;
62+
const auto Binary = Option.Binary;
63+
const auto Decimal = Option.Decimal;
64+
const auto Hex = Option.Hex;
65+
const bool SkipBinary = Binary == 0;
66+
const bool SkipDecimal = Decimal == 0;
67+
const bool SkipHex = Hex == 0;
68+
69+
if (SkipBinary && SkipDecimal && SkipHex)
70+
return {};
71+
72+
const auto &SourceMgr = Env.getSourceManager();
73+
AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
74+
75+
const auto ID = Env.getFileID();
76+
const auto LangOpts = getFormattingLangOpts(Style);
77+
Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
78+
Lex.SetCommentRetentionState(true);
79+
80+
Token Tok;
81+
Lex.LexFromRawLexer(Tok);
82+
83+
tooling::Replacements Result;
84+
for (bool Skip = false; Tok.isNot(tok::eof); Lex.LexFromRawLexer(Tok)) {
85+
auto Length = Tok.getLength();
86+
if (Length < 2)
87+
continue;
88+
auto Location = Tok.getLocation();
89+
auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
90+
if (Tok.is(tok::comment)) {
91+
if (Text == "// clang-format off" || Text == "/* clang-format off */")
92+
Skip = true;
93+
else if (Text == "// clang-format on" || Text == "/* clang-format on */")
94+
Skip = false;
95+
continue;
96+
}
97+
if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
98+
!AffectedRangeMgr.affectsCharSourceRange(
99+
CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
100+
continue;
101+
}
102+
const auto B = getBase(Text);
103+
const bool IsBase2 = B == Base::Binary;
104+
const bool IsBase10 = B == Base::Decimal;
105+
const bool IsBase16 = B == Base::Hex;
106+
if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
107+
(IsBase16 && SkipHex) || B == Base::Other) {
108+
continue;
109+
}
110+
if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
111+
(IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
112+
continue;
113+
}
114+
if (((IsBase2 && Binary < 0) || (IsBase10 && Decimal < 0) ||
115+
(IsBase16 && Hex < 0)) &&
116+
Text.find(Separator) == StringRef::npos) {
117+
continue;
118+
}
119+
const auto Start = Text[0] == '0' ? 2 : 0;
120+
auto End = Text.find_first_of("uUlLzZn");
121+
if (End == StringRef::npos)
122+
End = Length;
123+
if (Start > 0 || End < Length) {
124+
Length = End - Start;
125+
Text = Text.substr(Start, Length);
126+
}
127+
auto DigitsPerGroup = Decimal;
128+
if (IsBase2)
129+
DigitsPerGroup = Binary;
130+
else if (IsBase16)
131+
DigitsPerGroup = Hex;
132+
if (DigitsPerGroup > 0 && checkSeparator(Text, DigitsPerGroup))
133+
continue;
134+
if (Start > 0)
135+
Location = Location.getLocWithOffset(Start);
136+
cantFail(Result.add(tooling::Replacement(SourceMgr, Location, Length,
137+
format(Text, DigitsPerGroup))));
138+
}
139+
140+
return {Result, 0};
141+
}
142+
143+
bool IntegerLiteralSeparatorFixer::checkSeparator(
144+
const StringRef IntegerLiteral, int DigitsPerGroup) const {
145+
assert(DigitsPerGroup > 0);
146+
147+
int I = 0;
148+
for (auto C : llvm::reverse(IntegerLiteral)) {
149+
if (C == Separator) {
150+
if (I < DigitsPerGroup)
151+
return false;
152+
I = 0;
153+
} else {
154+
++I;
155+
if (I == DigitsPerGroup)
156+
return false;
157+
}
158+
}
159+
160+
return true;
161+
}
162+
163+
std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
164+
int DigitsPerGroup) const {
165+
assert(DigitsPerGroup != 0);
166+
167+
std::string Formatted;
168+
169+
if (DigitsPerGroup < 0) {
170+
for (auto C : IntegerLiteral)
171+
if (C != Separator)
172+
Formatted.push_back(C);
173+
return Formatted;
174+
}
175+
176+
int DigitCount = 0;
177+
for (auto C : IntegerLiteral)
178+
if (C != Separator)
179+
++DigitCount;
180+
181+
int Remainder = DigitCount % DigitsPerGroup;
182+
183+
int I = 0;
184+
for (auto C : IntegerLiteral) {
185+
if (C == Separator)
186+
continue;
187+
if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
188+
Formatted.push_back(Separator);
189+
I = 0;
190+
Remainder = 0;
191+
}
192+
Formatted.push_back(C);
193+
++I;
194+
}
195+
196+
return Formatted;
197+
}
198+
199+
} // namespace format
200+
} // namespace clang
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//===--- IntegerLiteralSeparatorFixer.h -------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file declares IntegerLiteralSeparatorFixer that fixes integer
11+
/// literal separators in C++, C#, Java, and JavaScript.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
16+
#define LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
17+
18+
#include "TokenAnalyzer.h"
19+
20+
namespace clang {
21+
namespace format {
22+
23+
/// Inserts or removes digit separators in binary, decimal, and hexadecimal
/// integer literals according to ``FormatStyle::IntegerLiteralSeparator``.
class IntegerLiteralSeparatorFixer {
24+
public:
25+
/// Lexes the code in \p Env and returns the replacements that bring the
/// separators of its integer literals in line with \p Style. The unsigned
/// member of the returned pair is always 0.
std::pair<tooling::Replacements, unsigned> process(const Environment &Env,
26+
const FormatStyle &Style);
27+
28+
private:
29+
/// A true result means \p IntegerLiteral (digits and separators only) needs
/// no regrouping for groups of \p DigitsPerGroup digits; process() then
/// leaves the literal alone. \p DigitsPerGroup must be positive.
bool checkSeparator(const StringRef IntegerLiteral, int DigitsPerGroup) const;
30+
/// Returns \p IntegerLiteral with all separators removed (negative
/// \p DigitsPerGroup) or regrouped from the rightmost digit (positive
/// \p DigitsPerGroup). \p DigitsPerGroup must not be 0.
std::string format(const StringRef IntegerLiteral, int DigitsPerGroup) const;
31+
32+
/// Separator character for the language being formatted. process() assigns
/// it ('\'' or '_') before either helper above is called.
char Separator;
33+
};
34+
35+
} // end namespace format
36+
} // end namespace clang
37+
38+
#endif

0 commit comments

Comments
 (0)