Skip to content

Commit a39b13e

Browse files
committed
address comments, rename CharSet to EncodingConverter
1 parent 52635f2 commit a39b13e

File tree

5 files changed

+45
-48
lines changed

5 files changed

+45
-48
lines changed

llvm/include/llvm/Support/CharSet.h renamed to llvm/include/llvm/Support/EncodingConverter.h

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===-- CharSet.h - Characters set conversion class ---------------*- C++ -*-=//
1+
//===-- EncodingConverter.h - Encoding conversion class -----------*- C++ -*-=//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,8 +12,8 @@
1212
///
1313
//===----------------------------------------------------------------------===//
1414

15-
#ifndef LLVM_SUPPORT_CHARSET_H
16-
#define LLVM_SUPPORT_CHARSET_H
15+
#ifndef LLVM_SUPPORT_ENCODING_CONVERTER_H
16+
#define LLVM_SUPPORT_ENCODING_CONVERTER_H
1717

1818
#include "llvm/ADT/SmallString.h"
1919
#include "llvm/ADT/StringRef.h"
@@ -44,8 +44,8 @@ class EncodingConverterImplBase {
4444
/// - std::errc::invalid_argument: The input contains an incomplete
4545
/// multibyte sequence.
4646
///
47-
/// If the destination charset is a stateful character set, the shift state
48-
/// will be set to the initial state.
47+
/// If the destination encoding is stateful, the shift state will be set
48+
/// to the initial state.
4949
///
5050
/// In case of an error, the result string contains the successfully converted
5151
/// part of the input string.
@@ -77,7 +77,7 @@ enum class TextEncoding {
7777
IBM1047
7878
};
7979

80-
/// Utility class to convert between different character set encodings.
80+
/// Utility class to convert between different character encodings.
8181
class EncodingConverter {
8282
std::unique_ptr<details::EncodingConverterImplBase> Converter;
8383

@@ -89,19 +89,18 @@ class EncodingConverter {
8989
/// Creates an EncodingConverter instance.
9090
/// Returns std::errc::invalid_argument in case the requested conversion is
9191
/// not supported.
92-
/// \param[in] CSFrom the source character encoding
93-
/// \param[in] CSTo the target character encoding
92+
/// \param[in] From the source character encoding
93+
/// \param[in] To the target character encoding
9494
/// \return an EncodingConverter instance or an error code
95-
static ErrorOr<EncodingConverter> create(TextEncoding CSFrom,
96-
TextEncoding CSTo);
95+
static ErrorOr<EncodingConverter> create(TextEncoding From, TextEncoding To);
9796

9897
/// Creates an EncodingConverter instance.
9998
/// Returns std::errc::invalid_argument in case the requested conversion is
10099
/// not supported.
101-
/// \param[in] CPFrom name of the source character encoding
102-
/// \param[in] CPTo name of the target character encoding
100+
/// \param[in] From name of the source character encoding
101+
/// \param[in] To name of the target character encoding
103102
/// \return an EncodingConverter instance or an error code
104-
static ErrorOr<EncodingConverter> create(StringRef CPFrom, StringRef CPTo);
103+
static ErrorOr<EncodingConverter> create(StringRef From, StringRef To);
105104

106105
EncodingConverter(const EncodingConverter &) = delete;
107106
EncodingConverter &operator=(const EncodingConverter &) = delete;

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,6 @@ add_llvm_component_library(LLVMSupport
162162
CachePruning.cpp
163163
Caching.cpp
164164
circular_raw_ostream.cpp
165-
CharSet.cpp
166165
Chrono.cpp
167166
COM.cpp
168167
CodeGenCoverage.cpp
@@ -187,6 +186,7 @@ add_llvm_component_library(LLVMSupport
187186
ELFAttributes.cpp
188187
ELFAttrParserCompact.cpp
189188
ELFAttrParserExtended.cpp
189+
EncodingConverter.cpp
190190
Error.cpp
191191
ErrorHandling.cpp
192192
ExponentialBackoff.cpp

llvm/lib/Support/CharSet.cpp renamed to llvm/lib/Support/EncodingConverter.cpp

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===-- CharSet.cpp - Characters sets conversion class ------------*- C++ -*-=//
1+
//===-- EncodingConverter.cpp - Encoding conversion class ---------*- C++ -*-=//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,11 +8,11 @@
88
///
99
/// \file
1010
/// This file provides utility classes to convert between different character
11-
/// set encodings.
11+
/// encodings.
1212
///
1313
//===----------------------------------------------------------------------===//
1414

15-
#include "llvm/Support/CharSet.h"
15+
#include "llvm/Support/EncodingConverter.h"
1616
#include "llvm/ADT/SmallString.h"
1717
#include "llvm/ADT/SmallVector.h"
1818
#include "llvm/ADT/StringExtras.h"
@@ -46,10 +46,10 @@ static void normalizeCharSetName(StringRef CSName,
4646
}
4747
}
4848

49-
// Maps the charset name to enum constant if possible.
50-
static std::optional<TextEncoding> getKnownEncoding(StringRef CSName) {
49+
// Maps the encoding name to enum constant if possible.
50+
static std::optional<TextEncoding> getKnownEncoding(StringRef Name) {
5151
SmallString<16> Normalized;
52-
normalizeCharSetName(CSName, Normalized);
52+
normalizeCharSetName(Name, Normalized);
5353
if (Normalized.equals("utf8"))
5454
return TextEncoding::UTF8;
5555
if (Normalized.equals("ibm1047"))
@@ -63,9 +63,8 @@ HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength,
6363
// No space left in output buffer. Double the size of the underlying
6464
// memory in the SmallVectorImpl, adjust pointer and length and continue
6565
// the conversion.
66-
Capacity = (Capacity < std::numeric_limits<size_t>::max() / 2)
67-
? 2 * Capacity
68-
: std::numeric_limits<size_t>::max();
66+
Capacity =
67+
(Capacity < Result.max_size() / 2) ? 2 * Capacity : Result.max_size();
6968
Result.resize(0);
7069
Result.resize_for_overwrite(Capacity);
7170
Output = static_cast<char *>(Result.data());
@@ -80,9 +79,9 @@ enum ConversionType {
8079

8180
// Support conversion between EBCDIC 1047 and UTF-8. This class uses
8281
// built-in translation tables that allow for translation between the
83-
// aforementioned character sets. The use of tables for conversion is only
82+
// aforementioned encodings. The use of tables for conversion is only
8483
// possible because EBCDIC 1047 is a single-byte, stateless encoding; other
85-
// character sets are not supported.
84+
// encodings are not supported.
8685
class EncodingConverterTable : public details::EncodingConverterImplBase {
8786
const ConversionType ConvType;
8887

@@ -169,8 +168,7 @@ EncodingConverterICU::convertString(StringRef Source,
169168
/*pivotLimit=*/NULL, /*reset=*/true,
170169
/*flush=*/true, &EC);
171170
if (U_FAILURE(EC)) {
172-
if (EC == U_BUFFER_OVERFLOW_ERROR &&
173-
Capacity < std::numeric_limits<size_t>::max()) {
171+
if (EC == U_BUFFER_OVERFLOW_ERROR && Capacity < Result.max_size()) {
174172
HandleOverflow(Capacity, Output, OutputLength, Result);
175173
continue;
176174
}
@@ -246,7 +244,7 @@ EncodingConverterIconv::convertString(StringRef Source,
246244
this](size_t Ret) {
247245
if (Ret == static_cast<size_t>(-1)) {
248246
// An error occurred. Check if we can gracefully handle it.
249-
if (errno == E2BIG && Capacity < std::numeric_limits<size_t>::max()) {
247+
if (errno == E2BIG && Capacity < Result.max_size()) {
250248
HandleOverflow(Capacity, Output, OutputLength, Result);
251249
// Reset converter
252250
iconv(ConvDesc, nullptr, nullptr, nullptr, nullptr);
@@ -301,7 +299,7 @@ void EncodingConverterIconv::reset() {
301299
ErrorOr<EncodingConverter> EncodingConverter::create(TextEncoding CPFrom,
302300
TextEncoding CPTo) {
303301

304-
// text encodings should be distinct
302+
// Text encodings should be distinct.
305303
if (CPFrom == CPTo)
306304
return std::make_error_code(std::errc::invalid_argument);
307305

@@ -317,22 +315,22 @@ ErrorOr<EncodingConverter> EncodingConverter::create(TextEncoding CPFrom,
317315
std::make_unique<EncodingConverterTable>(Conversion));
318316
}
319317

320-
ErrorOr<EncodingConverter> EncodingConverter::create(StringRef CSFrom,
321-
StringRef CSTo) {
322-
std::optional<TextEncoding> From = getKnownEncoding(CSFrom);
323-
std::optional<TextEncoding> To = getKnownEncoding(CSTo);
324-
if (From && To) {
325-
ErrorOr<EncodingConverter> Converter = create(*From, *To);
318+
ErrorOr<EncodingConverter> EncodingConverter::create(StringRef From,
319+
StringRef To) {
320+
std::optional<TextEncoding> FromEncoding = getKnownEncoding(From);
321+
std::optional<TextEncoding> ToEncoding = getKnownEncoding(To);
322+
if (FromEncoding && ToEncoding) {
323+
ErrorOr<EncodingConverter> Converter = create(*FromEncoding, *ToEncoding);
326324
if (Converter)
327325
return Converter;
328326
}
329327
#if HAVE_ICU
330328
UErrorCode EC = U_ZERO_ERROR;
331-
UConverterUniquePtr FromConvDesc(ucnv_open(CSFrom.str().c_str(), &EC));
329+
UConverterUniquePtr FromConvDesc(ucnv_open(From.str().c_str(), &EC));
332330
if (U_FAILURE(EC)) {
333331
return std::error_code(errno, std::generic_category());
334332
}
335-
UConverterUniquePtr ToConvDesc(ucnv_open(CSTo.str().c_str(), &EC));
333+
UConverterUniquePtr ToConvDesc(ucnv_open(To.str().c_str(), &EC));
336334
if (U_FAILURE(EC)) {
337335
return std::error_code(errno, std::generic_category());
338336
}
@@ -341,7 +339,7 @@ ErrorOr<EncodingConverter> EncodingConverter::create(StringRef CSFrom,
341339
std::move(ToConvDesc));
342340
return EncodingConverter(std::move(Converter));
343341
#elif HAVE_ICONV
344-
iconv_t ConvDesc = iconv_open(CSTo.str().c_str(), CSFrom.str().c_str());
342+
iconv_t ConvDesc = iconv_open(To.str().c_str(), From.str().c_str());
345343
if (ConvDesc == (iconv_t)-1)
346344
return std::error_code(errno, std::generic_category());
347345
return EncodingConverter(std::make_unique<EncodingConverterIconv>(ConvDesc));

llvm/unittests/Support/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ add_llvm_unittest(SupportTests
2020
CrashRecoveryTest.cpp
2121
Caching.cpp
2222
Casting.cpp
23-
CharSetTest.cpp
2423
CheckedArithmeticTest.cpp
2524
Chrono.cpp
2625
CommandLineTest.cpp
@@ -40,6 +39,7 @@ add_llvm_unittest(SupportTests
4039
ErrnoTest.cpp
4140
ErrorOrTest.cpp
4241
ErrorTest.cpp
42+
EncodingConverterTest.cpp
4343
ExponentialBackoffTest.cpp
4444
ExtensibleRTTITest.cpp
4545
FileCollectorTest.cpp

llvm/unittests/Support/CharSetTest.cpp renamed to llvm/unittests/Support/EncodingConverterTest.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
//===- unittests/Support/CharSetTest.cpp - Charset conversion tests -------===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include "llvm/Support/CharSet.h"
9+
#include "llvm/Support/EncodingConverter.h"
1010
#include "llvm/ADT/SmallString.h"
1111
#include "gtest/gtest.h"
1212
using namespace llvm;
@@ -53,7 +53,7 @@ static const char EarthIBM939[] =
5353
static const char EarthUTFExtraPartial[] =
5454
"\x45\x61\x72\x74\x68\xe5\x9c\xb0\xe7\x90\x83\xe5";
5555

56-
TEST(CharSet, FromUTF8) {
56+
TEST(Encoding, FromUTF8) {
5757
// Hello string.
5858
StringRef Src(HelloA);
5959
SmallString<64> Dst;
@@ -93,7 +93,7 @@ TEST(CharSet, FromUTF8) {
9393
EXPECT_EQ(EC, std::errc::illegal_byte_sequence);
9494
}
9595

96-
TEST(CharSet, ToUTF8) {
96+
TEST(Encoding, ToUTF8) {
9797
// Hello string.
9898
StringRef Src(HelloE);
9999
SmallString<64> Dst;
@@ -128,7 +128,7 @@ TEST(CharSet, ToUTF8) {
128128
EXPECT_STREQ(AccentUTF, static_cast<std::string>(Dst).c_str());
129129
}
130130

131-
TEST(CharSet, RoundTrip) {
131+
TEST(Encoding, RoundTrip) {
132132
ErrorOr<EncodingConverter> ConvToUTF16 =
133133
EncodingConverter::create("IBM-1047", "UTF-16");
134134
// Stop test if conversion is not supported (no underlying iconv support).
@@ -170,7 +170,7 @@ TEST(CharSet, RoundTrip) {
170170
EXPECT_STREQ(SrcStr, static_cast<std::string>(Dst3Str).c_str());
171171
}
172172

173-
TEST(CharSet, ShiftState2022) {
173+
TEST(Encoding, ShiftState2022) {
174174
// Earth string.
175175
StringRef Src(EarthUTF);
176176
SmallString<8> Dst;
@@ -190,7 +190,7 @@ TEST(CharSet, ShiftState2022) {
190190
EXPECT_STREQ(EarthISO2022, static_cast<std::string>(Dst).c_str());
191191
}
192192

193-
TEST(CharSet, ShiftState2022Partial) {
193+
TEST(Encoding, InvalidInput) {
194194
// Earth string.
195195
StringRef Src(EarthUTFExtraPartial);
196196
SmallString<8> Dst;
@@ -204,12 +204,12 @@ TEST(CharSet, ShiftState2022Partial) {
204204
return;
205205
}
206206

207-
// Check that the string is properly converted.
207+
// Check that the string failed to convert.
208208
std::error_code EC = ConvTo2022->convert(Src, Dst);
209209
EXPECT_TRUE(EC);
210210
}
211211

212-
TEST(CharSet, ShiftStateIBM939) {
212+
TEST(Encoding, ShiftStateIBM939) {
213213
// Earth string.
214214
StringRef Src(EarthUTF);
215215
SmallString<64> Dst;

0 commit comments

Comments
 (0)