Skip to content

Commit 6d40922

Browse files
committed
address comments
1 parent 3192c7b commit 6d40922

File tree

4 files changed

+40
-35
lines changed

4 files changed

+40
-35
lines changed

llvm/include/llvm/Config/config.h.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,10 @@
237237
#cmakedefine HAVE____CHKSTK_MS ${HAVE____CHKSTK_MS}
238238

239239
/* Define if ICU library is available */
240-
#cmakedefine HAVE_ICU ${HAVE_ICU}
240+
#cmakedefine01 HAVE_ICU
241241

242242
/* Define if iconv library is available */
243-
#cmakedefine HAVE_ICONV ${HAVE_ICONV}
243+
#cmakedefine01 HAVE_ICONV
244244

245245
/* Linker version detected at compile time. */
246246
#cmakedefine HOST_LINK_VERSION "${HOST_LINK_VERSION}"

llvm/include/llvm/Support/CharSet.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,13 @@ class CharSetConverterImplBase {
6969
} // namespace details
7070

7171
// Names inspired by https://wg21.link/p1885.
72-
namespace text_encoding {
73-
enum class id {
72+
enum class TextEncoding {
7473
/// UTF-8 character set encoding.
7574
UTF8,
7675

7776
/// IBM EBCDIC 1047 character set encoding.
7877
IBM1047
7978
};
80-
} // end namespace text_encoding
8179

8280
/// Utility class to convert between different character set encodings.
8381
class CharSetConverter {
@@ -93,8 +91,8 @@ class CharSetConverter {
9391
/// \param[in] CSFrom the source character encoding
9492
/// \param[in] CSTo the target character encoding
9593
/// \return a CharSetConverter instance or an error code
96-
static ErrorOr<CharSetConverter> create(text_encoding::id CSFrom,
97-
text_encoding::id CSTo);
94+
static ErrorOr<CharSetConverter> create(TextEncoding CSFrom,
95+
TextEncoding CSTo);
9896

9997
/// Creates a CharSetConverter instance.
10098
/// Returns std::errc::invalid_argument in case the requested conversion is

llvm/lib/Support/CharSet.cpp

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
#include <limits>
2323
#include <system_error>
2424

25-
#ifdef HAVE_ICU
25+
#if HAVE_ICU
2626
#include <unicode/ucnv.h>
27-
#elif defined(HAVE_ICONV)
27+
#elif HAVE_ICONV
2828
#include <iconv.h>
2929
#endif
3030

@@ -47,13 +47,13 @@ static void normalizeCharSetName(StringRef CSName,
4747
}
4848

4949
// Maps the charset name to enum constant if possible.
50-
static std::optional<text_encoding::id> getKnownCharSet(StringRef CSName) {
50+
static std::optional<TextEncoding> getKnownCharSet(StringRef CSName) {
5151
SmallString<16> Normalized;
5252
normalizeCharSetName(CSName, Normalized);
5353
if (Normalized.equals("utf8"))
54-
return text_encoding::id::UTF8;
54+
return TextEncoding::UTF8;
5555
if (Normalized.equals("ibm1047"))
56-
return text_encoding::id::IBM1047;
56+
return TextEncoding::IBM1047;
5757
return std::nullopt;
5858
}
5959

@@ -98,17 +98,18 @@ class CharSetConverterTable : public details::CharSetConverterImplBase {
9898
std::error_code
9999
CharSetConverterTable::convertString(StringRef Source,
100100
SmallVectorImpl<char> &Result) {
101-
if (ConvType == IBM1047ToUTF8) {
101+
switch (ConvType) {
102+
case IBM1047ToUTF8:
102103
ConverterEBCDIC::convertToUTF8(Source, Result);
103104
return std::error_code();
104-
} else if (ConvType == UTF8ToIBM1047) {
105+
case UTF8ToIBM1047:
105106
return ConverterEBCDIC::convertToEBCDIC(Source, Result);
106107
}
107108
llvm_unreachable("Invalid ConvType!");
108109
return std::error_code();
109110
}
110111

111-
#ifdef HAVE_ICU
112+
#if HAVE_ICU
112113
struct UConverterDeleter {
113114
void operator()(UConverter *Converter) const {
114115
if (Converter)
@@ -133,6 +134,10 @@ class CharSetConverterICU : public details::CharSetConverterImplBase {
133134
void reset() override;
134135
};
135136

137+
// TODO: The current implementation discards the partial result and restarts the
138+
// conversion from the beginning if there is a conversion error due to
139+
// insufficient buffer size. In the future, it would better to save the partial
140+
// result and redo the conversion for the remaining string.
136141
std::error_code
137142
CharSetConverterICU::convertString(StringRef Source,
138143
SmallVectorImpl<char> &Result) {
@@ -144,7 +149,7 @@ CharSetConverterICU::convertString(StringRef Source,
144149
size_t Capacity = Result.capacity();
145150
size_t OutputLength = Capacity;
146151
Result.resize_for_overwrite(Capacity);
147-
char *Output = static_cast<char *>(Result.data());
152+
char *Output;
148153
UErrorCode EC = U_ZERO_ERROR;
149154

150155
ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
@@ -185,7 +190,7 @@ void CharSetConverterICU::reset() {
185190
ucnv_reset(&*ToConvDesc);
186191
}
187192

188-
#elif defined(HAVE_ICONV)
193+
#elif HAVE_ICONV
189194
class CharSetConverterIconv : public details::CharSetConverterImplBase {
190195
class UniqueIconvT {
191196
iconv_t ConvDesc;
@@ -222,6 +227,10 @@ class CharSetConverterIconv : public details::CharSetConverterImplBase {
222227
void reset() override;
223228
};
224229

230+
// TODO: The current implementation discards the partial result and restarts the
231+
// conversion from the beginning if there is a conversion error due to
232+
// insufficient buffer size. In the future, it would better to save the partial
233+
// result and redo the conversion for the remaining string.
225234
std::error_code
226235
CharSetConverterIconv::convertString(StringRef Source,
227236
SmallVectorImpl<char> &Result) {
@@ -289,35 +298,35 @@ void CharSetConverterIconv::reset() {
289298
#endif // HAVE_ICONV
290299
} // namespace
291300

292-
ErrorOr<CharSetConverter> CharSetConverter::create(text_encoding::id CPFrom,
293-
text_encoding::id CPTo) {
301+
ErrorOr<CharSetConverter> CharSetConverter::create(TextEncoding CPFrom,
302+
TextEncoding CPTo) {
294303

295-
assert(CPFrom != CPTo && "Text encodings should be distinct");
304+
// text encodings should be distinct
305+
if(CPFrom == CPTo)
306+
return std::make_error_code(std::errc::invalid_argument);
296307

297308
ConversionType Conversion;
298-
if (CPFrom == text_encoding::id::UTF8 && CPTo == text_encoding::id::IBM1047)
309+
if (CPFrom == TextEncoding::UTF8 && CPTo == TextEncoding::IBM1047)
299310
Conversion = UTF8ToIBM1047;
300-
else if (CPFrom == text_encoding::id::IBM1047 &&
301-
CPTo == text_encoding::id::UTF8)
311+
else if (CPFrom == TextEncoding::IBM1047 &&
312+
CPTo == TextEncoding::UTF8)
302313
Conversion = IBM1047ToUTF8;
303314
else
304315
return std::error_code(errno, std::generic_category());
305316

306-
std::unique_ptr<details::CharSetConverterImplBase> Converter =
307-
std::make_unique<CharSetConverterTable>(Conversion);
308-
return CharSetConverter(std::move(Converter));
317+
return CharSetConverter(std::make_unique<CharSetConverterTable>(Conversion));
309318
}
310319

311320
ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
312321
StringRef CSTo) {
313-
std::optional<text_encoding::id> From = getKnownCharSet(CSFrom);
314-
std::optional<text_encoding::id> To = getKnownCharSet(CSTo);
322+
std::optional<TextEncoding> From = getKnownCharSet(CSFrom);
323+
std::optional<TextEncoding> To = getKnownCharSet(CSTo);
315324
if (From && To) {
316325
ErrorOr<CharSetConverter> Converter = create(*From, *To);
317326
if (Converter)
318327
return Converter;
319328
}
320-
#ifdef HAVE_ICU
329+
#if HAVE_ICU
321330
UErrorCode EC = U_ZERO_ERROR;
322331
UConverterUniquePtr FromConvDesc(ucnv_open(CSFrom.str().c_str(), &EC));
323332
if (U_FAILURE(EC)) {
@@ -331,13 +340,11 @@ ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
331340
std::make_unique<CharSetConverterICU>(std::move(FromConvDesc),
332341
std::move(ToConvDesc));
333342
return CharSetConverter(std::move(Converter));
334-
#elif defined(HAVE_ICONV)
343+
#elif HAVE_ICONV
335344
iconv_t ConvDesc = iconv_open(CSTo.str().c_str(), CSFrom.str().c_str());
336345
if (ConvDesc == (iconv_t)-1)
337346
return std::error_code(errno, std::generic_category());
338-
std::unique_ptr<details::CharSetConverterImplBase> Converter =
339-
std::make_unique<CharSetConverterIconv>(ConvDesc);
340-
return CharSetConverter(std::move(Converter));
347+
return CharSetConverter(std::make_unique<CharSetConverterIconv>(ConvDesc));
341348
#else
342349
return std::make_error_code(std::errc::invalid_argument);
343350
#endif

llvm/unittests/Support/CharSetTest.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ TEST(CharSet, FromUTF8) {
5959
SmallString<64> Dst;
6060

6161
ErrorOr<CharSetConverter> Conv = CharSetConverter::create(
62-
text_encoding::id::UTF8, text_encoding::id::IBM1047);
62+
TextEncoding::UTF8, TextEncoding::IBM1047);
6363

6464
// Stop test if conversion is not supported.
6565
if (!Conv) {
@@ -99,7 +99,7 @@ TEST(CharSet, ToUTF8) {
9999
SmallString<64> Dst;
100100

101101
ErrorOr<CharSetConverter> Conv = CharSetConverter::create(
102-
text_encoding::id::IBM1047, text_encoding::id::UTF8);
102+
TextEncoding::IBM1047, TextEncoding::UTF8);
103103

104104
// Stop test if conversion is not supported.
105105
if (!Conv) {

0 commit comments

Comments
 (0)