22
22
#include < limits>
23
23
#include < system_error>
24
24
25
- #ifdef HAVE_ICU
25
+ #if HAVE_ICU
26
26
#include < unicode/ucnv.h>
27
- #elif defined( HAVE_ICONV)
27
+ #elif HAVE_ICONV
28
28
#include < iconv.h>
29
29
#endif
30
30
@@ -47,13 +47,13 @@ static void normalizeCharSetName(StringRef CSName,
47
47
}
48
48
49
49
// Maps the charset name to enum constant if possible.
50
- static std::optional<text_encoding::id > getKnownCharSet (StringRef CSName) {
50
+ static std::optional<TextEncoding > getKnownCharSet (StringRef CSName) {
51
51
SmallString<16 > Normalized;
52
52
normalizeCharSetName (CSName, Normalized);
53
53
if (Normalized.equals (" utf8" ))
54
- return text_encoding::id ::UTF8;
54
+ return TextEncoding ::UTF8;
55
55
if (Normalized.equals (" ibm1047" ))
56
- return text_encoding::id ::IBM1047;
56
+ return TextEncoding ::IBM1047;
57
57
return std::nullopt;
58
58
}
59
59
@@ -98,17 +98,18 @@ class CharSetConverterTable : public details::CharSetConverterImplBase {
98
98
std::error_code
99
99
CharSetConverterTable::convertString (StringRef Source,
100
100
SmallVectorImpl<char > &Result) {
101
- if (ConvType == IBM1047ToUTF8) {
101
+ switch (ConvType) {
102
+ case IBM1047ToUTF8:
102
103
ConverterEBCDIC::convertToUTF8 (Source, Result);
103
104
return std::error_code ();
104
- } else if (ConvType == UTF8ToIBM1047) {
105
+ case UTF8ToIBM1047:
105
106
return ConverterEBCDIC::convertToEBCDIC (Source, Result);
106
107
}
107
108
llvm_unreachable (" Invalid ConvType!" );
108
109
return std::error_code ();
109
110
}
110
111
111
- #ifdef HAVE_ICU
112
+ #if HAVE_ICU
112
113
struct UConverterDeleter {
113
114
void operator ()(UConverter *Converter) const {
114
115
if (Converter)
@@ -133,6 +134,10 @@ class CharSetConverterICU : public details::CharSetConverterImplBase {
133
134
void reset () override ;
134
135
};
135
136
137
+ // TODO: The current implementation discards the partial result and restarts the
138
+ // conversion from the beginning if there is a conversion error due to
139
+ // insufficient buffer size. In the future, it would be better to save the partial
140
+ // result and redo the conversion for the remaining string.
136
141
std::error_code
137
142
CharSetConverterICU::convertString (StringRef Source,
138
143
SmallVectorImpl<char > &Result) {
@@ -144,7 +149,7 @@ CharSetConverterICU::convertString(StringRef Source,
144
149
size_t Capacity = Result.capacity ();
145
150
size_t OutputLength = Capacity;
146
151
Result.resize_for_overwrite (Capacity);
147
- char *Output = static_cast < char *>(Result. data ()) ;
152
+ char *Output;
148
153
UErrorCode EC = U_ZERO_ERROR;
149
154
150
155
ucnv_setToUCallBack (&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL , NULL , NULL ,
@@ -185,7 +190,7 @@ void CharSetConverterICU::reset() {
185
190
ucnv_reset (&*ToConvDesc);
186
191
}
187
192
188
- #elif defined( HAVE_ICONV)
193
+ #elif HAVE_ICONV
189
194
class CharSetConverterIconv : public details ::CharSetConverterImplBase {
190
195
class UniqueIconvT {
191
196
iconv_t ConvDesc;
@@ -222,6 +227,10 @@ class CharSetConverterIconv : public details::CharSetConverterImplBase {
222
227
void reset () override ;
223
228
};
224
229
230
+ // TODO: The current implementation discards the partial result and restarts the
231
+ // conversion from the beginning if there is a conversion error due to
232
+ // insufficient buffer size. In the future, it would be better to save the partial
233
+ // result and redo the conversion for the remaining string.
225
234
std::error_code
226
235
CharSetConverterIconv::convertString (StringRef Source,
227
236
SmallVectorImpl<char > &Result) {
@@ -289,35 +298,35 @@ void CharSetConverterIconv::reset() {
289
298
#endif // HAVE_ICONV
290
299
} // namespace
291
300
292
- ErrorOr<CharSetConverter> CharSetConverter::create (text_encoding::id CPFrom,
293
- text_encoding::id CPTo) {
301
+ ErrorOr<CharSetConverter> CharSetConverter::create (TextEncoding CPFrom,
302
+ TextEncoding CPTo) {
294
303
295
- assert (CPFrom != CPTo && " Text encodings should be distinct" );
304
+ // text encodings should be distinct
305
+ if (CPFrom == CPTo)
306
+ return std::make_error_code (std::errc::invalid_argument);
296
307
297
308
ConversionType Conversion;
298
- if (CPFrom == text_encoding::id:: UTF8 && CPTo == text_encoding::id ::IBM1047)
309
+ if (CPFrom == TextEncoding:: UTF8 && CPTo == TextEncoding ::IBM1047)
299
310
Conversion = UTF8ToIBM1047;
300
- else if (CPFrom == text_encoding::id ::IBM1047 &&
301
- CPTo == text_encoding::id ::UTF8)
311
+ else if (CPFrom == TextEncoding ::IBM1047 &&
312
+ CPTo == TextEncoding ::UTF8)
302
313
Conversion = IBM1047ToUTF8;
303
314
else
304
315
return std::error_code (errno, std::generic_category ());
305
316
306
- std::unique_ptr<details::CharSetConverterImplBase> Converter =
307
- std::make_unique<CharSetConverterTable>(Conversion);
308
- return CharSetConverter (std::move (Converter));
317
+ return CharSetConverter (std::make_unique<CharSetConverterTable>(Conversion));
309
318
}
310
319
311
320
ErrorOr<CharSetConverter> CharSetConverter::create (StringRef CSFrom,
312
321
StringRef CSTo) {
313
- std::optional<text_encoding::id > From = getKnownCharSet (CSFrom);
314
- std::optional<text_encoding::id > To = getKnownCharSet (CSTo);
322
+ std::optional<TextEncoding > From = getKnownCharSet (CSFrom);
323
+ std::optional<TextEncoding > To = getKnownCharSet (CSTo);
315
324
if (From && To) {
316
325
ErrorOr<CharSetConverter> Converter = create (*From, *To);
317
326
if (Converter)
318
327
return Converter;
319
328
}
320
- #ifdef HAVE_ICU
329
+ #if HAVE_ICU
321
330
UErrorCode EC = U_ZERO_ERROR;
322
331
UConverterUniquePtr FromConvDesc (ucnv_open (CSFrom.str ().c_str (), &EC));
323
332
if (U_FAILURE (EC)) {
@@ -331,13 +340,11 @@ ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
331
340
std::make_unique<CharSetConverterICU>(std::move (FromConvDesc),
332
341
std::move (ToConvDesc));
333
342
return CharSetConverter (std::move (Converter));
334
- #elif defined( HAVE_ICONV)
343
+ #elif HAVE_ICONV
335
344
iconv_t ConvDesc = iconv_open (CSTo.str ().c_str (), CSFrom.str ().c_str ());
336
345
if (ConvDesc == (iconv_t )-1 )
337
346
return std::error_code (errno, std::generic_category ());
338
- std::unique_ptr<details::CharSetConverterImplBase> Converter =
339
- std::make_unique<CharSetConverterIconv>(ConvDesc);
340
- return CharSetConverter (std::move (Converter));
347
+ return CharSetConverter (std::make_unique<CharSetConverterIconv>(ConvDesc));
341
348
#else
342
349
return std::make_error_code (std::errc::invalid_argument);
343
350
#endif
0 commit comments