1
- // ===-- CharSet .cpp - Characters sets conversion class --- ---------*- C++ -*-=//
1
+ // ===-- EncodingConverter .cpp - Encoding conversion class ---------*- C++ -*-=//
2
2
//
3
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
4
// See https://llvm.org/LICENSE.txt for license information.
8
8
// /
9
9
// / \file
10
10
// / This file provides utility classes to convert between different character
11
- // / set encodings.
11
+ // / encodings.
12
12
// /
13
13
// ===----------------------------------------------------------------------===//
14
14
15
- #include " llvm/Support/CharSet .h"
15
+ #include " llvm/Support/EncodingConverter .h"
16
16
#include " llvm/ADT/SmallString.h"
17
17
#include " llvm/ADT/SmallVector.h"
18
18
#include " llvm/ADT/StringExtras.h"
@@ -46,10 +46,10 @@ static void normalizeCharSetName(StringRef CSName,
46
46
}
47
47
}
48
48
49
- // Maps the charset name to enum constant if possible.
50
- static std::optional<TextEncoding> getKnownEncoding (StringRef CSName ) {
49
+ // Maps the encoding name to enum constant if possible.
50
+ static std::optional<TextEncoding> getKnownEncoding (StringRef Name ) {
51
51
SmallString<16 > Normalized;
52
- normalizeCharSetName (CSName , Normalized);
52
+ normalizeCharSetName (Name , Normalized);
53
53
if (Normalized.equals (" utf8" ))
54
54
return TextEncoding::UTF8;
55
55
if (Normalized.equals (" ibm1047" ))
@@ -63,9 +63,8 @@ HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength,
63
63
// No space left in output buffer. Double the size of the underlying
64
64
// memory in the SmallVectorImpl, adjust pointer and length and continue
65
65
// the conversion.
66
- Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
67
- ? 2 * Capacity
68
- : std::numeric_limits<size_t >::max ();
66
+ Capacity =
67
+ (Capacity < Result.max_size () / 2 ) ? 2 * Capacity : Result.max_size ();
69
68
Result.resize (0 );
70
69
Result.resize_for_overwrite (Capacity);
71
70
Output = static_cast <char *>(Result.data ());
@@ -80,9 +79,9 @@ enum ConversionType {
80
79
81
80
// Support conversion between EBCDIC 1047 and UTF-8. This class uses
82
81
// built-in translation tables that allow for translation between the
83
- // aforementioned character sets . The use of tables for conversion is only
82
+ // aforementioned encodings . The use of tables for conversion is only
84
83
// possible because EBCDIC 1047 is a single-byte, stateless encoding; other
85
- // character sets are not supported.
84
+ // encodings are not supported.
86
85
class EncodingConverterTable : public details ::EncodingConverterImplBase {
87
86
const ConversionType ConvType;
88
87
@@ -169,8 +168,7 @@ EncodingConverterICU::convertString(StringRef Source,
169
168
/* pivotLimit=*/ NULL , /* reset=*/ true ,
170
169
/* flush=*/ true , &EC);
171
170
if (U_FAILURE (EC)) {
172
- if (EC == U_BUFFER_OVERFLOW_ERROR &&
173
- Capacity < std::numeric_limits<size_t >::max ()) {
171
+ if (EC == U_BUFFER_OVERFLOW_ERROR && Capacity < Result.max_size ()) {
174
172
HandleOverflow (Capacity, Output, OutputLength, Result);
175
173
continue ;
176
174
}
@@ -246,7 +244,7 @@ EncodingConverterIconv::convertString(StringRef Source,
246
244
this ](size_t Ret) {
247
245
if (Ret == static_cast <size_t >(-1 )) {
248
246
// An error occurred. Check if we can gracefully handle it.
249
- if (errno == E2BIG && Capacity < std::numeric_limits< size_t >:: max ()) {
247
+ if (errno == E2BIG && Capacity < Result. max_size ()) {
250
248
HandleOverflow (Capacity, Output, OutputLength, Result);
251
249
// Reset converter
252
250
iconv (ConvDesc, nullptr , nullptr , nullptr , nullptr );
@@ -301,7 +299,7 @@ void EncodingConverterIconv::reset() {
301
299
ErrorOr<EncodingConverter> EncodingConverter::create (TextEncoding CPFrom,
302
300
TextEncoding CPTo) {
303
301
304
- // text encodings should be distinct
302
+ // Text encodings should be distinct.
305
303
if (CPFrom == CPTo)
306
304
return std::make_error_code (std::errc::invalid_argument);
307
305
@@ -317,22 +315,22 @@ ErrorOr<EncodingConverter> EncodingConverter::create(TextEncoding CPFrom,
317
315
std::make_unique<EncodingConverterTable>(Conversion));
318
316
}
319
317
320
- ErrorOr<EncodingConverter> EncodingConverter::create (StringRef CSFrom ,
321
- StringRef CSTo ) {
322
- std::optional<TextEncoding> From = getKnownEncoding (CSFrom );
323
- std::optional<TextEncoding> To = getKnownEncoding (CSTo );
324
- if (From && To ) {
325
- ErrorOr<EncodingConverter> Converter = create (*From , *To );
318
+ ErrorOr<EncodingConverter> EncodingConverter::create (StringRef From ,
319
+ StringRef To ) {
320
+ std::optional<TextEncoding> FromEncoding = getKnownEncoding (From );
321
+ std::optional<TextEncoding> ToEncoding = getKnownEncoding (To );
322
+ if (FromEncoding && ToEncoding ) {
323
+ ErrorOr<EncodingConverter> Converter = create (*FromEncoding , *ToEncoding );
326
324
if (Converter)
327
325
return Converter;
328
326
}
329
327
#if HAVE_ICU
330
328
UErrorCode EC = U_ZERO_ERROR;
331
- UConverterUniquePtr FromConvDesc (ucnv_open (CSFrom .str ().c_str (), &EC));
329
+ UConverterUniquePtr FromConvDesc (ucnv_open (From .str ().c_str (), &EC));
332
330
if (U_FAILURE (EC)) {
333
331
return std::error_code (errno, std::generic_category ());
334
332
}
335
- UConverterUniquePtr ToConvDesc (ucnv_open (CSTo .str ().c_str (), &EC));
333
+ UConverterUniquePtr ToConvDesc (ucnv_open (To .str ().c_str (), &EC));
336
334
if (U_FAILURE (EC)) {
337
335
return std::error_code (errno, std::generic_category ());
338
336
}
@@ -341,7 +339,7 @@ ErrorOr<EncodingConverter> EncodingConverter::create(StringRef CSFrom,
341
339
std::move (ToConvDesc));
342
340
return EncodingConverter (std::move (Converter));
343
341
#elif HAVE_ICONV
344
- iconv_t ConvDesc = iconv_open (CSTo .str ().c_str (), CSFrom .str ().c_str ());
342
+ iconv_t ConvDesc = iconv_open (To .str ().c_str (), From .str ().c_str ());
345
343
if (ConvDesc == (iconv_t )-1 )
346
344
return std::error_code (errno, std::generic_category ());
347
345
return EncodingConverter (std::make_unique<EncodingConverterIconv>(ConvDesc));
0 commit comments