|
20 | 20 | #include "src/utils/string.h"
|
21 | 21 |
|
22 | 22 |
|
23 |
| -namespace modsecurity::actions::transformations { |
24 |
| - |
25 |
| - |
26 |
| -bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const { |
27 |
| - std::string ret; |
28 |
| - unsigned char *input; |
29 |
| - int _changed = 0; |
30 |
| - char *out; |
31 |
| - |
32 |
| - input = reinterpret_cast<unsigned char *> |
33 |
| - (malloc(sizeof(char) * value.length()+1)); |
| 23 | +constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1; |
| 24 | +constexpr int UNICODE_ERROR_INVALID_ENCODING = -2; |
34 | 25 |
|
35 |
| - if (input == NULL) { |
36 |
| - return ""; |
37 |
| - } |
38 |
| - |
39 |
| - memcpy(input, value.c_str(), value.length()+1); |
40 | 26 |
|
41 |
| - out = inplace(input, value.size() + 1, &_changed); |
42 |
| - free(input); |
43 |
| - if (out != NULL) { |
44 |
| - ret.assign(reinterpret_cast<char *>(out), |
45 |
| - strlen(reinterpret_cast<char *>(out))); |
46 |
| - free(out); |
47 |
| - } |
| 27 | +namespace modsecurity::actions::transformations { |
48 | 28 |
|
49 |
| - const auto changed = ret != value; |
50 |
| - value = ret; |
51 |
| - return changed; |
52 |
| -} |
53 | 29 |
|
| 30 | +static inline bool encode(std::string &value) { |
| 31 | + auto input = reinterpret_cast<unsigned char*>(value.data()); |
| 32 | + const auto input_len = value.length(); |
54 | 33 |
|
55 |
| -char *Utf8ToUnicode::inplace(unsigned char *input, |
56 |
| - uint64_t input_len, int *changed) { |
57 |
| - unsigned int count = 0; |
58 |
| - char *data; |
59 |
| - char *data_orig; |
60 |
| - unsigned int i, len, j; |
61 |
| - unsigned int bytes_left = input_len; |
| 34 | + bool changed = false; |
| 35 | + std::string::size_type count = 0; |
| 36 | + auto bytes_left = input_len; |
62 | 37 | unsigned char unicode[8];
|
63 |
| - *changed = 0; |
64 | 38 |
|
65 | 39 | /* RFC3629 states that UTF-8 are encoded using sequences of 1 to 4 octets. */
|
66 | 40 | /* Max size per character should fit in 4 bytes */
|
67 |
| - len = input_len * 4 + 1; |
68 |
| - data = reinterpret_cast<char *>(malloc(sizeof(char) * len)); |
69 |
| - if (data == NULL) { |
70 |
| - return NULL; |
71 |
| - } |
72 |
| - data_orig = data; |
| 41 | + const auto len = input_len * 4 + 1; |
| 42 | + std::string ret(len, {}); |
| 43 | + auto data = ret.data(); |
73 | 44 |
|
74 |
| - if (input == NULL) { |
75 |
| - free(data); |
76 |
| - return NULL; |
77 |
| - } |
78 |
| - |
79 |
| - for (i = 0; i < bytes_left;) { |
| 45 | + for (std::string::size_type i = 0; i < bytes_left;) { |
80 | 46 | int unicode_len = 0;
|
81 | 47 | unsigned int d = 0;
|
82 | 48 | unsigned char c;
|
83 |
| - unsigned char *utf = (unsigned char *)&input[i]; |
| 49 | + auto utf = &input[i]; |
84 | 50 |
|
85 | 51 | c = *utf;
|
86 | 52 |
|
@@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
108 | 74 | unicode_len = UNICODE_ERROR_INVALID_ENCODING;
|
109 | 75 | } else {
|
110 | 76 | unicode_len = 2;
|
111 |
| - count+=6; |
| 77 | + count += 6; |
112 | 78 | if (count <= len) {
|
113 | 79 | int length = 0;
|
114 | 80 | /* compute character number */
|
@@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
138 | 104 | break;
|
139 | 105 | }
|
140 | 106 |
|
141 |
| - for (j = 0; j < length; j++) { |
| 107 | + for (std::string::size_type j = 0; j < length; j++) { |
142 | 108 | *data++ = unicode[j];
|
143 | 109 | }
|
144 | 110 |
|
145 |
| - *changed = 1; |
| 111 | + changed = true; |
146 | 112 | }
|
147 | 113 | }
|
148 | 114 | } else if ((c & 0xF0) == 0xE0) {
|
@@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
190 | 156 | break;
|
191 | 157 | }
|
192 | 158 |
|
193 |
| - for (j = 0; j < length; j++) { |
| 159 | + for (std::string::size_type j = 0; j < length; j++) { |
194 | 160 | *data++ = unicode[j];
|
195 | 161 | }
|
196 | 162 |
|
197 |
| - *changed = 1; |
| 163 | + changed = true; |
198 | 164 | }
|
199 | 165 | }
|
200 | 166 | } else if ((c & 0xF8) == 0xF0) {
|
@@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
252 | 218 | break;
|
253 | 219 | }
|
254 | 220 |
|
255 |
| - for (j = 0; j < length; j++) { |
| 221 | + for (std::string::size_type j = 0; j < length; j++) { |
256 | 222 | *data++ = unicode[j];
|
257 | 223 | }
|
258 | 224 |
|
259 |
| - *changed = 1; |
| 225 | + changed = true; |
260 | 226 | }
|
261 | 227 | }
|
262 | 228 | } else {
|
@@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
|
300 | 266 |
|
301 | 267 | *data ='\0';
|
302 | 268 |
|
303 |
| - return data_orig; |
| 269 | + ret.resize(data - ret.c_str()); |
| 270 | + std::swap(value, ret); |
| 271 | + return changed; |
| 272 | +} |
| 273 | + |
| 274 | + |
| 275 | +bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const { |
| 276 | + return encode(value); |
304 | 277 | }
|
305 | 278 |
|
306 | 279 |
|
|
0 commit comments