Skip to content

Commit de3b2a0

Browse files
committed
Perform Utf8ToUnicode transformation in-place
- Removed the `inplace` helper function from the class declaration, as it is only referenced by the implementation.
1 parent af8be01 commit de3b2a0

File tree

2 files changed

+29
-65
lines changed

2 files changed

+29
-65
lines changed

src/actions/transformations/utf8_to_unicode.cc

Lines changed: 29 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -20,67 +20,33 @@
2020
#include "src/utils/string.h"
2121

2222

23-
namespace modsecurity::actions::transformations {
24-
25-
26-
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
27-
std::string ret;
28-
unsigned char *input;
29-
int _changed = 0;
30-
char *out;
31-
32-
input = reinterpret_cast<unsigned char *>
33-
(malloc(sizeof(char) * value.length()+1));
23+
constexpr int UNICODE_ERROR_CHARACTERS_MISSING = -1;
24+
constexpr int UNICODE_ERROR_INVALID_ENCODING = -2;
3425

35-
if (input == NULL) {
36-
return "";
37-
}
38-
39-
memcpy(input, value.c_str(), value.length()+1);
4026

41-
out = inplace(input, value.size() + 1, &_changed);
42-
free(input);
43-
if (out != NULL) {
44-
ret.assign(reinterpret_cast<char *>(out),
45-
strlen(reinterpret_cast<char *>(out)));
46-
free(out);
47-
}
27+
namespace modsecurity::actions::transformations {
4828

49-
const auto changed = ret != value;
50-
value = ret;
51-
return changed;
52-
}
5329

30+
static inline bool encode(std::string &value) {
31+
auto input = reinterpret_cast<unsigned char*>(value.data());
32+
const auto input_len = value.length();
5433

55-
char *Utf8ToUnicode::inplace(unsigned char *input,
56-
uint64_t input_len, int *changed) {
57-
unsigned int count = 0;
58-
char *data;
59-
char *data_orig;
60-
unsigned int i, len, j;
61-
unsigned int bytes_left = input_len;
34+
bool changed = false;
35+
std::string::size_type count = 0;
36+
auto bytes_left = input_len;
6237
unsigned char unicode[8];
63-
*changed = 0;
6438

6539
/* RFC 3629 states that UTF-8 characters are encoded using sequences of 1 to 4 octets. */
6640
/* Max size per character should fit in 4 bytes */
67-
len = input_len * 4 + 1;
68-
data = reinterpret_cast<char *>(malloc(sizeof(char) * len));
69-
if (data == NULL) {
70-
return NULL;
71-
}
72-
data_orig = data;
41+
const auto len = input_len * 4 + 1;
42+
std::string ret(len, {});
43+
auto data = ret.data();
7344

74-
if (input == NULL) {
75-
free(data);
76-
return NULL;
77-
}
78-
79-
for (i = 0; i < bytes_left;) {
45+
for (std::string::size_type i = 0; i < bytes_left;) {
8046
int unicode_len = 0;
8147
unsigned int d = 0;
8248
unsigned char c;
83-
unsigned char *utf = (unsigned char *)&input[i];
49+
auto utf = &input[i];
8450

8551
c = *utf;
8652

@@ -108,7 +74,7 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
10874
unicode_len = UNICODE_ERROR_INVALID_ENCODING;
10975
} else {
11076
unicode_len = 2;
111-
count+=6;
77+
count += 6;
11278
if (count <= len) {
11379
int length = 0;
11480
/* compute character number */
@@ -138,11 +104,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
138104
break;
139105
}
140106

141-
for (j = 0; j < length; j++) {
107+
for (std::string::size_type j = 0; j < length; j++) {
142108
*data++ = unicode[j];
143109
}
144110

145-
*changed = 1;
111+
changed = true;
146112
}
147113
}
148114
} else if ((c & 0xF0) == 0xE0) {
@@ -190,11 +156,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
190156
break;
191157
}
192158

193-
for (j = 0; j < length; j++) {
159+
for (std::string::size_type j = 0; j < length; j++) {
194160
*data++ = unicode[j];
195161
}
196162

197-
*changed = 1;
163+
changed = true;
198164
}
199165
}
200166
} else if ((c & 0xF8) == 0xF0) {
@@ -252,11 +218,11 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
252218
break;
253219
}
254220

255-
for (j = 0; j < length; j++) {
221+
for (std::string::size_type j = 0; j < length; j++) {
256222
*data++ = unicode[j];
257223
}
258224

259-
*changed = 1;
225+
changed = true;
260226
}
261227
}
262228
} else {
@@ -300,7 +266,14 @@ char *Utf8ToUnicode::inplace(unsigned char *input,
300266

301267
*data ='\0';
302268

303-
return data_orig;
269+
ret.resize(data - ret.c_str());
270+
std::swap(value, ret);
271+
return changed;
272+
}
273+
274+
275+
bool Utf8ToUnicode::transform(std::string &value, const Transaction *trans) const {
276+
return encode(value);
304277
}
305278

306279

src/actions/transformations/utf8_to_unicode.h

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,6 @@
1818

1919
#include "transformation.h"
2020

21-
#define UNICODE_ERROR_CHARACTERS_MISSING -1
22-
#define UNICODE_ERROR_INVALID_ENCODING -2
23-
#define UNICODE_ERROR_OVERLONG_CHARACTER -3
24-
#define UNICODE_ERROR_RESTRICTED_CHARACTER -4
25-
#define UNICODE_ERROR_DECODING_ERROR -5
26-
2721
namespace modsecurity::actions::transformations {
2822

2923
class Utf8ToUnicode : public Transformation {
@@ -32,9 +26,6 @@ class Utf8ToUnicode : public Transformation {
3226
: Transformation(action) { }
3327

3428
bool transform(std::string &value, const Transaction *trans) const override;
35-
36-
static char *inplace(unsigned char *input, uint64_t input_len,
37-
int *changed);
3829
};
3930

4031
} // namespace modsecurity::actions::transformations

0 commit comments

Comments
 (0)