Skip to content

Commit c5b9ffd

Browse files
committed
rollup merge of #20391: daramos/utf8_lossy
Prior to 9bae6ec, `from_utf8_lossy` had a minor optimization in place that avoided having to loop from the beginning of the input slice. Recently, 4908017 implemented `Utf8Error::InvalidByte`, which makes this optimization possible again.
2 parents e80b981 + 8aeefbb commit c5b9ffd

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

src/libcollections/string.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,18 @@ impl String {
143143
/// ```
144144
#[stable]
145145
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
146+
let mut i = 0;
146147
match str::from_utf8(v) {
147148
Ok(s) => return Cow::Borrowed(s),
148-
Err(..) => {}
149+
Err(e) => {
150+
if let Utf8Error::InvalidByte(firstbad) = e {
151+
i = firstbad;
152+
}
153+
}
149154
}
150155

151156
static TAG_CONT_U8: u8 = 128u8;
152157
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
153-
let mut i = 0;
154158
let total = v.len();
155159
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
156160
unsafe { *xs.get_unchecked(i) }
@@ -174,7 +178,7 @@ impl String {
174178
// subseqidx is the index of the first byte of the subsequence we're looking at.
175179
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
176180
// them one by one.
177-
let mut subseqidx = 0;
181+
let mut subseqidx = i;
178182

179183
while i < total {
180184
let i_ = i;

0 commit comments

Comments
 (0)