Open
Description
I tried this code:
use std::ops::Range;
/// Problems that the raw-string checkers report to their callback in place of
/// a character.
#[derive(Debug, PartialEq, Eq)]
pub enum EscapeError {
    /// A bare carriage return (`'\r'`) was found in the input.
    BareCarriageReturn,

    /// A bare carriage return (`'\r'`) was found inside a raw string.
    BareCarriageReturnInRawString,
}
/// Reports every character of `src` to `callback`, one call per `char`.
///
/// Each call receives the byte range the character occupies in `src` together
/// with `Ok(c)`, or `Err(EscapeError::BareCarriageReturn)` when the character
/// is `'\r'`.
///
/// This variant drives a `Chars` iterator with `while let` and derives the
/// byte offsets from the length of the not-yet-consumed remainder.
pub fn check_raw_str_while(
src: &str,
mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>),
) {
let mut chars = src.chars();
while let Some(c) = chars.next() {
// `chars.as_str()` is the unconsumed tail, so `src.len() - tail.len()` is the
// offset just past `c`; subtracting `c`'s encoded width gives where `c` starts.
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\r' => Err(EscapeError::BareCarriageReturn),
_ => Ok(c),
};
// The iterator has not advanced since `start` was computed, so this is the
// offset just past `c`.
let end = src.len() - chars.as_str().len();
callback(start..end, res);
}
}
/// Reports every character of `src` to `callback`, one call per `char`.
///
/// Each call receives the byte range the character occupies in `src` together
/// with `Ok(c)`, or `Err(EscapeError::BareCarriageReturn)` when the character
/// is `'\r'`.
///
/// This variant takes the start offset directly from `char_indices` and
/// visits the characters with `for_each`.
pub fn check_raw_str_char_indices(
src: &str,
mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>),
) {
src.char_indices().for_each(|(pos, c)| {
callback(
// `pos` is the byte offset of `c`; adding its encoded width gives the
// exclusive end of the range.
pos..pos + c.len_utf8(),
if c == '\r' {
Err(EscapeError::BareCarriageReturn)
} else {
Ok(c)
},
);
});
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both implementations must report the same `(range, result)` sequence.
    ///
    /// Besides the original ASCII sample, the inputs cover the empty string,
    /// bare `'\r'` (the `Err` branch) and characters encoded in 2, 3 and 4
    /// bytes (ranges wider than one byte).
    #[test]
    fn same() {
        let samples = [
            "abcdefghijklmnopqrstuvwxyz0123456789",
            "",
            "a\rb\r\n",
            "h\u{e9}llo \u{2713} \u{1f980}",
        ];
        for s in samples {
            let (mut r1, mut r2) = (vec![], vec![]);
            check_raw_str_while(s, |r, c| r1.push((r, c)));
            check_raw_str_char_indices(s, |r, c| r2.push((r, c)));
            assert_eq!(r1, r2, "outputs differ for input {s:?}");
        }
    }
}
with these benches:
#![feature(test)]
extern crate test;
use bench_char_indices::*;
use std::iter::repeat_n;
// Number of times the unit string is repeated to build each benchmark input.
const LEN: usize = 10_000;
// Generates a benchmark driver `fn $name(b, s, expected)` around `$check_raw`.
//
// * `$name`      - name of the generated function.
// * `$unit`      - type of the `expected` value compared against the first result.
// * `$check_raw` - the checker to call; it is handed the input string and a
//                  callback taking `(range, res)`.
macro_rules! fn_bench_check_raw {
($name:ident, $unit:ty, $check_raw:ident) => {
fn $name(b: &mut test::Bencher, s: &str, expected: $unit) {
// The input is `s` repeated `LEN` times, passed through `test::black_box`
// (an optimizer-barrier hint) before the timed section.
let input: String = test::black_box(repeat_n(s, LEN).collect());
assert_eq!(input.len(), LEN * s.len());
b.iter(|| {
// NOTE: the vector is created inside the timed closure, so its
// allocation and growth are part of what is measured.
let mut output = vec![];
$check_raw(&input, |range, res| output.push((range, res)));
// One callback per repetition of `s` is expected, so `s` should be a
// single character.
assert_eq!(output.len(), LEN);
// The first reported item must span the first copy of `s`.
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
});
}
};
}
// Instantiate one driver per implementation under comparison; both use `char`
// as the expected-value type.
fn_bench_check_raw!(bench_check_raw_str_while, char, check_raw_str_while);
fn_bench_check_raw!(
bench_check_raw_str_char_indices,
char,
check_raw_str_char_indices
);
// Benchmarks `check_raw_str_while` on the ASCII unit string "a".
#[bench]
fn bench_check_raw_str_ascii_while(b: &mut test::Bencher) {
bench_check_raw_str_while(b, "a", 'a');
}
// Benchmarks `check_raw_str_char_indices` on the ASCII unit string "a".
#[bench]
fn bench_check_raw_str_ascii_char_indices(b: &mut test::Bencher) {
bench_check_raw_str_char_indices(b, "a", 'a');
}
I expected to see this happen: no performance difference between the two implementations.
Instead, the `char_indices` version is more than 10% slower than the `while` version:
test bench_check_raw_str_ascii_char_indices ... bench: 27,733.96 ns/iter (+/- 103.24)
test bench_check_raw_str_ascii_while ... bench: 24,525.01 ns/iter (+/- 582.28)
Tested on: rustc 1.88.0-nightly (2e6882a 2025-05-05)