From 3ad4d24751d2e11c8b98dcac5e30720e138de788 Mon Sep 17 00:00:00 2001 From: Neutron3529 Date: Thu, 3 Nov 2022 16:35:37 +0800 Subject: [PATCH 1/3] Optimize the code to run faster. This code is copied from https://github.com/rust-lang/rust/blob/master/library/std/src/f32.rs and https://github.com/rust-lang/rust/blob/master/library/std/src/f64.rs. Using `r + rhs.abs()` is faster than computing the result with an explicit branch on the sign of `rhs`. Bench result: ``` $ cargo bench Compiling div-euclid v0.1.0 (/me/div-euclid) Finished bench [optimized] target(s) in 1.01s Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef) running 7 tests test tests::it_works ... ignored test tests::bench_aaabs ... bench: 10,498,793 ns/iter (+/- 104,360) test tests::bench_aadefault ... bench: 11,061,862 ns/iter (+/- 94,107) test tests::bench_abs ... bench: 10,477,193 ns/iter (+/- 81,942) test tests::bench_default ... bench: 10,622,983 ns/iter (+/- 25,119) test tests::bench_zzabs ... bench: 10,481,971 ns/iter (+/- 43,787) test tests::bench_zzdefault ... bench: 11,074,976 ns/iter (+/- 29,633) test result: ok. 
0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s ``` bench code: ``` #![feature(test)] extern crate test; fn rem_euclid(a:i32,rhs:i32)->i32{ let r = a % rhs; if r < 0 { r + rhs.abs() } else { r } } #[cfg(test)] mod tests { use super::*; use test::Bencher; use rand::prelude::*; use rand::rngs::SmallRng; const N:i32=1000; #[test] fn it_works() { let a: i32 = 7; // or any other integer type let b = 4; let d:Vec=(-N..=N).collect(); let n:Vec=(-N..0).chain(1..=N).collect(); for i in &d { for j in &n { assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j)); } } assert_eq!(rem_euclid(a,b), 3); assert_eq!(rem_euclid(-a,b), 1); assert_eq!(rem_euclid(a,-b), 3); assert_eq!(rem_euclid(-a,-b), 1); } #[bench] fn bench_aaabs(b: &mut Bencher) { let mut d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); n.shuffle(&mut rng); d.shuffle(&mut rng); n.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_aadefault(b: &mut Bencher) { let mut d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); n.shuffle(&mut rng); d.shuffle(&mut rng); n.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } #[bench] fn bench_abs(b: &mut Bencher) { let d:Vec=(-N..=N).collect(); let n:Vec=(-N..0).chain(1..=N).collect(); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_default(b: &mut Bencher) { let d:Vec=(-N..=N).collect(); let n:Vec=(-N..0).chain(1..=N).collect(); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } #[bench] fn bench_zzabs(b: &mut Bencher) { let mut 
d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); d.shuffle(&mut rng); n.shuffle(&mut rng); d.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_zzdefault(b: &mut Bencher) { let mut d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); d.shuffle(&mut rng); n.shuffle(&mut rng); d.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } } ``` --- library/core/src/num/int_macros.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 81f050cb283d4..812deebce7a5e 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -2067,15 +2067,7 @@ macro_rules! int_impl { #[rustc_inherit_overflow_checks] pub const fn rem_euclid(self, rhs: Self) -> Self { let r = self % rhs; - if r < 0 { - if rhs < 0 { - r - rhs - } else { - r + rhs - } - } else { - r - } + if r < 0 { r + rhs.abs() } else { r } } /// Calculates the quotient of `self` and `rhs`, rounding the result towards negative infinity. From aafe6db079ab22225f89fc573e4e86e89e8d825a Mon Sep 17 00:00:00 2001 From: Neutron3529 Date: Thu, 3 Nov 2022 17:08:10 +0800 Subject: [PATCH 2/3] fix the overflow warning. benchmark result: ``` $ cargo bench Compiling div-euclid v0.1.0 (/me/div-euclid) Finished bench [optimized] target(s) in 1.01s Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef) running 7 tests test tests::it_works ... ignored test tests::bench_aaabs ... bench: 10,498,793 ns/iter (+/- 104,360) test tests::bench_aadefault ... 
bench: 11,061,862 ns/iter (+/- 94,107) test tests::bench_abs ... bench: 10,477,193 ns/iter (+/- 81,942) test tests::bench_default ... bench: 10,622,983 ns/iter (+/- 25,119) test tests::bench_zzabs ... bench: 10,481,971 ns/iter (+/- 43,787) test tests::bench_zzdefault ... bench: 11,074,976 ns/iter (+/- 29,633) test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s ``` benchmark code: ```rust #![feature(test)] extern crate test; #[inline(always)] fn rem_euclid(a:i32,rhs:i32)->i32{ let r = a % rhs; if r < 0 { // if rhs is `integer::MIN`, rhs.wrapping_abs() == rhs, // thus r.wrapping_add(rhs.wrapping_abs()) == r.wrapping_add(rhs) == r - rhs, // which suits our need. // otherwise, rhs.wrapping_abs() == rhs.abs(), and adding it won't overflow since r is negative. r.wrapping_add(rhs.wrapping_abs()) } else { r } } #[cfg(test)] mod tests { use super::*; use test::Bencher; use rand::prelude::*; use rand::rngs::SmallRng; const N:i32=1000; #[test] fn it_works() { let a: i32 = 7; // or any other integer type let b = 4; let d:Vec=(-N..=N).collect(); let n:Vec=(-N..0).chain(1..=N).collect(); for i in &d { for j in &n { assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j)); } } assert_eq!(rem_euclid(a,b), 3); assert_eq!(rem_euclid(-a,b), 1); assert_eq!(rem_euclid(a,-b), 3); assert_eq!(rem_euclid(-a,-b), 1); } #[bench] fn bench_aaabs(b: &mut Bencher) { let mut d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); n.shuffle(&mut rng); d.shuffle(&mut rng); n.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_aadefault(b: &mut Bencher) { let mut d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); 
n.shuffle(&mut rng); d.shuffle(&mut rng); n.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } #[bench] fn bench_abs(b: &mut Bencher) { let d:Vec=(-N..=N).collect(); let n:Vec=(-N..0).chain(1..=N).collect(); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_default(b: &mut Bencher) { let d:Vec=(-N..=N).collect(); let n:Vec=(-N..0).chain(1..=N).collect(); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } #[bench] fn bench_zzabs(b: &mut Bencher) { let mut d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); d.shuffle(&mut rng); n.shuffle(&mut rng); d.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_zzdefault(b: &mut Bencher) { let mut d:Vec=(-N..=N).collect(); let mut n:Vec=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); d.shuffle(&mut rng); n.shuffle(&mut rng); d.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } } ``` --- library/core/src/num/int_macros.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 812deebce7a5e..36b6d6e774eaf 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -2067,7 +2067,15 @@ macro_rules! 
int_impl { #[rustc_inherit_overflow_checks] pub const fn rem_euclid(self, rhs: Self) -> Self { let r = self % rhs; - if r < 0 { r + rhs.abs() } else { r } + if r < 0 { + // if rhs is `integer::MIN`, rhs.wrapping_abs() == rhs.wrapping_abs, + // thus r.wrapping_add(rhs.wrapping_abs()) == r.wrapping_add(rhs) == r - rhs, + // which suits our need. + // otherwise, rhs.wrapping_abs() == -rhs, which won't overflow since r is negative. + r.wrapping_add(rhs.wrapping_abs()) + } else { + r + } } /// Calculates the quotient of `self` and `rhs`, rounding the result towards negative infinity. From d81a0e9e2df9d6a693d7eef362c0aed393e1bf2e Mon Sep 17 00:00:00 2001 From: Neutron3529 Date: Fri, 4 Nov 2022 15:37:33 +0800 Subject: [PATCH 3/3] update comment --- library/core/src/num/int_macros.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 36b6d6e774eaf..a6deb3f65ec5d 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -2068,10 +2068,14 @@ macro_rules! int_impl { pub const fn rem_euclid(self, rhs: Self) -> Self { let r = self % rhs; if r < 0 { - // if rhs is `integer::MIN`, rhs.wrapping_abs() == rhs.wrapping_abs, - // thus r.wrapping_add(rhs.wrapping_abs()) == r.wrapping_add(rhs) == r - rhs, - // which suits our need. - // otherwise, rhs.wrapping_abs() == -rhs, which won't overflow since r is negative. + // Semantically equivalent to `if rhs < 0 { r - rhs } else { r + rhs }`. + // If `rhs` is not `Self::MIN`, then `r + abs(rhs)` will not overflow + // and is clearly equivalent, because `r` is negative. + // Otherwise, `rhs` is `Self::MIN`, then we have + // `r.wrapping_add(Self::MIN.wrapping_abs())`, which evaluates + // to `r.wrapping_add(Self::MIN)`, which is equivalent to + // `r - Self::MIN`, which is what we wanted (and will not overflow + // for negative `r`). r.wrapping_add(rhs.wrapping_abs()) } else { r