diff --git a/.gitignore b/.gitignore
index b203ea61f..3fc2fde3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+/defined_symbols.txt
+/undefined_symbols.txt
+/code.tar.gz
+/compiler-rt
 *.rs.bk
 Cargo.lock
 target
diff --git a/ci/run.sh b/ci/run.sh
index 6376d2216..2e3993a38 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -42,9 +42,9 @@ if [ "$NM" = "" ]; then
 fi
 
 if [ -d /target ]; then
-    path=/target/${1}/debug/deps/libcompiler_builtins-*.rlib
+    path=/target/${1}/release/deps/libcompiler_builtins-*.rlib
 else
-    path=target/${1}/debug/deps/libcompiler_builtins-*.rlib
+    path=target/${1}/release/deps/libcompiler_builtins-*.rlib
 fi
 
 # Look out for duplicated symbols when we include the compiler-rt (C) implementation
@@ -80,10 +80,13 @@ rm -f $path
 
 # Verify that we haven't drop any intrinsic/symbol
 build_intrinsics="$cargo build --target $1 -v --example intrinsics"
-RUSTFLAGS="-C debug-assertions=no" $build_intrinsics
-RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --release
-RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --features c
-RUSTFLAGS="-C debug-assertions=no" $build_intrinsics --features c --release
+# Match the behavior with
+# https://github.com/rust-lang/rust/blob/e49442bf9815a67e68f9d9f2f80560ec1d754b31/Cargo.toml#L66-L86.
+export RUSTFLAGS="-C debug-assertions=no -C overflow-checks=no -C codegen-units=10000"
+$build_intrinsics
+$build_intrinsics --release
+$build_intrinsics --features c
+$build_intrinsics --features c --release
 
 # Verify that there are no undefined symbols to `panic` within our
 # implementations
@@ -93,15 +96,18 @@ if [ -z "$DEBUG_LTO_BUILD_DOESNT_WORK" ]; then
     RUSTFLAGS="-C debug-assertions=no" \
     CARGO_INCREMENTAL=0 \
     CARGO_PROFILE_DEV_LTO=true \
-    $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics
+    $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics --release
 fi
 CARGO_PROFILE_RELEASE_LTO=true \
 $cargo rustc --features "$INTRINSICS_FEATURES" --target $1 --example intrinsics --release
+unset RUSTFLAGS
 
-# Ensure no references to a panicking function
+# Ensure no references to any symbols from core
 for rlib in $(echo $path); do
     set +ex
-    $NM -u $rlib 2>&1 | grep panicking
+    $NM --quiet -U $rlib | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > defined_symbols.txt
+    $NM --quiet -u $rlib | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > undefined_symbols.txt
+    grep -v -F -x -f defined_symbols.txt undefined_symbols.txt
 
     if test $? = 0; then
         exit 1
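
The rewritten check above is stricter than the old `grep panicking`: it now fails if the rlib references any `core` symbol it does not define itself. The first `nm` invocation collects the defined `_ZN4core` symbols (`T` entries), the second collects the undefined ones (`U` entries), and the final `grep -v -F -x -f` prints every undefined entry with no matching defined entry, failing the build when any remain. The two intermediate files are the same ones added to `.gitignore` above. A minimal Rust sketch of that set difference, assuming the two `*_symbols.txt` files have already been written by the script (the shell pipeline, not this program, is what CI actually runs):

    use std::collections::BTreeSet;
    use std::fs;

    // Read one symbol name per line, as produced by the `nm | awk | sort | uniq` stages.
    fn read_symbols(path: &str) -> BTreeSet<String> {
        fs::read_to_string(path)
            .unwrap_or_default()
            .lines()
            .map(str::to_owned)
            .collect()
    }

    fn main() {
        let defined = read_symbols("defined_symbols.txt");
        let undefined = read_symbols("undefined_symbols.txt");
        // Equivalent of `grep -v -F -x -f defined_symbols.txt undefined_symbols.txt`:
        // undefined core symbols not satisfied inside the rlib must come from core.
        let leaked: Vec<_> = undefined.difference(&defined).collect();
        if !leaked.is_empty() {
            eprintln!("references to core: {:?}", leaked);
            std::process::exit(1);
        }
    }
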
diff --git a/src/float/conv.rs b/src/float/conv.rs
index 790c0ab9f..d74db5e97 100644
--- a/src/float/conv.rs
+++ b/src/float/conv.rs
@@ -5,6 +5,7 @@
 ///
 /// The algorithm is explained here:
 mod int_to_float {
+    #[rustc_nounwind]
     pub fn u32_to_f32_bits(i: u32) -> u32 {
         if i == 0 {
             return 0;
@@ -17,6 +18,7 @@ mod int_to_float {
         (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
     }
 
+    #[rustc_nounwind]
     pub fn u32_to_f64_bits(i: u32) -> u64 {
         if i == 0 {
             return 0;
@@ -27,6 +29,7 @@ mod int_to_float {
         (e << 52) + m // Bit 53 of m will overflow into e.
     }
 
+    #[rustc_nounwind]
     pub fn u64_to_f32_bits(i: u64) -> u32 {
         let n = i.leading_zeros();
         let y = i.wrapping_shl(n);
@@ -37,6 +40,7 @@ mod int_to_float {
         (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
     }
 
+    #[rustc_nounwind]
     pub fn u64_to_f64_bits(i: u64) -> u64 {
         if i == 0 {
             return 0;
@@ -49,6 +53,7 @@ mod int_to_float {
         (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
     }
 
+    #[rustc_nounwind]
     pub fn u128_to_f32_bits(i: u128) -> u32 {
         let n = i.leading_zeros();
         let y = i.wrapping_shl(n);
@@ -59,6 +64,7 @@ mod int_to_float {
         (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
     }
 
+    #[rustc_nounwind]
     pub fn u128_to_f64_bits(i: u128) -> u64 {
         let n = i.leading_zeros();
         let y = i.wrapping_shl(n);
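
All six conversions share the pattern visible in the surviving context lines: count leading zeros to normalize, derive the biased exponent `e` from the shift, round the truncated significand `m` to nearest (ties to even), then combine with `+` rather than `|` so a rounding carry out of the mantissa bumps the exponent. A self-contained sketch of the `u32` to `f32` case; the diff elides most of the bodies, so the intermediate names `a` and `b` here are illustrative rather than the crate's verbatim code:

    fn u32_to_f32_bits(i: u32) -> u32 {
        if i == 0 {
            return 0; // An all-zero pattern encodes +0.0.
        }
        let n = i.leading_zeros();
        let a = (i << n) >> 8; // 24 significant bits, leading one at bit 23.
        let b = (i << n) << 24; // Discarded low bits, needed only for rounding.
        // Round to nearest, ties to even: add one exactly when the tail is more
        // than half, or exactly half while the mantissa is odd.
        let m = a + ((b - (b >> 31 & !a)) >> 31);
        let e = 157 - n; // Biased exponent minus one; bit 23 of `m` supplies the rest.
        (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
    }

    fn main() {
        // Spot-check against the compiler's own conversion, covering a round-up,
        // a tie broken to even, and a rounding carry into the exponent.
        for i in [1, 3, (1 << 24) + 1, (1 << 24) + 3, u32::MAX] {
            assert_eq!(f32::from_bits(u32_to_f32_bits(i)), i as f32);
        }
    }
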
diff --git a/src/float/mod.rs b/src/float/mod.rs
index fdbe9dde3..b9f34389e 100644
--- a/src/float/mod.rs
+++ b/src/float/mod.rs
@@ -120,12 +120,15 @@ macro_rules! float_impl {
         const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS;
         const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
 
+        #[rustc_nounwind]
         fn repr(self) -> Self::Int {
             self.to_bits()
         }
+        #[rustc_nounwind]
         fn signed_repr(self) -> Self::SignedInt {
             self.to_bits() as Self::SignedInt
         }
+        #[rustc_nounwind]
         fn eq_repr(self, rhs: Self) -> bool {
             if self.is_nan() && rhs.is_nan() {
                 true
@@ -133,21 +136,27 @@ macro_rules! float_impl {
                 self.repr() == rhs.repr()
             }
         }
+        #[rustc_nounwind]
         fn sign(self) -> bool {
             self.signed_repr() < Self::SignedInt::ZERO
         }
+        #[rustc_nounwind]
         fn exp(self) -> Self::ExpInt {
             ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt
         }
+        #[rustc_nounwind]
         fn frac(self) -> Self::Int {
             self.to_bits() & Self::SIGNIFICAND_MASK
         }
+        #[rustc_nounwind]
        fn imp_frac(self) -> Self::Int {
             self.frac() | Self::IMPLICIT_BIT
         }
+        #[rustc_nounwind]
         fn from_repr(a: Self::Int) -> Self {
             Self::from_bits(a)
         }
+        #[rustc_nounwind]
         fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self {
             Self::from_repr(
                 ((sign as Self::Int) << (Self::BITS - 1))
@@ -155,6 +164,7 @@ macro_rules! float_impl {
                 | (significand & Self::SIGNIFICAND_MASK),
             )
         }
+        #[rustc_nounwind]
         fn normalize(significand: Self::Int) -> (i32, Self::Int) {
             let shift = significand
                 .leading_zeros()
@@ -164,6 +174,7 @@ macro_rules! float_impl {
                 significand << shift as Self::Int,
             )
         }
+        #[rustc_nounwind]
         fn is_subnormal(self) -> bool {
             (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO
         }
diff --git a/src/int/mod.rs b/src/int/mod.rs
index 509f9fdae..e355e52bb 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -151,50 +151,62 @@ macro_rules! int_impl_common {
             }
         };
 
+        #[rustc_nounwind]
         fn from_bool(b: bool) -> Self {
             b as $ty
         }
 
+        #[rustc_nounwind]
        fn logical_shr(self, other: u32) -> Self {
             Self::from_unsigned(self.unsigned().wrapping_shr(other))
         }
 
+        #[rustc_nounwind]
         fn is_zero(self) -> bool {
             self == Self::ZERO
         }
 
+        #[rustc_nounwind]
         fn wrapping_neg(self) -> Self {
             <Self>::wrapping_neg(self)
         }
 
+        #[rustc_nounwind]
         fn wrapping_add(self, other: Self) -> Self {
             <Self>::wrapping_add(self, other)
         }
 
+        #[rustc_nounwind]
         fn wrapping_mul(self, other: Self) -> Self {
             <Self>::wrapping_mul(self, other)
         }
 
+        #[rustc_nounwind]
         fn wrapping_sub(self, other: Self) -> Self {
             <Self>::wrapping_sub(self, other)
         }
 
+        #[rustc_nounwind]
         fn wrapping_shl(self, other: u32) -> Self {
             <Self>::wrapping_shl(self, other)
         }
 
+        #[rustc_nounwind]
         fn wrapping_shr(self, other: u32) -> Self {
             <Self>::wrapping_shr(self, other)
         }
 
+        #[rustc_nounwind]
         fn rotate_left(self, other: u32) -> Self {
             <Self>::rotate_left(self, other)
         }
 
+        #[rustc_nounwind]
         fn overflowing_add(self, other: Self) -> (Self, bool) {
             <Self>::overflowing_add(self, other)
         }
 
+        #[rustc_nounwind]
         fn leading_zeros(self) -> u32 {
             <Self>::leading_zeros(self)
         }
diff --git a/src/int/specialized_div_rem/asymmetric.rs b/src/int/specialized_div_rem/asymmetric.rs
index 56ce188a3..9398daad0 100644
--- a/src/int/specialized_div_rem/asymmetric.rs
+++ b/src/int/specialized_div_rem/asymmetric.rs
@@ -14,9 +14,13 @@ macro_rules! impl_asymmetric {
         $uH:ident, // unsigned integer with half the bit width of $uX
         $uX:ident, // unsigned integer with half the bit width of $uD
         $uD:ident // unsigned integer type for the inputs and outputs of `$fn`
+        $(, $fun_attr:meta)* // attributes for the function
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
+        $(
+            #[$fun_attr]
+        )*
         pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
             let n: u32 = $n_h * 2;
 
diff --git a/src/int/specialized_div_rem/delegate.rs b/src/int/specialized_div_rem/delegate.rs
index 330c6e4f8..5e82faa4b 100644
--- a/src/int/specialized_div_rem/delegate.rs
+++ b/src/int/specialized_div_rem/delegate.rs
@@ -14,9 +14,13 @@ macro_rules! impl_delegate {
         $uX:ident, // unsigned integer with half the bit width of $uD.
         $uD:ident, // unsigned integer type for the inputs and outputs of `$fn`
         $iD:ident // signed integer type with the same bitwidth as `$uD`
+        $(, $fun_attr:meta)* // attributes for the function
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
+        $(
+            #[$fun_attr]
+        )*
         pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
             // The two possibility algorithm, undersubtracting long division algorithm, or any kind
             // of reciprocal based algorithm will not be fastest, because they involve large
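
The `$(, $fun_attr:meta)*` parameter added to `impl_asymmetric!` and `impl_delegate!` (and, below, to `impl_trifecta!`, `impl_binary_long!`, and `impl_normalization_shift!`) lets callers forward zero or more attributes onto the generated function, which is how `rustc_nounwind` reaches every generated division function. A reduced sketch of the mechanism, using a hypothetical `impl_div!` macro rather than the crate's; since `rustc_nounwind` needs a nightly feature gate, the example forwards `allow(dead_code)` instead, just as the `impl_normalization_shift!` call sites do:

    // Trailing `$(, $fun_attr:meta)*` accepts any number of attribute arguments
    // and splices each one onto the generated function as `#[$fun_attr]`.
    macro_rules! impl_div {
        ($fn:ident, $uD:ident $(, $fun_attr:meta)*) => {
            $(
                #[$fun_attr]
            )*
            pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
                (duo / div, duo % div)
            }
        };
    }

    // Zero or more attributes may follow the fixed arguments.
    impl_div!(u32_div_rem, u32, allow(dead_code));

    fn main() {
        assert_eq!(u32_div_rem(7, 3), (2, 1));
    }
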
diff --git a/src/int/specialized_div_rem/mod.rs b/src/int/specialized_div_rem/mod.rs
index 760f5f5b7..ad70de380 100644
--- a/src/int/specialized_div_rem/mod.rs
+++ b/src/int/specialized_div_rem/mod.rs
@@ -110,7 +110,8 @@ impl_normalization_shift!(
     32,
     u32,
     i32,
-    allow(dead_code)
+    allow(dead_code),
+    rustc_nounwind
 );
 impl_normalization_shift!(
     u64_normalization_shift,
@@ -118,7 +119,8 @@ impl_normalization_shift!(
     64,
     u64,
     i64,
-    allow(dead_code)
+    allow(dead_code),
+    rustc_nounwind
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -149,7 +151,8 @@ impl_trifecta!(
     32,
     u32,
     u64,
-    u128
+    u128,
+    rustc_nounwind
 );
 
 // If the pointer width less than 64, then the target architecture almost certainly does not have
@@ -168,7 +171,8 @@ impl_delegate!(
     u32,
     u64,
     u128,
-    i128
+    i128,
+    rustc_nounwind
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -209,7 +213,8 @@ impl_asymmetric!(
     32,
     u32,
     u64,
-    u128
+    u128,
+    rustc_nounwind
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -241,7 +246,8 @@ impl_delegate!(
     u16,
     u32,
     u64,
-    i64
+    i64,
+    rustc_nounwind
 );
 
 // When not on x86 and the pointer width is 64, use `binary_long`.
@@ -255,7 +261,8 @@ impl_binary_long!(
     u64_normalization_shift,
     64,
     u64,
-    i64
+    i64,
+    rustc_nounwind
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -296,7 +303,8 @@ impl_asymmetric!(
     16,
     u16,
     u32,
-    u64
+    u64,
+    rustc_nounwind
 );
 
 // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
@@ -307,5 +315,6 @@ impl_binary_long!(
     32,
     u32,
     i32,
-    allow(dead_code)
+    allow(dead_code),
+    rustc_nounwind
 );
diff --git a/src/int/specialized_div_rem/trifecta.rs b/src/int/specialized_div_rem/trifecta.rs
index 7e104053b..ecb0bcd74 100644
--- a/src/int/specialized_div_rem/trifecta.rs
+++ b/src/int/specialized_div_rem/trifecta.rs
@@ -12,9 +12,13 @@ macro_rules! impl_trifecta {
         $uH:ident, // unsigned integer with half the bit width of $uX
         $uX:ident, // unsigned integer with half the bit width of $uD
         $uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name`
+        $(, $fun_attr:meta)* // attributes for the function
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
+        $(
+            #[$fun_attr]
+        )*
         pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
             // This is called the trifecta algorithm because it uses three main algorithms: short
             // division for small divisors, the two possibility algorithm for large divisors, and an
diff --git a/src/lib.rs b/src/lib.rs
index 47aef540e..2d2d20d4e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -12,6 +12,7 @@
 #![feature(linkage)]
 #![feature(naked_functions)]
 #![feature(repr_simd)]
+#![feature(rustc_attrs)]
 #![no_builtins]
 #![no_std]
 #![allow(unused_features)]
diff --git a/src/mem/x86_64.rs b/src/mem/x86_64.rs
index 40b67093f..4fa080c1e 100644
--- a/src/mem/x86_64.rs
+++ b/src/mem/x86_64.rs
@@ -304,6 +304,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
 }
 
 /// Determine optimal parameters for a `rep` instruction.
+#[rustc_nounwind]
 fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) {
     // Unaligned writes are still slow on modern processors, so align the destination address.
     let pre_byte_count = ((8 - (dest as usize & 0b111)) & 0b111).min(count);
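
`rustc_nounwind` is an internal rustc attribute, used throughout `core`, that promises a function can never unwind; the compiler then emits no landing pads and no references to the panic machinery for it, which is exactly what the stricter symbol check in ci/run.sh enforces. It is perma-unstable, hence the `#![feature(rustc_attrs)]` addition to src/lib.rs. A minimal nightly-only sketch of its use outside this crate, assuming a nightly toolchain:

    #![feature(rustc_attrs)] // Nightly-only gate for internal attributes.

    // Promise the compiler this function never unwinds. Wrapping arithmetic
    // cannot trip overflow checks, so the promise holds in any build profile.
    #[rustc_nounwind]
    fn wrapping_add_demo(a: u32, b: u32) -> u32 {
        a.wrapping_add(b)
    }

    fn main() {
        assert_eq!(wrapping_add_demo(u32::MAX, 1), 0);
    }
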