From 89ee040161d54dce3cecdfbb223ddedc59c8d3e5 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 20:29:09 +0100 Subject: [PATCH 1/4] Added global allow lint for `unused_unsafe` in preparation for feature `unsafe_block_in_unsafe_fn`. Documented all unsafe uses in `ascii_char` module. --- src/ascii_char.rs | 76 ++++++++++++++++++++++++++++++++++------------- src/lib.rs | 5 +++- 2 files changed, 59 insertions(+), 22 deletions(-) diff --git a/src/ascii_char.rs b/src/ascii_char.rs index c595e3a..7dce2be 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -367,7 +367,8 @@ impl AsciiChar { /// and `Some(AsciiChar::from_ascii_unchecked(128))` might be `None`. #[inline] pub unsafe fn from_ascii_unchecked(ch: u8) -> Self { - ch.to_ascii_char_unchecked() + // SAFETY: Caller guarantees `ch` is within bounds of ascii. + unsafe { ch.to_ascii_char_unchecked() } } /// Converts an ASCII character into a `u8`. @@ -628,12 +629,20 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('p').as_printable_char(), 'p'); /// ``` pub fn as_printable_char(self) -> char { - unsafe { - match self as u8 { - b' '..=b'~' => self.as_char(), - 127 => '␡', - _ => char::from_u32_unchecked(self as u32 + '␀' as u32), - } + match self as u8 { + // Non printable characters + // SAFETY: From codepoint 0x2400 ('␀') to 0x241f (`␟`), there are characters representing + // the unprintable characters from 0x0 to 0x1f, ordered correctly. + // As `b` is guaranteed to be within 0x0 to 0x1f, the conversion represents a + // valid character. + b @ 0x0..=0x1f => unsafe { char::from_u32_unchecked(u32::from('␀') + u32::from(b)) }, + + // 0x7f (delete) has it's own character at codepoint 0x2420, not 0x247f, so it is special + // cased to return it's character + 0x7f => '␡', + + // All other characters are printable, and per function contract use `Self::as_char` + _ => self.as_char(), } } @@ -781,10 +790,19 @@ impl Error for ToAsciiCharError { /// Convert `char`, `u8` and other character types to `AsciiChar`. pub trait ToAsciiChar { - /// Convert to `AsciiChar` without checking that it is an ASCII character. - unsafe fn to_ascii_char_unchecked(self) -> AsciiChar; /// Convert to `AsciiChar`. fn to_ascii_char(self) -> Result; + + /// Convert to `AsciiChar` without checking that it is an ASCII character. + /// + /// # Safety + /// Calling this function with a value outside of the ascii range, `0x0` to `0x7f` inclusive, + /// is undefined behavior. + // TODO: Make sure this is the contract we want to express in this function. + // It is ambigous if numbers such as `0xffffff20_u32` are valid ascii characters, + // as this function returns `Ascii::Space` due to the cast to `u8`, even though + // `to_ascii_char` returns `Err()`. + unsafe fn to_ascii_char_unchecked(self) -> AsciiChar; } impl ToAsciiChar for AsciiChar { @@ -792,6 +810,7 @@ impl ToAsciiChar for AsciiChar { fn to_ascii_char(self) -> Result { Ok(self) } + #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { self @@ -805,44 +824,56 @@ impl ToAsciiChar for u8 { } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - mem::transmute(self) + // SAFETY: Caller guarantees `self` is within bounds of the enum + // variants, so this cast successfully produces a valid ascii + // variant + unsafe { mem::transmute::(self) } } } +// Note: Casts to `u8` here does not cause problems, as the negative +// range is mapped outside of ascii bounds. impl ToAsciiChar for i8 { #[inline] fn to_ascii_char(self) -> Result { - (self as u32).to_ascii_char() + u32::from(self as u8).to_ascii_char() } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - mem::transmute(self) + // SAFETY: Caller guarantees `self` is within bounds of the enum + // variants, so this cast successfully produces a valid ascii + // variant + unsafe { mem::transmute::(self as u8) } } } impl ToAsciiChar for char { #[inline] fn to_ascii_char(self) -> Result { - (self as u32).to_ascii_char() + u32::from(self).to_ascii_char() } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - (self as u32).to_ascii_char_unchecked() + // SAFETY: Caller guarantees we're within ascii range. + unsafe { u32::from(self).to_ascii_char_unchecked() } } } impl ToAsciiChar for u32 { fn to_ascii_char(self) -> Result { - unsafe { - match self { - 0..=127 => Ok(self.to_ascii_char_unchecked()), - _ => Err(ToAsciiCharError(())), - } + match self { + // SAFETY: We're within the valid ascii range in this branch. + 0x0..=0x7f => Ok(unsafe { self.to_ascii_char_unchecked() }), + _ => Err(ToAsciiCharError(())), } } + #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - (self as u8).to_ascii_char_unchecked() + // Note: This cast discards the top bytes, this may cause problems, see + // the TODO on this method's documentation in the trait. + // SAFETY: Caller guarantees we're within ascii range. + unsafe { (self as u8).to_ascii_char_unchecked() } } } @@ -852,7 +883,10 @@ impl ToAsciiChar for u16 { } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - (self as u8).to_ascii_char_unchecked() + // Note: This cast discards the top bytes, this may cause problems, see + // the TODO on this method's documentation in the trait. + // SAFETY: Caller guarantees we're within ascii range. + unsafe { (self as u8).to_ascii_char_unchecked() } } } diff --git a/src/lib.rs b/src/lib.rs index 4c77509..3ca0772 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,10 @@ //! API changed significantly since then. #![cfg_attr(not(feature = "std"), no_std)] -#![allow(clippy::trivially_copy_pass_by_ref)] // for compatibility with methods on char and u8 +// for compatibility with methods on char and u8 +#![allow(clippy::trivially_copy_pass_by_ref)] +// In preparation for feature `unsafe_block_in_unsafe_fn` (https://github.com/rust-lang/rust/issues/71668) +#![allow(unused_unsafe)] #[cfg(feature = "std")] extern crate core; From e888608c070bcf85706a65db4e29941a49bfd84e Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 20:29:10 +0100 Subject: [PATCH 2/4] Document all unsafe usages in `ascii_str` module. Redid implementation of `::slice_ascii_mut` to include less unsafe. --- src/ascii_str.rs | 108 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 29 deletions(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 208e9a3..7fdebd1 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -27,12 +27,14 @@ impl AsciiStr { /// Converts `&self` to a `&str` slice. #[inline] pub fn as_str(&self) -> &str { + // SAFETY: All variants of `AsciiChar` are valid bytes for a `str`. unsafe { &*(self as *const AsciiStr as *const str) } } /// Converts `&self` into a byte slice. #[inline] pub fn as_bytes(&self) -> &[u8] { + // SAFETY: All variants of `AsciiChar` are valid `u8`, given they're `repr(u8)`. unsafe { &*(self as *const AsciiStr as *const [u8]) } } @@ -95,6 +97,10 @@ impl AsciiStr { /// Converts anything that can be represented as a byte slice to an `AsciiStr` without checking /// for non-ASCII characters.. /// + /// # Safety + /// If any of the bytes in `bytes` do not represent valid ascii characters, calling + /// this function is undefined behavior. + /// /// # Examples /// ``` /// # use ascii::AsciiStr; @@ -103,7 +109,9 @@ impl AsciiStr { /// ``` #[inline] pub unsafe fn from_ascii_unchecked(bytes: &[u8]) -> &AsciiStr { - bytes.as_ascii_str_unchecked() + // SAFETY: Caller guarantees all bytes in `bytes` are valid + // ascii characters. + unsafe { bytes.as_ascii_str_unchecked() } } /// Returns the number of characters / bytes in this ASCII sequence. @@ -782,6 +790,10 @@ pub trait AsAsciiStr { } /// Convert to an ASCII slice without checking for non-ASCII characters. /// + /// # Safety + /// Calling this function when `self` contains non-ascii characters is + /// undefined behavior. + /// /// # Examples /// unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr; @@ -793,11 +805,17 @@ pub trait AsMutAsciiStr: AsAsciiStr { fn slice_ascii_mut(&mut self, range: R) -> Result<&mut AsciiStr, AsAsciiStrError> where R: SliceIndex<[Self::Inner], Output = [Self::Inner]>; + /// Convert to a mutable ASCII slice. fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> { self.slice_ascii_mut(..) } + /// Convert to a mutable ASCII slice without checking for non-ASCII characters. + /// + /// # Safety + /// Calling this function when `self` contains non-ascii characters is + /// undefined behavior. unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr; } @@ -813,8 +831,10 @@ where { ::slice_ascii(*self, range) } + unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr { - ::as_ascii_str_unchecked(*self) + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + unsafe { ::as_ascii_str_unchecked(*self) } } } @@ -831,7 +851,8 @@ where } unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr { - ::as_ascii_str_unchecked(*self) + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + unsafe { ::as_ascii_str_unchecked(*self) } } } @@ -847,26 +868,31 @@ where } unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr { - ::as_mut_ascii_str_unchecked(*self) + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + unsafe { ::as_mut_ascii_str_unchecked(*self) } } } impl AsAsciiStr for AsciiStr { type Inner = AsciiChar; + fn slice_ascii(&self, range: R) -> Result<&AsciiStr, AsAsciiStrError> where R: SliceIndex<[AsciiChar], Output = [AsciiChar]>, { self.slice.slice_ascii(range) } + #[inline] fn as_ascii_str(&self) -> Result<&AsciiStr, AsAsciiStrError> { Ok(self) } + #[inline] unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr { self } + #[inline] fn get_ascii(&self, index: usize) -> Option { self.slice.get_ascii(index) @@ -879,6 +905,7 @@ impl AsMutAsciiStr for AsciiStr { { self.slice.slice_ascii_mut(range) } + #[inline] unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr { self @@ -896,14 +923,17 @@ impl AsAsciiStr for [AsciiChar] { None => Err(AsAsciiStrError(self.len())), } } + #[inline] fn as_ascii_str(&self) -> Result<&AsciiStr, AsAsciiStrError> { Ok(self.into()) } + #[inline] unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr { - self.into() + <&AsciiStr>::from(self) } + #[inline] fn get_ascii(&self, index: usize) -> Option { self.get(index).cloned() @@ -922,12 +952,13 @@ impl AsMutAsciiStr for [AsciiChar] { } #[inline] unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr { - self.into() + <&mut AsciiStr>::from(self) } } impl AsAsciiStr for [u8] { type Inner = u8; + fn slice_ascii(&self, range: R) -> Result<&AsciiStr, AsAsciiStrError> where R: SliceIndex<[u8], Output = [u8]>, @@ -941,9 +972,11 @@ impl AsAsciiStr for [u8] { Err(AsAsciiStrError(self.len())) } } + fn as_ascii_str(&self) -> Result<&AsciiStr, AsAsciiStrError> { + // is_ascii is likely optimized if self.is_ascii() { - // is_ascii is likely optimized + // SAFETY: `is_ascii` guarantees all bytes are within ascii range. unsafe { Ok(self.as_ascii_str_unchecked()) } } else { Err(AsAsciiStrError( @@ -951,10 +984,11 @@ impl AsAsciiStr for [u8] { )) } } + #[inline] unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr { - let ptr = self as *const [u8] as *const AsciiStr; - &*ptr + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + unsafe { &*(self as *const [u8] as *const AsciiStr) } } } impl AsMutAsciiStr for [u8] { @@ -975,9 +1009,11 @@ impl AsMutAsciiStr for [u8] { Err(AsAsciiStrError(len)) } } + fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> { + // is_ascii() is likely optimized if self.is_ascii() { - // is_ascii() is likely optimized + // SAFETY: `is_ascii` guarantees all bytes are within ascii range. unsafe { Ok(self.as_mut_ascii_str_unchecked()) } } else { Err(AsAsciiStrError( @@ -985,10 +1021,11 @@ impl AsMutAsciiStr for [u8] { )) } } + #[inline] unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr { - let ptr = self as *mut [u8] as *mut AsciiStr; - &mut *ptr + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + unsafe { &mut *(self as *mut [u8] as *mut AsciiStr) } } } @@ -1005,7 +1042,8 @@ impl AsAsciiStr for str { } #[inline] unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr { - self.as_bytes().as_ascii_str_unchecked() + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + unsafe { self.as_bytes().as_ascii_str_unchecked() } } } impl AsMutAsciiStr for str { @@ -1013,32 +1051,43 @@ impl AsMutAsciiStr for str { where R: SliceIndex<[u8], Output = [u8]>, { - let (ptr, len) = if let Some(slice) = self.as_bytes().get(range) { - if !slice.is_ascii() { + // SAFETY: We don't modify the reference in this function, and the caller may + // only modify it to include valid ascii characters. + let bytes = unsafe { self.as_bytes_mut() }; + match bytes.get_mut(range) { + // Valid ascii slice + Some(slice) if slice.is_ascii() => { + // SAFETY: All bytes are ascii, so this cast is valid + let ptr = slice.as_mut_ptr() as *mut AsciiChar; + let len = slice.len(); + + // SAFETY: The pointer is valid for `len` elements, as it came + // from a slice. + unsafe { + let slice = core::slice::from_raw_parts_mut(ptr, len); + Ok(<&mut AsciiStr>::from(slice)) + } + } + Some(slice) => { + let not_ascii_len = slice.iter().copied().take_while(u8::is_ascii).count(); let offset = slice.as_ptr() as usize - self.as_ptr() as usize; - let not_ascii = slice.iter().take_while(|&b| b.is_ascii()).count(); - return Err(AsAsciiStrError(offset + not_ascii)); + + Err(AsAsciiStrError(offset + not_ascii_len)) } - (slice.as_ptr(), slice.len()) - } else { - return Err(AsAsciiStrError(self.len())); - }; - unsafe { - let ptr = ptr as *const AsciiChar as *mut AsciiChar; - let slice = core::slice::from_raw_parts_mut(ptr, len); - Ok(slice.into()) + None => Err(AsAsciiStrError(self.len())), } } fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> { - match self.bytes().position(|b| b > 127) { + match self.bytes().position(|b| !b.is_ascii()) { Some(index) => Err(AsAsciiStrError(index)), + // SAFETY: All bytes were iterated, and all were ascii None => unsafe { Ok(self.as_mut_ascii_str_unchecked()) }, } } #[inline] unsafe fn as_mut_ascii_str_unchecked(&mut self) -> &mut AsciiStr { - let ptr = self as *mut str as *mut AsciiStr; - &mut *ptr + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + &mut *(self as *mut str as *mut AsciiStr) } } @@ -1058,7 +1107,8 @@ impl AsAsciiStr for CStr { } #[inline] unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr { - self.to_bytes().as_ascii_str_unchecked() + // SAFETY: Caller guarantees `self` does not contain non-ascii characters + unsafe { self.to_bytes().as_ascii_str_unchecked() } } } From d5f206d2d6726249342e69eedadb7852cb5b0d8a Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 20:29:10 +0100 Subject: [PATCH 3/4] Document all unsafe usages in `ascii_string` module. --- src/ascii_string.rs | 102 +++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 40 deletions(-) diff --git a/src/ascii_string.rs b/src/ascii_string.rs index 5f0b644..b7ab9fe 100644 --- a/src/ascii_string.rs +++ b/src/ascii_string.rs @@ -52,10 +52,12 @@ impl AsciiString { /// /// This is highly unsafe, due to the number of invariants that aren't checked: /// - /// * The memory at `ptr` need to have been previously allocated by the same allocator this + /// * The memory at `buf` need to have been previously allocated by the same allocator this /// library uses. /// * `length` needs to be less than or equal to `capacity`. /// * `capacity` needs to be the correct value. + /// * `buf` must have `length` valid ascii elements and contain a total of `capacity` total, + /// possibly, uninitialized, elements. /// /// Violating these may cause problems like corrupting the allocator's internal datastructures. /// @@ -83,7 +85,10 @@ impl AsciiString { #[inline] pub unsafe fn from_raw_parts(buf: *mut AsciiChar, length: usize, capacity: usize) -> Self { AsciiString { - vec: Vec::from_raw_parts(buf, length, capacity), + // SAFETY: Caller guarantees `buf` was previously allocated by this library, + // that `buf` contains `length` valid ascii elements and has a total + // capacity of `capacity` elements. + vec: unsafe { Vec::from_raw_parts(buf, length, capacity) }, } } @@ -100,13 +105,19 @@ impl AsciiString { B: Into>, { let mut bytes = bytes.into(); - let vec = Vec::from_raw_parts( - bytes.as_mut_ptr() as *mut AsciiChar, - bytes.len(), - bytes.capacity(), - ); + // SAFETY: The caller guarantees all bytes are valid ascii bytes. + let ptr = bytes.as_mut_ptr() as *mut AsciiChar; + let length = bytes.len(); + let capacity = bytes.capacity(); mem::forget(bytes); - AsciiString { vec } + + // SAFETY: We guarantee all invariants, as we got the + // pointer, length and capacity from a `Vec`, + // and we also guarantee the pointer is valid per + // the `SAFETY` notice above. + let vec = Vec::from_raw_parts(ptr, length, capacity); + + Self { vec } } /// Converts anything that can represent a byte buffer into an `AsciiString`. @@ -126,14 +137,13 @@ impl AsciiString { where B: Into> + AsRef<[u8]>, { - unsafe { - match bytes.as_ref().as_ascii_str() { - Ok(_) => Ok(AsciiString::from_ascii_unchecked(bytes)), - Err(e) => Err(FromAsciiError { - error: e, - owner: bytes, - }), - } + match bytes.as_ref().as_ascii_str() { + // SAFETY: `as_ascii_str` guarantees all bytes are valid ascii bytes. + Ok(_) => Ok(unsafe { AsciiString::from_ascii_unchecked(bytes) }), + Err(e) => Err(FromAsciiError { + error: e, + owner: bytes, + }), } } @@ -428,19 +438,18 @@ impl From> for AsciiString { } impl Into> for AsciiString { - fn into(self) -> Vec { - unsafe { - let v = Vec::from_raw_parts( - self.vec.as_ptr() as *mut u8, - self.vec.len(), - self.vec.capacity(), - ); - - // We forget `self` to avoid freeing it at the end of the scope. - // Otherwise, the returned `Vec` would point to freed memory. - mem::forget(self); - v - } + fn into(mut self) -> Vec { + // SAFETY: All ascii bytes are valid `u8`, as we are `repr(u8)`. + // Note: We forget `self` to avoid `self.vec` from being deallocated. + let ptr = self.vec.as_mut_ptr() as *mut u8; + let length = self.vec.len(); + let capacity = self.vec.capacity(); + mem::forget(self); + + // SAFETY: We guarantee all invariants due to getting `ptr`, `length` + // and `capacity` from a `Vec`. We also guarantee `ptr` is valid + // due to the `SAFETY` block above. + unsafe { Vec::from_raw_parts(ptr, length, capacity) } } } @@ -461,6 +470,7 @@ impl<'a> From<&'a [AsciiChar]> for AsciiString { impl Into for AsciiString { #[inline] fn into(self) -> String { + // SAFETY: All ascii bytes are `utf8`. unsafe { String::from_utf8_unchecked(self.into()) } } } @@ -684,7 +694,12 @@ impl Error for FromAsciiError { /// Convert vectors into `AsciiString`. pub trait IntoAsciiString: Sized { /// Convert to `AsciiString` without checking for non-ASCII characters. + /// + /// # Safety + /// If `self` contains non-ascii characters, calling this function is + /// undefined behavior. unsafe fn into_ascii_string_unchecked(self) -> AsciiString; + /// Convert to `AsciiString`. fn into_ascii_string(self) -> Result>; } @@ -727,7 +742,8 @@ macro_rules! impl_into_ascii_string { impl<'a> IntoAsciiString for $wider { #[inline] unsafe fn into_ascii_string_unchecked(self) -> AsciiString { - AsciiString::from_ascii_unchecked(self) + // SAFETY: Caller guarantees `self` only has valid ascii bytes + unsafe { AsciiString::from_ascii_unchecked(self) } } #[inline] @@ -741,7 +757,8 @@ macro_rules! impl_into_ascii_string { impl IntoAsciiString for $wider { #[inline] unsafe fn into_ascii_string_unchecked(self) -> AsciiString { - AsciiString::from_ascii_unchecked(self) + // SAFETY: Caller guarantees `self` only has valid ascii bytes + unsafe { AsciiString::from_ascii_unchecked(self) } } #[inline] @@ -758,22 +775,22 @@ impl_into_ascii_string! {'a, &'a [u8]} impl_into_ascii_string! {String} impl_into_ascii_string! {'a, &'a str} -/// Note that the trailing null byte will be removed in the conversion. +/// # Notes +/// The trailing null byte `CString` has will be removed during this conversion impl IntoAsciiString for CString { #[inline] unsafe fn into_ascii_string_unchecked(self) -> AsciiString { - AsciiString::from_ascii_unchecked(self.into_bytes()) + // SAFETY: Caller guarantees `self` only has valid ascii bytes + unsafe { AsciiString::from_ascii_unchecked(self.into_bytes()) } } fn into_ascii_string(self) -> Result> { AsciiString::from_ascii(self.into_bytes_with_nul()) .map_err(|FromAsciiError { error, owner }| { FromAsciiError { - owner: unsafe { - // The null byte is preserved from the original - // `CString`, so this is safe. - CString::from_vec_unchecked(owner) - }, + // SAFETY: We don't discard the NULL byte from the original + // string, so we ensure that it's null terminated + owner: unsafe { CString::from_vec_unchecked(owner) }, error, } }) @@ -789,12 +806,15 @@ impl IntoAsciiString for CString { impl<'a> IntoAsciiString for &'a CStr { #[inline] unsafe fn into_ascii_string_unchecked(self) -> AsciiString { - AsciiString::from_ascii_unchecked(self.to_bytes()) + // SAFETY: Caller guarantees `self` only has valid ascii bytes + unsafe { AsciiString::from_ascii_unchecked(self.to_bytes()) } } fn into_ascii_string(self) -> Result> { AsciiString::from_ascii(self.to_bytes_with_nul()) .map_err(|FromAsciiError { error, owner }| FromAsciiError { + // SAFETY: We don't discard the NULL byte from the original + // string, so we ensure that it's null terminated owner: unsafe { CStr::from_ptr(owner.as_ptr() as *const _) }, error, }) @@ -814,7 +834,8 @@ where { #[inline] unsafe fn into_ascii_string_unchecked(self) -> AsciiString { - IntoAsciiString::into_ascii_string_unchecked(self.into_owned()) + // SAFETY: Caller guarantees `self` only has valid ascii bytes + unsafe { IntoAsciiString::into_ascii_string_unchecked(self.into_owned()) } } fn into_ascii_string(self) -> Result> { @@ -875,6 +896,7 @@ mod tests { assert_eq!(ascii_str.len(), 3); assert_eq!(ascii_str.as_slice(), expected_chars); + // SAFETY: "baz" only contains valid ascii characters. let ascii_str_unchecked = unsafe { cstring.into_ascii_string_unchecked() }; assert_eq!(ascii_str_unchecked.len(), 3); assert_eq!(ascii_str_unchecked.as_slice(), expected_chars); From 9848243bb84f79df51d9109544c1c93f47c7ff9b Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 20:29:11 +0100 Subject: [PATCH 4/4] Documented all unsafe usages in `free_functions` module. --- src/free_functions.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/free_functions.rs b/src/free_functions.rs index 4dbff7a..42f355f 100644 --- a/src/free_functions.rs +++ b/src/free_functions.rs @@ -17,12 +17,11 @@ use ascii_char::{AsciiChar, ToAsciiChar}; pub fn caret_encode>(c: C) -> Option { // The formula is explained in the Wikipedia article. let c = c.into() ^ 0b0100_0000; - unsafe { - if c >= b'?' && c <= b'_' { - Some(c.to_ascii_char_unchecked()) - } else { - None - } + if c >= b'?' && c <= b'_' { + // SAFETY: All bytes between '?' (0x3F) and '_' (0x5f) are valid ascii characters. + Some(unsafe { c.to_ascii_char_unchecked() }) + } else { + None } } @@ -51,10 +50,10 @@ pub fn caret_encode>(c: C) -> Option { /// ``` pub fn caret_decode>(c: C) -> Option { // The formula is explained in the Wikipedia article. - unsafe { - match c.into() { - b'?'..=b'_' => Some(AsciiChar::from_ascii_unchecked(c.into() ^ 0b0100_0000)), - _ => None, - } + match c.into() { + // SAFETY: All bytes between '?' (0x3F) and '_' (0x5f) after `xoring` with `0b0100_0000` are + // valid bytes, as they represent characters between '␀' (0x0) and '␠' (0x1f) + '␡' (0x7f) + b'?'..=b'_' => Some(unsafe { AsciiChar::from_ascii_unchecked(c.into() ^ 0b0100_0000) }), + _ => None, } }