Skip to content

Commit d5f206d

Browse files
committed
Document all unsafe usages in ascii_string module.
1 parent e888608 commit d5f206d

File tree

1 file changed

+62
-40
lines changed

1 file changed

+62
-40
lines changed

src/ascii_string.rs

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,12 @@ impl AsciiString {
5252
///
5353
/// This is highly unsafe, due to the number of invariants that aren't checked:
5454
///
55-
/// * The memory at `ptr` need to have been previously allocated by the same allocator this
55+
/// * The memory at `buf` needs to have been previously allocated by the same allocator this
5656
/// library uses.
5757
/// * `length` needs to be less than or equal to `capacity`.
5858
/// * `capacity` needs to be the correct value.
59+
/// * `buf` must hold `length` valid ascii elements and have room for `capacity` elements in total,
60+
/// the remainder of which may be uninitialized.
5961
///
6062
/// Violating these may cause problems like corrupting the allocator's internal data structures.
6163
///
@@ -83,7 +85,10 @@ impl AsciiString {
8385
#[inline]
8486
pub unsafe fn from_raw_parts(buf: *mut AsciiChar, length: usize, capacity: usize) -> Self {
8587
AsciiString {
86-
vec: Vec::from_raw_parts(buf, length, capacity),
88+
// SAFETY: Caller guarantees `buf` was previously allocated by this library,
89+
// that `buf` contains `length` valid ascii elements and has a total
90+
// capacity of `capacity` elements.
91+
vec: unsafe { Vec::from_raw_parts(buf, length, capacity) },
8792
}
8893
}
8994

@@ -100,13 +105,19 @@ impl AsciiString {
100105
B: Into<Vec<u8>>,
101106
{
102107
let mut bytes = bytes.into();
103-
let vec = Vec::from_raw_parts(
104-
bytes.as_mut_ptr() as *mut AsciiChar,
105-
bytes.len(),
106-
bytes.capacity(),
107-
);
108+
// SAFETY: The caller guarantees all bytes are valid ascii bytes.
109+
let ptr = bytes.as_mut_ptr() as *mut AsciiChar;
110+
let length = bytes.len();
111+
let capacity = bytes.capacity();
108112
mem::forget(bytes);
109-
AsciiString { vec }
113+
114+
// SAFETY: We guarantee all invariants, as we got the
115+
// pointer, length and capacity from a `Vec`,
116+
// and we also guarantee the pointer is valid per
117+
// the `SAFETY` notice above.
118+
let vec = Vec::from_raw_parts(ptr, length, capacity);
119+
120+
Self { vec }
110121
}
111122

112123
/// Converts anything that can represent a byte buffer into an `AsciiString`.
@@ -126,14 +137,13 @@ impl AsciiString {
126137
where
127138
B: Into<Vec<u8>> + AsRef<[u8]>,
128139
{
129-
unsafe {
130-
match bytes.as_ref().as_ascii_str() {
131-
Ok(_) => Ok(AsciiString::from_ascii_unchecked(bytes)),
132-
Err(e) => Err(FromAsciiError {
133-
error: e,
134-
owner: bytes,
135-
}),
136-
}
140+
match bytes.as_ref().as_ascii_str() {
141+
// SAFETY: `as_ascii_str` guarantees all bytes are valid ascii bytes.
142+
Ok(_) => Ok(unsafe { AsciiString::from_ascii_unchecked(bytes) }),
143+
Err(e) => Err(FromAsciiError {
144+
error: e,
145+
owner: bytes,
146+
}),
137147
}
138148
}
139149

@@ -428,19 +438,18 @@ impl From<Vec<AsciiChar>> for AsciiString {
428438
}
429439

430440
impl Into<Vec<u8>> for AsciiString {
431-
fn into(self) -> Vec<u8> {
432-
unsafe {
433-
let v = Vec::from_raw_parts(
434-
self.vec.as_ptr() as *mut u8,
435-
self.vec.len(),
436-
self.vec.capacity(),
437-
);
438-
439-
// We forget `self` to avoid freeing it at the end of the scope.
440-
// Otherwise, the returned `Vec` would point to freed memory.
441-
mem::forget(self);
442-
v
443-
}
441+
fn into(mut self) -> Vec<u8> {
442+
// SAFETY: All ascii bytes are valid `u8`, as `AsciiChar` is `repr(u8)`.
443+
// Note: We forget `self` to prevent `self.vec` from being deallocated.
444+
let ptr = self.vec.as_mut_ptr() as *mut u8;
445+
let length = self.vec.len();
446+
let capacity = self.vec.capacity();
447+
mem::forget(self);
448+
449+
// SAFETY: We guarantee all invariants due to getting `ptr`, `length`
450+
// and `capacity` from a `Vec`. We also guarantee `ptr` is valid
451+
// due to the `SAFETY` block above.
452+
unsafe { Vec::from_raw_parts(ptr, length, capacity) }
444453
}
445454
}
446455

@@ -461,6 +470,7 @@ impl<'a> From<&'a [AsciiChar]> for AsciiString {
461470
impl Into<String> for AsciiString {
462471
#[inline]
463472
fn into(self) -> String {
473+
// SAFETY: All ascii bytes are `utf8`.
464474
unsafe { String::from_utf8_unchecked(self.into()) }
465475
}
466476
}
@@ -684,7 +694,12 @@ impl<O: Any> Error for FromAsciiError<O> {
684694
/// Convert vectors into `AsciiString`.
685695
pub trait IntoAsciiString: Sized {
686696
/// Convert to `AsciiString` without checking for non-ASCII characters.
697+
///
698+
/// # Safety
699+
/// If `self` contains non-ascii characters, calling this function is
700+
/// undefined behavior.
687701
unsafe fn into_ascii_string_unchecked(self) -> AsciiString;
702+
688703
/// Convert to `AsciiString`.
689704
fn into_ascii_string(self) -> Result<AsciiString, FromAsciiError<Self>>;
690705
}
@@ -727,7 +742,8 @@ macro_rules! impl_into_ascii_string {
727742
impl<'a> IntoAsciiString for $wider {
728743
#[inline]
729744
unsafe fn into_ascii_string_unchecked(self) -> AsciiString {
730-
AsciiString::from_ascii_unchecked(self)
745+
// SAFETY: Caller guarantees `self` only has valid ascii bytes
746+
unsafe { AsciiString::from_ascii_unchecked(self) }
731747
}
732748

733749
#[inline]
@@ -741,7 +757,8 @@ macro_rules! impl_into_ascii_string {
741757
impl IntoAsciiString for $wider {
742758
#[inline]
743759
unsafe fn into_ascii_string_unchecked(self) -> AsciiString {
744-
AsciiString::from_ascii_unchecked(self)
760+
// SAFETY: Caller guarantees `self` only has valid ascii bytes
761+
unsafe { AsciiString::from_ascii_unchecked(self) }
745762
}
746763

747764
#[inline]
@@ -758,22 +775,22 @@ impl_into_ascii_string! {'a, &'a [u8]}
758775
impl_into_ascii_string! {String}
759776
impl_into_ascii_string! {'a, &'a str}
760777

761-
/// Note that the trailing null byte will be removed in the conversion.
778+
/// # Notes
779+
/// The trailing null byte of the `CString` will be removed during this conversion.
762780
impl IntoAsciiString for CString {
763781
#[inline]
764782
unsafe fn into_ascii_string_unchecked(self) -> AsciiString {
765-
AsciiString::from_ascii_unchecked(self.into_bytes())
783+
// SAFETY: Caller guarantees `self` only has valid ascii bytes
784+
unsafe { AsciiString::from_ascii_unchecked(self.into_bytes()) }
766785
}
767786

768787
fn into_ascii_string(self) -> Result<AsciiString, FromAsciiError<Self>> {
769788
AsciiString::from_ascii(self.into_bytes_with_nul())
770789
.map_err(|FromAsciiError { error, owner }| {
771790
FromAsciiError {
772-
owner: unsafe {
773-
// The null byte is preserved from the original
774-
// `CString`, so this is safe.
775-
CString::from_vec_unchecked(owner)
776-
},
791+
// SAFETY: We don't discard the NULL byte from the original
792+
// string, so we ensure that it's null terminated
793+
owner: unsafe { CString::from_vec_unchecked(owner) },
777794
error,
778795
}
779796
})
@@ -789,12 +806,15 @@ impl IntoAsciiString for CString {
789806
impl<'a> IntoAsciiString for &'a CStr {
790807
#[inline]
791808
unsafe fn into_ascii_string_unchecked(self) -> AsciiString {
792-
AsciiString::from_ascii_unchecked(self.to_bytes())
809+
// SAFETY: Caller guarantees `self` only has valid ascii bytes
810+
unsafe { AsciiString::from_ascii_unchecked(self.to_bytes()) }
793811
}
794812

795813
fn into_ascii_string(self) -> Result<AsciiString, FromAsciiError<Self>> {
796814
AsciiString::from_ascii(self.to_bytes_with_nul())
797815
.map_err(|FromAsciiError { error, owner }| FromAsciiError {
816+
// SAFETY: We don't discard the NULL byte from the original
817+
// string, so we ensure that it's null terminated
798818
owner: unsafe { CStr::from_ptr(owner.as_ptr() as *const _) },
799819
error,
800820
})
@@ -814,7 +834,8 @@ where
814834
{
815835
#[inline]
816836
unsafe fn into_ascii_string_unchecked(self) -> AsciiString {
817-
IntoAsciiString::into_ascii_string_unchecked(self.into_owned())
837+
// SAFETY: Caller guarantees `self` only has valid ascii bytes
838+
unsafe { IntoAsciiString::into_ascii_string_unchecked(self.into_owned()) }
818839
}
819840

820841
fn into_ascii_string(self) -> Result<AsciiString, FromAsciiError<Self>> {
@@ -875,6 +896,7 @@ mod tests {
875896
assert_eq!(ascii_str.len(), 3);
876897
assert_eq!(ascii_str.as_slice(), expected_chars);
877898

899+
// SAFETY: "baz" only contains valid ascii characters.
878900
let ascii_str_unchecked = unsafe { cstring.into_ascii_string_unchecked() };
879901
assert_eq!(ascii_str_unchecked.len(), 3);
880902
assert_eq!(ascii_str_unchecked.as_slice(), expected_chars);

0 commit comments

Comments
 (0)