diff --git a/src/ascii_char.rs b/src/ascii_char.rs index c595e3a..49c1b21 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -280,7 +280,7 @@ pub enum AsciiChar { impl AsciiChar { /// Constructs an ASCII character from a `u8`, `char` or other character type. /// - /// # Failure + /// # Errors /// Returns `Err(())` if the character can't be ASCII encoded. /// /// # Example @@ -328,8 +328,13 @@ impl AsciiChar { /// /// The panic message might not be the most descriptive due to the /// current limitations of `const fn`. + #[must_use] pub const fn new(ch: char) -> AsciiChar { + // It's restricted to this function, and without it + // we'd need to specify `AsciiChar::` or `Self::` 128 times. + #[allow(clippy::enum_glob_use)] use AsciiChar::*; + #[rustfmt::skip] const ALL: [AsciiChar; 128] = [ Null, SOH, SOX, ETX, EOT, ENQ, ACK, Bell, @@ -348,7 +353,10 @@ impl AsciiChar { h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, CurlyBraceOpen, VerticalBar, CurlyBraceClose, Tilde, DEL, - ]; + ]; + + // We want to slice here and detect `const_err` from rustc if the slice is invalid + #[allow(clippy::indexing_slicing)] ALL[ch as usize] } @@ -366,18 +374,21 @@ impl AsciiChar { /// might not panic, creating a buffer overflow, /// and `Some(AsciiChar::from_ascii_unchecked(128))` might be `None`. #[inline] + #[must_use] pub unsafe fn from_ascii_unchecked(ch: u8) -> Self { ch.to_ascii_char_unchecked() } /// Converts an ASCII character into a `u8`. #[inline] + #[must_use] pub const fn as_byte(self) -> u8 { self as u8 } /// Converts an ASCII character into a `char`. #[inline] + #[must_use] pub const fn as_char(self) -> char { self as u8 as char } @@ -390,12 +401,14 @@ impl AsciiChar { // make the compiler optimize away the indirection. /// Turns uppercase into lowercase, but also modifies '@' and '<'..='_' + #[must_use] const fn to_not_upper(self) -> u8 { self as u8 | 0b010_0000 } /// Check if the character is a letter (a-z, A-Z) #[inline] + #[must_use] pub const fn is_alphabetic(self) -> bool { (self.to_not_upper() >= b'a') & (self.to_not_upper() <= b'z') } @@ -404,6 +417,7 @@ impl AsciiChar { /// /// This method is identical to [`is_alphabetic()`](#method.is_alphabetic) #[inline] + #[must_use] pub const fn is_ascii_alphabetic(&self) -> bool { self.is_alphabetic() } @@ -417,6 +431,7 @@ impl AsciiChar { /// # Panics /// /// Radixes greater than 36 are not supported and will result in a panic. + #[must_use] pub fn is_digit(self, radix: u32) -> bool { match (self as u8, radix) { (b'0'..=b'9', 0..=36) => u32::from(self as u8 - b'0') < radix, @@ -439,12 +454,14 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('/').is_ascii_digit(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_digit(&self) -> bool { (*self as u8 >= b'0') & (*self as u8 <= b'9') } /// Check if the character is a letter or number #[inline] + #[must_use] pub const fn is_alphanumeric(self) -> bool { self.is_alphabetic() | self.is_ascii_digit() } @@ -453,6 +470,7 @@ impl AsciiChar { /// /// This method is identical to [`is_alphanumeric()`](#method.is_alphanumeric) #[inline] + #[must_use] pub const fn is_ascii_alphanumeric(&self) -> bool { self.is_alphanumeric() } @@ -470,6 +488,7 @@ impl AsciiChar { /// assert!(!AsciiChar::FF.is_ascii_blank()); /// ``` #[inline] + #[must_use] pub const fn is_ascii_blank(&self) -> bool { (*self as u8 == b' ') | (*self as u8 == b'\t') } @@ -477,6 +496,7 @@ impl AsciiChar { /// Check if the character one of ' ', '\t', '\n', '\r', /// '\0xb' (vertical tab) or '\0xc' (form feed). #[inline] + #[must_use] pub const fn is_whitespace(self) -> bool { let b = self as u8; self.is_ascii_blank() | (b == b'\n') | (b == b'\r') | (b == 0x0b) | (b == 0x0c) @@ -486,6 +506,7 @@ impl AsciiChar { /// /// This method is NOT identical to `is_whitespace()`. #[inline] + #[must_use] pub const fn is_ascii_whitespace(&self) -> bool { self.is_ascii_blank() | (*self as u8 == b'\n') @@ -506,6 +527,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::EOT.is_ascii_control(), true); /// ``` #[inline] + #[must_use] pub const fn is_ascii_control(&self) -> bool { ((*self as u8) < b' ') | (*self as u8 == 127) } @@ -520,6 +542,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('\n').is_ascii_graphic(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_graphic(&self) -> bool { self.as_byte().wrapping_sub(b' ' + 1) < 0x5E } @@ -534,6 +557,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('\n').is_ascii_printable(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_printable(&self) -> bool { self.as_byte().wrapping_sub(b' ') < 0x5F } @@ -548,6 +572,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('@').is_lowercase(), false); /// ``` #[inline] + #[must_use] pub const fn is_lowercase(self) -> bool { self.as_byte().wrapping_sub(b'a') < 26 } @@ -556,6 +581,7 @@ impl AsciiChar { /// /// This method is identical to [`is_lowercase()`](#method.is_lowercase) #[inline] + #[must_use] pub const fn is_ascii_lowercase(&self) -> bool { self.is_lowercase() } @@ -570,6 +596,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('@').is_uppercase(), false); /// ``` #[inline] + #[must_use] pub const fn is_uppercase(self) -> bool { self.as_byte().wrapping_sub(b'A') < 26 } @@ -578,6 +605,7 @@ impl AsciiChar { /// /// This method is identical to [`is_uppercase()`](#method.is_uppercase) #[inline] + #[must_use] pub const fn is_ascii_uppercase(&self) -> bool { self.is_uppercase() } @@ -593,6 +621,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('~').is_ascii_punctuation(), true); /// ``` #[inline] + #[must_use] pub const fn is_ascii_punctuation(&self) -> bool { self.is_ascii_graphic() & !self.is_alphanumeric() } @@ -609,8 +638,9 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new(' ').is_ascii_hexdigit(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_hexdigit(&self) -> bool { - self.is_ascii_digit() | ((*self as u8 | 0x20u8).wrapping_sub(b'a') < 6) + self.is_ascii_digit() | ((*self as u8 | 0x20_u8).wrapping_sub(b'a') < 6) } /// Unicode has printable versions of the ASCII control codes, like '␛'. @@ -627,6 +657,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new(' ').as_printable_char(), ' '); /// assert_eq!(AsciiChar::new('p').as_printable_char(), 'p'); /// ``` + #[must_use] pub fn as_printable_char(self) -> char { unsafe { match self as u8 { @@ -659,6 +690,8 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('[').to_ascii_uppercase().as_char(), '['); /// ``` #[inline] + #[must_use] + #[allow(clippy::indexing_slicing)] // We're sure it'll either access one or the other, as `bool` is either `0` or `1` pub const fn to_ascii_uppercase(&self) -> Self { [*self, AsciiChar::new((*self as u8 & 0b101_1111) as char)][self.is_lowercase() as usize] } @@ -675,12 +708,15 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('\x7f').to_ascii_lowercase().as_char(), '\x7f'); /// ``` #[inline] + #[must_use] + #[allow(clippy::indexing_slicing)] // We're sure it'll either access one or the other, as `bool` is either `0` or `1` pub const fn to_ascii_lowercase(&self) -> Self { [*self, AsciiChar::new(self.to_not_upper() as char)][self.is_uppercase() as usize] } /// Compares two characters case-insensitively. #[inline] + #[must_use] pub const fn eq_ignore_ascii_case(&self, other: &Self) -> bool { (self.as_byte() == other.as_byte()) | (self.is_alphabetic() & (self.to_not_upper() == other.to_not_upper())) @@ -754,6 +790,7 @@ const ERRORMSG_CHAR: &str = "not an ASCII character"; impl ToAsciiCharError { /// Returns a description for this error, like `std::error::Error::description`. #[inline] + #[must_use] pub const fn description(&self) -> &'static str { ERRORMSG_CHAR } @@ -781,10 +818,14 @@ impl Error for ToAsciiCharError { /// Convert `char`, `u8` and other character types to `AsciiChar`. pub trait ToAsciiChar { - /// Convert to `AsciiChar` without checking that it is an ASCII character. - unsafe fn to_ascii_char_unchecked(self) -> AsciiChar; /// Convert to `AsciiChar`. + /// + /// # Errors + /// If `self` is outside the valid ascii range, this returns `Err` fn to_ascii_char(self) -> Result; + + /// Convert to `AsciiChar` without checking that it is an ASCII character. + unsafe fn to_ascii_char_unchecked(self) -> AsciiChar; } impl ToAsciiChar for AsciiChar { @@ -809,6 +850,10 @@ impl ToAsciiChar for u8 { } } +// Note: Casts to `u8` here does not cause problems, as the negative +// range is mapped outside of ascii bounds and we don't mind losing +// the sign, as long as negative numbers are mapped outside ascii range. +#[allow(clippy::cast_sign_loss)] impl ToAsciiChar for i8 { #[inline] fn to_ascii_char(self) -> Result { @@ -842,7 +887,13 @@ impl ToAsciiChar for u32 { } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - (self as u8).to_ascii_char_unchecked() + // Note: This cast discards the top bytes, this may cause problems, see + // the TODO on this method's documentation in the trait. + // SAFETY: Caller guarantees we're within ascii range. + #[allow(clippy::cast_possible_truncation)] // We want to truncate it + unsafe { + (self as u8).to_ascii_char_unchecked() + } } } @@ -852,43 +903,48 @@ impl ToAsciiChar for u16 { } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - (self as u8).to_ascii_char_unchecked() + // Note: This cast discards the top bytes, this may cause problems, see + // the TODO on this method's documentation in the trait. + // SAFETY: Caller guarantees we're within ascii range. + #[allow(clippy::cast_possible_truncation)] // We want to truncate it + unsafe { + (self as u8).to_ascii_char_unchecked() + } } } #[cfg(test)] mod tests { use super::{AsciiChar, ToAsciiChar, ToAsciiCharError}; - use AsciiChar::*; #[test] fn to_ascii_char() { fn generic(ch: C) -> Result { ch.to_ascii_char() } - assert_eq!(generic(A), Ok(A)); - assert_eq!(generic(b'A'), Ok(A)); - assert_eq!(generic('A'), Ok(A)); - assert!(generic(200u16).is_err()); + assert_eq!(generic(AsciiChar::A), Ok(AsciiChar::A)); + assert_eq!(generic(b'A'), Ok(AsciiChar::A)); + assert_eq!(generic('A'), Ok(AsciiChar::A)); + assert!(generic(200_u16).is_err()); assert!(generic('λ').is_err()); } #[test] fn as_byte_and_char() { - assert_eq!(A.as_byte(), b'A'); - assert_eq!(A.as_char(), 'A'); + assert_eq!(AsciiChar::A.as_byte(), b'A'); + assert_eq!(AsciiChar::A.as_char(), 'A'); } #[test] fn new_array_is_correct() { - for byte in 0..128u8 { + for byte in 0..128_u8 { assert_eq!(AsciiChar::new(byte as char).as_byte(), byte); } } #[test] fn is_all() { - for byte in 0..128u8 { + for byte in 0..128_u8 { let ch = byte as char; let ascii = AsciiChar::new(ch); assert_eq!(ascii.is_alphabetic(), ch.is_alphabetic()); @@ -938,49 +994,49 @@ mod tests { #[test] #[should_panic] fn is_digit_bad_radix() { - AsciiChar::_7.is_digit(37); + let _ = AsciiChar::_7.is_digit(37); } #[test] fn cmp_wider() { - assert_eq!(A, 'A'); - assert_eq!(b'b', b); - assert!(a < 'z'); + assert_eq!(AsciiChar::A, 'A'); + assert_eq!(b'b', AsciiChar::b); + assert!(AsciiChar::a < 'z'); } #[test] fn ascii_case() { - assert_eq!(At.to_ascii_lowercase(), At); - assert_eq!(At.to_ascii_uppercase(), At); - assert_eq!(A.to_ascii_lowercase(), a); - assert_eq!(A.to_ascii_uppercase(), A); - assert_eq!(a.to_ascii_lowercase(), a); - assert_eq!(a.to_ascii_uppercase(), A); - - let mut mutable = (A, a); + assert_eq!(AsciiChar::At.to_ascii_lowercase(), AsciiChar::At); + assert_eq!(AsciiChar::At.to_ascii_uppercase(), AsciiChar::At); + assert_eq!(AsciiChar::A.to_ascii_lowercase(), AsciiChar::a); + assert_eq!(AsciiChar::A.to_ascii_uppercase(), AsciiChar::A); + assert_eq!(AsciiChar::a.to_ascii_lowercase(), AsciiChar::a); + assert_eq!(AsciiChar::a.to_ascii_uppercase(), AsciiChar::A); + + let mut mutable = (AsciiChar::A, AsciiChar::a); mutable.0.make_ascii_lowercase(); mutable.1.make_ascii_uppercase(); - assert_eq!(mutable.0, a); - assert_eq!(mutable.1, A); + assert_eq!(mutable.0, AsciiChar::a); + assert_eq!(mutable.1, AsciiChar::A); - assert!(LineFeed.eq_ignore_ascii_case(&LineFeed)); - assert!(!LineFeed.eq_ignore_ascii_case(&CarriageReturn)); - assert!(z.eq_ignore_ascii_case(&Z)); - assert!(Z.eq_ignore_ascii_case(&z)); - assert!(A.eq_ignore_ascii_case(&a)); - assert!(!K.eq_ignore_ascii_case(&C)); - assert!(!Z.eq_ignore_ascii_case(&DEL)); - assert!(!BracketOpen.eq_ignore_ascii_case(&CurlyBraceOpen)); - assert!(!Grave.eq_ignore_ascii_case(&At)); - assert!(!Grave.eq_ignore_ascii_case(&DEL)); + assert!(AsciiChar::LineFeed.eq_ignore_ascii_case(&AsciiChar::LineFeed)); + assert!(!AsciiChar::LineFeed.eq_ignore_ascii_case(&AsciiChar::CarriageReturn)); + assert!(AsciiChar::z.eq_ignore_ascii_case(&AsciiChar::Z)); + assert!(AsciiChar::Z.eq_ignore_ascii_case(&AsciiChar::z)); + assert!(AsciiChar::A.eq_ignore_ascii_case(&AsciiChar::a)); + assert!(!AsciiChar::K.eq_ignore_ascii_case(&AsciiChar::C)); + assert!(!AsciiChar::Z.eq_ignore_ascii_case(&AsciiChar::DEL)); + assert!(!AsciiChar::BracketOpen.eq_ignore_ascii_case(&AsciiChar::CurlyBraceOpen)); + assert!(!AsciiChar::Grave.eq_ignore_ascii_case(&AsciiChar::At)); + assert!(!AsciiChar::Grave.eq_ignore_ascii_case(&AsciiChar::DEL)); } #[test] #[cfg(feature = "std")] fn fmt_ascii() { - assert_eq!(format!("{}", t), "t"); - assert_eq!(format!("{:?}", t), "'t'"); - assert_eq!(format!("{}", LineFeed), "\n"); - assert_eq!(format!("{:?}", LineFeed), "'\\n'"); + assert_eq!(format!("{}", AsciiChar::t), "t"); + assert_eq!(format!("{:?}", AsciiChar::t), "'t'"); + assert_eq!(format!("{}", AsciiChar::LineFeed), "\n"); + assert_eq!(format!("{:?}", AsciiChar::LineFeed), "'\\n'"); } } diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 208e9a3..2d1fbbd 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -11,7 +11,7 @@ use ascii_char::AsciiChar; #[cfg(feature = "std")] use ascii_string::AsciiString; -/// AsciiStr represents a byte or string slice that only contains ASCII characters. +/// [`AsciiStr`] represents a byte or string slice that only contains ASCII characters. /// /// It wraps an `[AsciiChar]` and implements many of `str`s methods and traits. /// @@ -26,24 +26,28 @@ pub struct AsciiStr { impl AsciiStr { /// Converts `&self` to a `&str` slice. #[inline] + #[must_use] pub fn as_str(&self) -> &str { unsafe { &*(self as *const AsciiStr as *const str) } } /// Converts `&self` into a byte slice. #[inline] + #[must_use] pub fn as_bytes(&self) -> &[u8] { unsafe { &*(self as *const AsciiStr as *const [u8]) } } /// Returns the entire string as slice of `AsciiChar`s. #[inline] + #[must_use] pub const fn as_slice(&self) -> &[AsciiChar] { &self.slice } /// Returns the entire string as mutable slice of `AsciiChar`s. #[inline] + #[must_use] pub fn as_mut_slice(&mut self) -> &mut [AsciiChar] { &mut self.slice } @@ -54,6 +58,7 @@ impl AsciiStr { /// will end up pointing to garbage. Modifying the `AsciiStr` may cause it's buffer to be /// reallocated, which would also make any pointers to it invalid. #[inline] + #[must_use] pub const fn as_ptr(&self) -> *const AsciiChar { self.as_slice().as_ptr() } @@ -64,18 +69,23 @@ impl AsciiStr { /// will end up pointing to garbage. Modifying the `AsciiStr` may cause it's buffer to be /// reallocated, which would also make any pointers to it invalid. #[inline] + #[must_use] pub fn as_mut_ptr(&mut self) -> *mut AsciiChar { self.as_mut_slice().as_mut_ptr() } /// Copies the content of this `AsciiStr` into an owned `AsciiString`. #[cfg(feature = "std")] + #[must_use] pub fn to_ascii_string(&self) -> AsciiString { AsciiString::from(self.slice.to_vec()) } /// Converts anything that can represent a byte slice into an `AsciiStr`. /// + /// # Errors + /// If `bytes` contains a non-ascii byte, `Err` will be returned + /// /// # Examples /// ``` /// # use ascii::AsciiStr; @@ -102,6 +112,7 @@ impl AsciiStr { /// assert_eq!(foo.as_str(), "foo"); /// ``` #[inline] + #[must_use] pub unsafe fn from_ascii_unchecked(bytes: &[u8]) -> &AsciiStr { bytes.as_ascii_str_unchecked() } @@ -115,6 +126,7 @@ impl AsciiStr { /// assert_eq!(s.len(), 3); /// ``` #[inline] + #[must_use] pub fn len(&self) -> usize { self.slice.len() } @@ -130,12 +142,14 @@ impl AsciiStr { /// assert!(!full.is_empty()); /// ``` #[inline] + #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } /// Returns an iterator over the characters of the `AsciiStr`. #[inline] + #[must_use] pub fn chars(&self) -> Chars { Chars(self.slice.iter()) } @@ -143,6 +157,7 @@ impl AsciiStr { /// Returns an iterator over the characters of the `AsciiStr` which allows you to modify the /// value of each `AsciiChar`. #[inline] + #[must_use] pub fn chars_mut(&mut self) -> CharsMut { CharsMut(self.slice.iter_mut()) } @@ -158,6 +173,7 @@ impl AsciiStr { /// .collect::>(); /// assert_eq!(words, ["apple", "banana", "lemon"]); /// ``` + #[must_use] pub fn split(&self, on: AsciiChar) -> impl DoubleEndedIterator { Split { on, @@ -172,6 +188,7 @@ impl AsciiStr { /// /// The final line ending is optional. #[inline] + #[must_use] pub fn lines(&self) -> impl DoubleEndedIterator { Lines { string: self } } @@ -184,6 +201,7 @@ impl AsciiStr { /// let example = AsciiStr::from_ascii(" \twhite \tspace \t").unwrap(); /// assert_eq!("white \tspace", example.trim()); /// ``` + #[must_use] pub fn trim(&self) -> &Self { self.trim_start().trim_end() } @@ -196,8 +214,15 @@ impl AsciiStr { /// let example = AsciiStr::from_ascii(" \twhite \tspace \t").unwrap(); /// assert_eq!("white \tspace \t", example.trim_start()); /// ``` + #[must_use] pub fn trim_start(&self) -> &Self { - &self[self.chars().take_while(|ch| ch.is_whitespace()).count()..] + let whitespace_len = self + .chars() + .position(|ch| !ch.is_whitespace()) + .unwrap_or_else(|| self.len()); + + // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice. + unsafe { self.as_slice().get_unchecked(whitespace_len..).into() } } /// Returns an ASCII string slice with trailing whitespace removed. @@ -208,16 +233,25 @@ impl AsciiStr { /// let example = AsciiStr::from_ascii(" \twhite \tspace \t").unwrap(); /// assert_eq!(" \twhite \tspace", example.trim_end()); /// ``` + #[must_use] pub fn trim_end(&self) -> &Self { - let trimmed = self + // Number of whitespace characters counting from the end + let whitespace_len = self .chars() .rev() - .take_while(|ch| ch.is_whitespace()) - .count(); - &self[..self.len() - trimmed] + .position(|ch| !ch.is_whitespace()) + .unwrap_or_else(|| self.len()); + + // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice, and at least `0`, which is the whole slice. + unsafe { + self.as_slice() + .get_unchecked(..self.len() - whitespace_len) + .into() + } } /// Compares two strings case-insensitively. + #[must_use] pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.len() == other.len() && self @@ -242,6 +276,7 @@ impl AsciiStr { /// Returns a copy of this string where letters 'a' to 'z' are mapped to 'A' to 'Z'. #[cfg(feature = "std")] + #[must_use] pub fn to_ascii_uppercase(&self) -> AsciiString { let mut ascii_string = self.to_ascii_string(); ascii_string.make_ascii_uppercase(); @@ -250,6 +285,7 @@ impl AsciiStr { /// Returns a copy of this string where letters 'A' to 'Z' are mapped to 'a' to 'z'. #[cfg(feature = "std")] + #[must_use] pub fn to_ascii_lowercase(&self) -> AsciiString { let mut ascii_string = self.to_ascii_string(); ascii_string.make_ascii_lowercase(); @@ -258,12 +294,14 @@ impl AsciiStr { /// Returns the first character if the string is not empty. #[inline] + #[must_use] pub fn first(&self) -> Option { self.slice.first().cloned() } /// Returns the last character if the string is not empty. #[inline] + #[must_use] pub fn last(&self) -> Option { self.slice.last().cloned() } @@ -442,6 +480,7 @@ impl fmt::Debug for AsciiStr { macro_rules! impl_index { ($idx:ty) => { + #[allow(clippy::indexing_slicing)] // In `Index`, if it's out of bounds, panic is the default impl Index<$idx> for AsciiStr { type Output = AsciiStr; @@ -451,6 +490,7 @@ macro_rules! impl_index { } } + #[allow(clippy::indexing_slicing)] // In `IndexMut`, if it's out of bounds, panic is the default impl IndexMut<$idx> for AsciiStr { #[inline] fn index_mut(&mut self, index: $idx) -> &mut AsciiStr { @@ -467,6 +507,7 @@ impl_index! { RangeFull } impl_index! { RangeInclusive } impl_index! { RangeToInclusive } +#[allow(clippy::indexing_slicing)] // In `Index`, if it's out of bounds, panic is the default impl Index for AsciiStr { type Output = AsciiChar; @@ -476,6 +517,7 @@ impl Index for AsciiStr { } } +#[allow(clippy::indexing_slicing)] // In `IndexMut`, if it's out of bounds, panic is the default impl IndexMut for AsciiStr { #[inline] fn index_mut(&mut self, index: usize) -> &mut AsciiChar { @@ -510,6 +552,7 @@ impl<'a> IntoIterator for &'a mut AsciiStr { pub struct Chars<'a>(Iter<'a, AsciiChar>); impl<'a> Chars<'a> { /// Returns the ascii string slice with the remaining characters. + #[must_use] pub fn as_str(&self) -> &'a AsciiStr { self.0.as_slice().into() } @@ -541,6 +584,7 @@ impl<'a> ExactSizeIterator for Chars<'a> { pub struct CharsMut<'a>(IterMut<'a, AsciiChar>); impl<'a> CharsMut<'a> { /// Returns the ascii string slice with the remaining characters. + #[must_use] pub fn into_str(self) -> &'a mut AsciiStr { self.0.into_slice().into() } @@ -572,6 +616,7 @@ impl<'a> ExactSizeIterator for CharsMut<'a> { pub struct CharsRef<'a>(Iter<'a, AsciiChar>); impl<'a> CharsRef<'a> { /// Returns the ascii string slice with the remaining characters. + #[must_use] pub fn as_str(&self) -> &'a AsciiStr { self.0.as_slice().into() } @@ -609,8 +654,10 @@ impl<'a> Iterator for Split<'a> { if !self.ended { let start: &AsciiStr = self.chars.as_str(); let split_on = self.on; + if let Some(at) = self.chars.position(|ch| ch == split_on) { - Some(&start[..at]) + // SAFETY: `at` is guaranteed to be in bounds, as `position` returns `Ok(0..len)`. + Some(unsafe { start.as_slice().get_unchecked(..at).into() }) } else { self.ended = true; Some(start) @@ -625,8 +672,10 @@ impl<'a> DoubleEndedIterator for Split<'a> { if !self.ended { let start: &AsciiStr = self.chars.as_str(); let split_on = self.on; + if let Some(at) = self.chars.rposition(|ch| ch == split_on) { - Some(&start[at + 1..]) + // SAFETY: `at` is guaranteed to be in bounds, as `rposition` returns `Ok(0..len)`, and slices `1..`, `2..`, etc... until `len..` inclusive, are valid. + Some(unsafe { start.as_slice().get_unchecked(at + 1..).into() }) } else { self.ended = true; Some(start) @@ -651,40 +700,81 @@ impl<'a> Iterator for Lines<'a> { .chars() .position(|chr| chr == AsciiChar::LineFeed) { - let line = if idx > 0 && self.string[idx - 1] == AsciiChar::CarriageReturn { - &self.string[..idx - 1] + // SAFETY: `idx` is guaranteed to be `1..len`, as we get it from `position` as `0..len` and make sure it's not `0`. + let line = if idx > 0 + && *unsafe { self.string.as_slice().get_unchecked(idx - 1) } + == AsciiChar::CarriageReturn + { + // SAFETY: As per above, `idx` is guaranteed to be `1..len` + unsafe { self.string.as_slice().get_unchecked(..idx - 1).into() } } else { - &self.string[..idx] + // SAFETY: As per above, `idx` is guaranteed to be `0..len` + unsafe { self.string.as_slice().get_unchecked(..idx).into() } }; - self.string = &self.string[idx + 1..]; + // SAFETY: As per above, `idx` is guaranteed to be `0..len`, so at the extreme, slicing `len..` is a valid empty slice. + self.string = unsafe { self.string.as_slice().get_unchecked(idx + 1..).into() }; Some(line) } else if self.string.is_empty() { None } else { let line = self.string; - self.string = &self.string[..0]; + // SAFETY: An empty string is a valid string. + self.string = unsafe { AsciiStr::from_ascii_unchecked(b"") }; Some(line) } } } + impl<'a> DoubleEndedIterator for Lines<'a> { fn next_back(&mut self) -> Option<&'a AsciiStr> { if self.string.is_empty() { return None; } - let mut i = self.string.len(); - if self.string[i - 1] == AsciiChar::LineFeed { - i -= 1; - if i > 0 && self.string[i - 1] == AsciiChar::CarriageReturn { - i -= 1; + + // If we end with `LF` / `CR/LF`, remove them + if let Some(AsciiChar::LineFeed) = self.string.last() { + // SAFETY: `last()` returned `Some`, so our len is at least 1. + self.string = unsafe { + self.string + .as_slice() + .get_unchecked(..self.string.len() - 1) + .into() + }; + + if let Some(AsciiChar::CarriageReturn) = self.string.last() { + // SAFETY: `last()` returned `Some`, so our len is at least 1. + self.string = unsafe { + self.string + .as_slice() + .get_unchecked(..self.string.len() - 1) + .into() + }; } } - self.string = &self.string[..i]; - while i > 0 && self.string[i - 1] != AsciiChar::LineFeed { - i -= 1; - } - let line = &self.string[i..]; - self.string = &self.string[..i]; + + // Get the position of the first `LF` from the end. + let lf_rev_pos = self + .string + .chars() + .rev() + .position(|ch| ch == AsciiChar::LineFeed) + .unwrap_or_else(|| self.string.len()); + + // SAFETY: `lf_rev_pos` will be in range `0..=len`, so `len - lf_rev_pos` + // will be within `0..=len`, making it correct as a start and end + // point for the strings. + let line = unsafe { + self.string + .as_slice() + .get_unchecked(self.string.len() - lf_rev_pos..) + .into() + }; + self.string = unsafe { + self.string + .as_slice() + .get_unchecked(..self.string.len() - lf_rev_pos) + .into() + }; Some(line) } } @@ -702,12 +792,14 @@ impl AsAsciiStrError { /// /// It is the maximum index such that `from_ascii(input[..index])` would return `Ok(_)`. #[inline] + #[must_use] pub const fn valid_up_to(self) -> usize { self.0 } #[cfg(not(feature = "std"))] /// Returns a description for this error, like `std::error::Error::description`. #[inline] + #[must_use] pub const fn description(&self) -> &'static str { ERRORMSG_STR } @@ -725,7 +817,7 @@ impl Error for AsAsciiStrError { } } -/// Convert slices of bytes or AsciiChar to `AsciiStr`. +/// Convert slices of bytes or [`AsciiChar`] to [`AsciiStr`]. // Could nearly replace this trait with SliceIndex, but its methods isn't even // on a path for stabilization. pub trait AsAsciiStr { @@ -734,6 +826,7 @@ pub trait AsAsciiStr { type Inner; /// Convert a subslice to an ASCII slice. /// + /// # Errors /// Returns `Err` if the range is out of bounds or if not all bytes in the /// slice are ASCII. The value in the error will be the index of the first /// non-ASCII byte or the end of the slice. @@ -752,6 +845,9 @@ pub trait AsAsciiStr { R: SliceIndex<[Self::Inner], Output = [Self::Inner]>; /// Convert to an ASCII slice. /// + /// # Errors + /// Returns `Err` if not all bytes are valid ascii values. + /// /// # Example /// ``` /// use ascii::{AsAsciiStr, AsciiChar}; @@ -778,7 +874,7 @@ pub trait AsAsciiStr { fn get_ascii(&self, index: usize) -> Option { self.slice_ascii(index..=index) .ok() - .and_then(|str| str.first()) + .and_then(AsciiStr::first) } /// Convert to an ASCII slice without checking for non-ASCII characters. /// @@ -787,13 +883,20 @@ pub trait AsAsciiStr { unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr; } -/// Convert mutable slices of bytes or AsciiChar to `AsciiStr`. +/// Convert mutable slices of bytes or [`AsciiChar`] to [`AsciiStr`]. pub trait AsMutAsciiStr: AsAsciiStr { /// Convert a subslice to an ASCII slice. + /// + /// # Errors + /// This function returns `Err` if range is out of bounds, or if + /// `self` contains non-ascii values fn slice_ascii_mut(&mut self, range: R) -> Result<&mut AsciiStr, AsAsciiStrError> where R: SliceIndex<[Self::Inner], Output = [Self::Inner]>; /// Convert to a mutable ASCII slice. + /// + /// # Errors + /// This function returns `Err` if `self` contains non-ascii values fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> { self.slice_ascii_mut(..) } @@ -928,6 +1031,7 @@ impl AsMutAsciiStr for [AsciiChar] { impl AsAsciiStr for [u8] { type Inner = u8; + fn slice_ascii(&self, range: R) -> Result<&AsciiStr, AsAsciiStrError> where R: SliceIndex<[u8], Output = [u8]>, @@ -1067,19 +1171,41 @@ mod tests { use super::{AsAsciiStr, AsAsciiStrError, AsMutAsciiStr, AsciiStr}; use AsciiChar; + /// Ensures that common types, `str`, `[u8]`, `AsciiStr` and their + /// references, shared and mutable implement `AsAsciiStr`. #[test] fn generic_as_ascii_str() { + // Generic function to ensure `C` implements `AsAsciiStr` fn generic(c: &C) -> Result<&AsciiStr, AsAsciiStrError> { c.as_ascii_str() } + let arr = [AsciiChar::A]; - let ascii_str: &AsciiStr = arr.as_ref().into(); - assert_eq!(generic("A"), Ok(ascii_str)); - assert_eq!(generic(&b"A"[..]), Ok(ascii_str)); - assert_eq!(generic(ascii_str), Ok(ascii_str)); - assert_eq!(generic(&"A"), Ok(ascii_str)); - assert_eq!(generic(&ascii_str), Ok(ascii_str)); - assert_eq!(generic(&mut "A"), Ok(ascii_str)); + let ascii_str = arr.as_ref().into(); + let mut mut_arr = arr; // Note: We need a second copy to prevent overlapping mutable borrows. + let mut_ascii_str = mut_arr.as_mut().into(); + let mut_arr_mut_ref: &mut [AsciiChar] = &mut [AsciiChar::A]; + let mut string_bytes = [b'A']; + let string_mut = unsafe { core::str::from_utf8_unchecked_mut(&mut string_bytes) }; // SAFETY: 'A' is a valid string. + let string_mut_bytes: &mut [u8] = &mut [b'A']; + + // Note: This is a trick because `rustfmt` doesn't support + // attributes on blocks yet. + #[rustfmt::skip] + let _ = [ + assert_eq!(generic::("A" ), Ok(ascii_str)), + assert_eq!(generic::<[u8] >(&b"A"[..] ), Ok(ascii_str)), + assert_eq!(generic::(ascii_str ), Ok(ascii_str)), + assert_eq!(generic::<[AsciiChar] >(&arr ), Ok(ascii_str)), + assert_eq!(generic::<&str >(&"A" ), Ok(ascii_str)), + assert_eq!(generic::<&[u8] >(&&b"A"[..] ), Ok(ascii_str)), + assert_eq!(generic::<&AsciiStr >(&ascii_str ), Ok(ascii_str)), + assert_eq!(generic::<&[AsciiChar] >(&&arr[..] ), Ok(ascii_str)), + assert_eq!(generic::<&mut str >(&string_mut ), Ok(ascii_str)), + assert_eq!(generic::<&mut [u8] >(&string_mut_bytes), Ok(ascii_str)), + assert_eq!(generic::<&mut AsciiStr >(&mut_ascii_str ), Ok(ascii_str)), + assert_eq!(generic::<&mut [AsciiChar]>(&mut_arr_mut_ref ), Ok(ascii_str)), + ]; } #[cfg(feature = "std")] @@ -1147,11 +1273,11 @@ mod tests { assert_eq!(cstr.get_ascii(5), None); assert_eq!(cstr.get_ascii(6), Some(AsciiChar::g)); assert_eq!(cstr.get_ascii(7), None); - let aslice = &[AsciiChar::X, AsciiChar::Y, AsciiChar::Z, AsciiChar::Null][..]; - let astr: &AsciiStr = aslice.as_ref(); - let cstr = CStr::from_bytes_with_nul(astr.as_bytes()).unwrap(); - assert_eq!(cstr.slice_ascii(..2), Ok(&astr[..2])); - assert_eq!(cstr.as_ascii_str(), Ok(&astr[..3])); + let ascii_slice = &[AsciiChar::X, AsciiChar::Y, AsciiChar::Z, AsciiChar::Null][..]; + let ascii_str: &AsciiStr = ascii_slice.as_ref(); + let cstr = CStr::from_bytes_with_nul(ascii_str.as_bytes()).unwrap(); + assert_eq!(cstr.slice_ascii(..2), Ok(&ascii_str[..2])); + assert_eq!(cstr.as_ascii_str(), Ok(&ascii_str[..3])); } #[test] @@ -1316,6 +1442,15 @@ mod tests { assert_eq!(iter.next_back(), Some("baz".as_ascii_str().unwrap())); assert_eq!(iter.next_back(), Some("".as_ascii_str().unwrap())); assert_eq!(iter.next(), Some("bar".as_ascii_str().unwrap())); + + let empty_lines = b"\n\r\n\n\r\n"; + let mut iter_count = 0; + let ascii = AsciiStr::from_ascii(&empty_lines).unwrap(); + for line in ascii.lines().rev() { + iter_count += 1; + assert!(line.is_empty()); + } + assert_eq!(4, iter_count); } #[test] @@ -1333,7 +1468,7 @@ mod tests { .as_ascii_str() .unwrap() .split(AsciiChar::from_ascii(needle).unwrap()) - .map(|a| a.as_str()); + .map(AsciiStr::as_str); loop { assert_eq!(asciis.size_hint(), strs.size_hint()); let (a, s) = (asciis.next(), strs.next()); diff --git a/src/ascii_string.rs b/src/ascii_string.rs index 5f0b644..b5a66db 100644 --- a/src/ascii_string.rs +++ b/src/ascii_string.rs @@ -26,6 +26,7 @@ impl AsciiString { /// let mut s = AsciiString::new(); /// ``` #[inline] + #[must_use] pub const fn new() -> Self { AsciiString { vec: Vec::new() } } @@ -40,6 +41,7 @@ impl AsciiString { /// let mut s = AsciiString::with_capacity(10); /// ``` #[inline] + #[must_use] pub fn with_capacity(capacity: usize) -> Self { AsciiString { vec: Vec::with_capacity(capacity), @@ -81,6 +83,7 @@ impl AsciiString { /// } /// ``` #[inline] + #[must_use] pub unsafe fn from_raw_parts(buf: *mut AsciiChar, length: usize, capacity: usize) -> Self { AsciiString { vec: Vec::from_raw_parts(buf, length, capacity), @@ -95,6 +98,7 @@ impl AsciiString { /// future of the `AsciiString`, as the rest of this library assumes that `AsciiString`s are /// ASCII encoded. #[inline] + #[must_use] pub unsafe fn from_ascii_unchecked(bytes: B) -> Self where B: Into>, @@ -111,7 +115,7 @@ impl AsciiString { /// Converts anything that can represent a byte buffer into an `AsciiString`. /// - /// # Failure + /// # Errors /// Returns the byte buffer if not all of the bytes are ASCII characters. /// /// # Examples @@ -161,6 +165,7 @@ impl AsciiString { /// assert!(s.capacity() >= 10); /// ``` #[inline] + #[must_use] pub fn capacity(&self) -> usize { self.vec.capacity() } @@ -201,6 +206,7 @@ impl AsciiString { /// assert!(s.capacity() >= 10); /// ``` #[inline] + pub fn reserve_exact(&mut self, additional: usize) { self.vec.reserve_exact(additional) } @@ -218,6 +224,7 @@ impl AsciiString { /// assert_eq!(s.capacity(), 3); /// ``` #[inline] + pub fn shrink_to_fit(&mut self) { self.vec.shrink_to_fit() } @@ -234,6 +241,7 @@ impl AsciiString { /// assert_eq!(s, "abc123"); /// ``` #[inline] + pub fn push(&mut self, ch: AsciiChar) { self.vec.push(ch) } @@ -251,6 +259,7 @@ impl AsciiString { /// assert_eq!(s, "he"); /// ``` #[inline] + pub fn truncate(&mut self, new_len: usize) { self.vec.truncate(new_len) } @@ -268,6 +277,7 @@ impl AsciiString { /// assert_eq!(s.pop(), None); /// ``` #[inline] + #[must_use] pub fn pop(&mut self) -> Option { self.vec.pop() } @@ -289,6 +299,7 @@ impl AsciiString { /// assert_eq!(s.remove(0).as_char(), 'o'); /// ``` #[inline] + #[must_use] pub fn remove(&mut self, idx: usize) -> AsciiChar { self.vec.remove(idx) } @@ -309,6 +320,7 @@ impl AsciiString { /// assert_eq!(s, "fobo"); /// ``` #[inline] + pub fn insert(&mut self, idx: usize, ch: AsciiChar) { self.vec.insert(idx, ch) } @@ -322,6 +334,7 @@ impl AsciiString { /// assert_eq!(s.len(), 3); /// ``` #[inline] + #[must_use] pub fn len(&self) -> usize { self.vec.len() } @@ -337,6 +350,7 @@ impl AsciiString { /// assert!(!s.is_empty()); /// ``` #[inline] + #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } @@ -351,6 +365,7 @@ impl AsciiString { /// assert!(s.is_empty()); /// ``` #[inline] + pub fn clear(&mut self) { self.vec.clear() } @@ -605,6 +620,7 @@ impl<'a> AddAssign<&'a AsciiStr> for AsciiString { } } +#[allow(clippy::indexing_slicing)] // In `Index`, if it's out of bounds, panic is the default impl Index for AsciiString where AsciiStr: Index, @@ -617,6 +633,7 @@ where } } +#[allow(clippy::indexing_slicing)] // In `IndexMut`, if it's out of bounds, panic is the default impl IndexMut for AsciiString where AsciiStr: IndexMut, @@ -649,11 +666,13 @@ pub struct FromAsciiError { impl FromAsciiError { /// Get the position of the first non-ASCII byte or character. #[inline] + #[must_use] pub fn ascii_error(&self) -> AsAsciiStrError { self.error } /// Get back the original, unmodified type. #[inline] + #[must_use] pub fn into_source(self) -> O { self.owner } @@ -672,6 +691,7 @@ impl fmt::Display for FromAsciiError { } impl Error for FromAsciiError { #[inline] + #[allow(deprecated)] // TODO: Remove deprecation once the earliest version we support deprecates this method. fn description(&self) -> &str { self.error.description() } @@ -686,6 +706,9 @@ pub trait IntoAsciiString: Sized { /// Convert to `AsciiString` without checking for non-ASCII characters. unsafe fn into_ascii_string_unchecked(self) -> AsciiString; /// Convert to `AsciiString`. + /// + /// # Errors + /// If `self` contains non-ascii characters, this will return `Err` fn into_ascii_string(self) -> Result>; } @@ -778,8 +801,8 @@ impl IntoAsciiString for CString { } }) .map(|mut s| { - let _nul = s.pop(); - debug_assert_eq!(_nul, Some(AsciiChar::Null)); + let nul = s.pop(); + debug_assert_eq!(nul, Some(AsciiChar::Null)); s }) } @@ -799,8 +822,8 @@ impl<'a> IntoAsciiString for &'a CStr { error, }) .map(|mut s| { - let _nul = s.pop(); - debug_assert_eq!(_nul, Some(AsciiChar::Null)); + let nul = s.pop(); + debug_assert_eq!(nul, Some(AsciiChar::Null)); s }) } @@ -879,7 +902,7 @@ mod tests { assert_eq!(ascii_str_unchecked.len(), 3); assert_eq!(ascii_str_unchecked.as_slice(), expected_chars); - let sparkle_heart_bytes = vec![240u8, 159, 146, 150]; + let sparkle_heart_bytes = vec![240_u8, 159, 146, 150]; let cstring = CString::new(sparkle_heart_bytes).unwrap(); let cstr = &*cstring; let ascii_err = cstr.into_ascii_string().unwrap_err(); diff --git a/src/lib.rs b/src/lib.rs index 4c77509..6161c2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,30 @@ //! API changed significantly since then. #![cfg_attr(not(feature = "std"), no_std)] -#![allow(clippy::trivially_copy_pass_by_ref)] // for compatibility with methods on char and u8 +// Clippy lints +#![warn( + clippy::pedantic, + clippy::decimal_literal_representation, + clippy::get_unwrap, + clippy::indexing_slicing +)] +// Naming conventions sometimes go against this lint +#![allow(clippy::module_name_repetitions)] +// We need to get literal non-asciis for tests +#![allow(clippy::non_ascii_literal)] +// Sometimes it looks better to invert the order, such as when the `else` block is small +#![allow(clippy::if_not_else)] +// Shadowing is common and doesn't affect understanding +// TODO: Consider removing `shadow_unrelated`, as it can show some actual logic errors +#![allow(clippy::shadow_unrelated, clippy::shadow_reuse, clippy::shadow_same)] +// A `if let` / `else` sometimes looks better than using iterator adaptors +#![allow(clippy::option_if_let_else)] +// In tests, we're fine with indexing, since a panic is a failure. +#![cfg_attr(test, allow(clippy::indexing_slicing))] +// for compatibility with methods on char and u8 +#![allow(clippy::trivially_copy_pass_by_ref)] +// In preparation for feature `unsafe_block_in_unsafe_fn` (https://github.com/rust-lang/rust/issues/71668) +#![allow(unused_unsafe)] #[cfg(feature = "std")] extern crate core; diff --git a/tests.rs b/tests.rs index b9f3c9c..3454830 100644 --- a/tests.rs +++ b/tests.rs @@ -130,7 +130,7 @@ fn extend_from_iterator() { .unwrap() .split(AsciiChar::Space) .map(|case| { - if case.chars().all(|ch| ch.is_uppercase()) { + if case.chars().all(AsciiChar::is_uppercase) { Cow::from(case) } else { Cow::from(case.to_ascii_uppercase())