From cb19d68abd52c338119da83a69bead1c808dc399 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sat, 19 Sep 2020 21:17:58 +0100 Subject: [PATCH 1/9] Fixed most existing clippy lints and now `warn`ing against some `pedantic` clippy lints. --- src/ascii_char.rs | 137 ++++++++++++++++++++++++++++++-------------- src/ascii_str.rs | 74 ++++++++++++++++++++---- src/ascii_string.rs | 32 +++++++++-- src/lib.rs | 16 +++++- tests.rs | 2 +- 5 files changed, 198 insertions(+), 63 deletions(-) diff --git a/src/ascii_char.rs b/src/ascii_char.rs index c595e3a..2444b81 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -280,7 +280,7 @@ pub enum AsciiChar { impl AsciiChar { /// Constructs an ASCII character from a `u8`, `char` or other character type. /// - /// # Failure + /// # Errors /// Returns `Err(())` if the character can't be ASCII encoded. /// /// # Example @@ -328,8 +328,13 @@ impl AsciiChar { /// /// The panic message might not be the most descriptive due to the /// current limitations of `const fn`. + #[must_use] pub const fn new(ch: char) -> AsciiChar { + // It's restricted to this function, and without it + // we'd need to specify `AsciiChar::` or `Self::` 128 times. + #[allow(clippy::enum_glob_use)] use AsciiChar::*; + #[rustfmt::skip] const ALL: [AsciiChar; 128] = [ Null, SOH, SOX, ETX, EOT, ENQ, ACK, Bell, @@ -366,18 +371,21 @@ impl AsciiChar { /// might not panic, creating a buffer overflow, /// and `Some(AsciiChar::from_ascii_unchecked(128))` might be `None`. #[inline] + #[must_use] pub unsafe fn from_ascii_unchecked(ch: u8) -> Self { ch.to_ascii_char_unchecked() } /// Converts an ASCII character into a `u8`. #[inline] + #[must_use] pub const fn as_byte(self) -> u8 { self as u8 } /// Converts an ASCII character into a `char`. #[inline] + #[must_use] pub const fn as_char(self) -> char { self as u8 as char } @@ -390,12 +398,14 @@ impl AsciiChar { // make the compiler optimize away the indirection. /// Turns uppercase into lowercase, but also modifies '@' and '<'..='_' + #[must_use] const fn to_not_upper(self) -> u8 { self as u8 | 0b010_0000 } /// Check if the character is a letter (a-z, A-Z) #[inline] + #[must_use] pub const fn is_alphabetic(self) -> bool { (self.to_not_upper() >= b'a') & (self.to_not_upper() <= b'z') } @@ -404,6 +414,7 @@ impl AsciiChar { /// /// This method is identical to [`is_alphabetic()`](#method.is_alphabetic) #[inline] + #[must_use] pub const fn is_ascii_alphabetic(&self) -> bool { self.is_alphabetic() } @@ -417,6 +428,7 @@ impl AsciiChar { /// # Panics /// /// Radixes greater than 36 are not supported and will result in a panic. + #[must_use] pub fn is_digit(self, radix: u32) -> bool { match (self as u8, radix) { (b'0'..=b'9', 0..=36) => u32::from(self as u8 - b'0') < radix, @@ -439,12 +451,14 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('/').is_ascii_digit(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_digit(&self) -> bool { (*self as u8 >= b'0') & (*self as u8 <= b'9') } /// Check if the character is a letter or number #[inline] + #[must_use] pub const fn is_alphanumeric(self) -> bool { self.is_alphabetic() | self.is_ascii_digit() } @@ -453,6 +467,7 @@ impl AsciiChar { /// /// This method is identical to [`is_alphanumeric()`](#method.is_alphanumeric) #[inline] + #[must_use] pub const fn is_ascii_alphanumeric(&self) -> bool { self.is_alphanumeric() } @@ -470,6 +485,7 @@ impl AsciiChar { /// assert!(!AsciiChar::FF.is_ascii_blank()); /// ``` #[inline] + #[must_use] pub const fn is_ascii_blank(&self) -> bool { (*self as u8 == b' ') | (*self as u8 == b'\t') } @@ -477,6 +493,7 @@ impl AsciiChar { /// Check if the character one of ' ', '\t', '\n', '\r', /// '\0xb' (vertical tab) or '\0xc' (form feed). #[inline] + #[must_use] pub const fn is_whitespace(self) -> bool { let b = self as u8; self.is_ascii_blank() | (b == b'\n') | (b == b'\r') | (b == 0x0b) | (b == 0x0c) @@ -486,6 +503,7 @@ impl AsciiChar { /// /// This method is NOT identical to `is_whitespace()`. #[inline] + #[must_use] pub const fn is_ascii_whitespace(&self) -> bool { self.is_ascii_blank() | (*self as u8 == b'\n') @@ -506,6 +524,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::EOT.is_ascii_control(), true); /// ``` #[inline] + #[must_use] pub const fn is_ascii_control(&self) -> bool { ((*self as u8) < b' ') | (*self as u8 == 127) } @@ -520,6 +539,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('\n').is_ascii_graphic(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_graphic(&self) -> bool { self.as_byte().wrapping_sub(b' ' + 1) < 0x5E } @@ -534,6 +554,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('\n').is_ascii_printable(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_printable(&self) -> bool { self.as_byte().wrapping_sub(b' ') < 0x5F } @@ -548,6 +569,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('@').is_lowercase(), false); /// ``` #[inline] + #[must_use] pub const fn is_lowercase(self) -> bool { self.as_byte().wrapping_sub(b'a') < 26 } @@ -556,6 +578,7 @@ impl AsciiChar { /// /// This method is identical to [`is_lowercase()`](#method.is_lowercase) #[inline] + #[must_use] pub const fn is_ascii_lowercase(&self) -> bool { self.is_lowercase() } @@ -570,6 +593,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('@').is_uppercase(), false); /// ``` #[inline] + #[must_use] pub const fn is_uppercase(self) -> bool { self.as_byte().wrapping_sub(b'A') < 26 } @@ -578,6 +602,7 @@ impl AsciiChar { /// /// This method is identical to [`is_uppercase()`](#method.is_uppercase) #[inline] + #[must_use] pub const fn is_ascii_uppercase(&self) -> bool { self.is_uppercase() } @@ -593,6 +618,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('~').is_ascii_punctuation(), true); /// ``` #[inline] + #[must_use] pub const fn is_ascii_punctuation(&self) -> bool { self.is_ascii_graphic() & !self.is_alphanumeric() } @@ -609,8 +635,9 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new(' ').is_ascii_hexdigit(), false); /// ``` #[inline] + #[must_use] pub const fn is_ascii_hexdigit(&self) -> bool { - self.is_ascii_digit() | ((*self as u8 | 0x20u8).wrapping_sub(b'a') < 6) + self.is_ascii_digit() | ((*self as u8 | 0x20_u8).wrapping_sub(b'a') < 6) } /// Unicode has printable versions of the ASCII control codes, like '␛'. @@ -627,6 +654,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new(' ').as_printable_char(), ' '); /// assert_eq!(AsciiChar::new('p').as_printable_char(), 'p'); /// ``` + #[must_use] pub fn as_printable_char(self) -> char { unsafe { match self as u8 { @@ -659,6 +687,7 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('[').to_ascii_uppercase().as_char(), '['); /// ``` #[inline] + #[must_use] pub const fn to_ascii_uppercase(&self) -> Self { [*self, AsciiChar::new((*self as u8 & 0b101_1111) as char)][self.is_lowercase() as usize] } @@ -675,12 +704,14 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('\x7f').to_ascii_lowercase().as_char(), '\x7f'); /// ``` #[inline] + #[must_use] pub const fn to_ascii_lowercase(&self) -> Self { [*self, AsciiChar::new(self.to_not_upper() as char)][self.is_uppercase() as usize] } /// Compares two characters case-insensitively. #[inline] + #[must_use] pub const fn eq_ignore_ascii_case(&self, other: &Self) -> bool { (self.as_byte() == other.as_byte()) | (self.is_alphabetic() & (self.to_not_upper() == other.to_not_upper())) @@ -754,6 +785,7 @@ const ERRORMSG_CHAR: &str = "not an ASCII character"; impl ToAsciiCharError { /// Returns a description for this error, like `std::error::Error::description`. #[inline] + #[must_use] pub const fn description(&self) -> &'static str { ERRORMSG_CHAR } @@ -781,10 +813,14 @@ impl Error for ToAsciiCharError { /// Convert `char`, `u8` and other character types to `AsciiChar`. pub trait ToAsciiChar { - /// Convert to `AsciiChar` without checking that it is an ASCII character. - unsafe fn to_ascii_char_unchecked(self) -> AsciiChar; /// Convert to `AsciiChar`. + /// + /// # Errors + /// If `self` is outside the valid ascii range, this returns `Err` fn to_ascii_char(self) -> Result; + + /// Convert to `AsciiChar` without checking that it is an ASCII character. + unsafe fn to_ascii_char_unchecked(self) -> AsciiChar; } impl ToAsciiChar for AsciiChar { @@ -809,6 +845,10 @@ impl ToAsciiChar for u8 { } } +// Note: Casts to `u8` here does not cause problems, as the negative +// range is mapped outside of ascii bounds and we don't mind losing +// the sign, as long as negative numbers are mapped outside ascii range. +#[allow(clippy::cast_sign_loss)] impl ToAsciiChar for i8 { #[inline] fn to_ascii_char(self) -> Result { @@ -842,7 +882,13 @@ impl ToAsciiChar for u32 { } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - (self as u8).to_ascii_char_unchecked() + // Note: This cast discards the top bytes, this may cause problems, see + // the TODO on this method's documentation in the trait. + // SAFETY: Caller guarantees we're within ascii range. + #[allow(clippy::cast_possible_truncation)] // We want to truncate it + unsafe { + (self as u8).to_ascii_char_unchecked() + } } } @@ -852,43 +898,48 @@ impl ToAsciiChar for u16 { } #[inline] unsafe fn to_ascii_char_unchecked(self) -> AsciiChar { - (self as u8).to_ascii_char_unchecked() + // Note: This cast discards the top bytes, this may cause problems, see + // the TODO on this method's documentation in the trait. + // SAFETY: Caller guarantees we're within ascii range. + #[allow(clippy::cast_possible_truncation)] // We want to truncate it + unsafe { + (self as u8).to_ascii_char_unchecked() + } } } #[cfg(test)] mod tests { use super::{AsciiChar, ToAsciiChar, ToAsciiCharError}; - use AsciiChar::*; #[test] fn to_ascii_char() { fn generic(ch: C) -> Result { ch.to_ascii_char() } - assert_eq!(generic(A), Ok(A)); - assert_eq!(generic(b'A'), Ok(A)); - assert_eq!(generic('A'), Ok(A)); - assert!(generic(200u16).is_err()); + assert_eq!(generic(AsciiChar::A), Ok(AsciiChar::A)); + assert_eq!(generic(b'A'), Ok(AsciiChar::A)); + assert_eq!(generic('A'), Ok(AsciiChar::A)); + assert!(generic(200_u16).is_err()); assert!(generic('λ').is_err()); } #[test] fn as_byte_and_char() { - assert_eq!(A.as_byte(), b'A'); - assert_eq!(A.as_char(), 'A'); + assert_eq!(AsciiChar::A.as_byte(), b'A'); + assert_eq!(AsciiChar::A.as_char(), 'A'); } #[test] fn new_array_is_correct() { - for byte in 0..128u8 { + for byte in 0..128_u8 { assert_eq!(AsciiChar::new(byte as char).as_byte(), byte); } } #[test] fn is_all() { - for byte in 0..128u8 { + for byte in 0..128_u8 { let ch = byte as char; let ascii = AsciiChar::new(ch); assert_eq!(ascii.is_alphabetic(), ch.is_alphabetic()); @@ -938,49 +989,49 @@ mod tests { #[test] #[should_panic] fn is_digit_bad_radix() { - AsciiChar::_7.is_digit(37); + let _ = AsciiChar::_7.is_digit(37); } #[test] fn cmp_wider() { - assert_eq!(A, 'A'); - assert_eq!(b'b', b); - assert!(a < 'z'); + assert_eq!(AsciiChar::A, 'A'); + assert_eq!(b'b', AsciiChar::b); + assert!(AsciiChar::a < 'z'); } #[test] fn ascii_case() { - assert_eq!(At.to_ascii_lowercase(), At); - assert_eq!(At.to_ascii_uppercase(), At); - assert_eq!(A.to_ascii_lowercase(), a); - assert_eq!(A.to_ascii_uppercase(), A); - assert_eq!(a.to_ascii_lowercase(), a); - assert_eq!(a.to_ascii_uppercase(), A); - - let mut mutable = (A, a); + assert_eq!(AsciiChar::At.to_ascii_lowercase(), AsciiChar::At); + assert_eq!(AsciiChar::At.to_ascii_uppercase(), AsciiChar::At); + assert_eq!(AsciiChar::A.to_ascii_lowercase(), AsciiChar::a); + assert_eq!(AsciiChar::A.to_ascii_uppercase(), AsciiChar::A); + assert_eq!(AsciiChar::a.to_ascii_lowercase(), AsciiChar::a); + assert_eq!(AsciiChar::a.to_ascii_uppercase(), AsciiChar::A); + + let mut mutable = (AsciiChar::A, AsciiChar::a); mutable.0.make_ascii_lowercase(); mutable.1.make_ascii_uppercase(); - assert_eq!(mutable.0, a); - assert_eq!(mutable.1, A); + assert_eq!(mutable.0, AsciiChar::a); + assert_eq!(mutable.1, AsciiChar::A); - assert!(LineFeed.eq_ignore_ascii_case(&LineFeed)); - assert!(!LineFeed.eq_ignore_ascii_case(&CarriageReturn)); - assert!(z.eq_ignore_ascii_case(&Z)); - assert!(Z.eq_ignore_ascii_case(&z)); - assert!(A.eq_ignore_ascii_case(&a)); - assert!(!K.eq_ignore_ascii_case(&C)); - assert!(!Z.eq_ignore_ascii_case(&DEL)); - assert!(!BracketOpen.eq_ignore_ascii_case(&CurlyBraceOpen)); - assert!(!Grave.eq_ignore_ascii_case(&At)); - assert!(!Grave.eq_ignore_ascii_case(&DEL)); + assert!(AsciiChar::LineFeed.eq_ignore_ascii_case(&AsciiChar::LineFeed)); + assert!(!AsciiChar::LineFeed.eq_ignore_ascii_case(&AsciiChar::CarriageReturn)); + assert!(AsciiChar::z.eq_ignore_ascii_case(&AsciiChar::Z)); + assert!(AsciiChar::Z.eq_ignore_ascii_case(&AsciiChar::z)); + assert!(AsciiChar::A.eq_ignore_ascii_case(&AsciiChar::a)); + assert!(!AsciiChar::K.eq_ignore_ascii_case(&AsciiChar::C)); + assert!(!AsciiChar::Z.eq_ignore_ascii_case(&AsciiChar::DEL)); + assert!(!AsciiChar::BracketOpen.eq_ignore_ascii_case(&AsciiChar::CurlyBraceOpen)); + assert!(!AsciiChar::Grave.eq_ignore_ascii_case(&AsciiChar::At)); + assert!(!AsciiChar::Grave.eq_ignore_ascii_case(&AsciiChar::DEL)); } #[test] #[cfg(feature = "std")] fn fmt_ascii() { - assert_eq!(format!("{}", t), "t"); - assert_eq!(format!("{:?}", t), "'t'"); - assert_eq!(format!("{}", LineFeed), "\n"); - assert_eq!(format!("{:?}", LineFeed), "'\\n'"); + assert_eq!(format!("{}", AsciiChar::t), "t"); + assert_eq!(format!("{:?}", AsciiChar::t), "'t'"); + assert_eq!(format!("{}", AsciiChar::LineFeed), "\n"); + assert_eq!(format!("{:?}", AsciiChar::LineFeed), "'\\n'"); } } diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 208e9a3..36f9f31 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -11,7 +11,7 @@ use ascii_char::AsciiChar; #[cfg(feature = "std")] use ascii_string::AsciiString; -/// AsciiStr represents a byte or string slice that only contains ASCII characters. +/// [`AsciiStr`] represents a byte or string slice that only contains ASCII characters. /// /// It wraps an `[AsciiChar]` and implements many of `str`s methods and traits. /// @@ -26,24 +26,28 @@ pub struct AsciiStr { impl AsciiStr { /// Converts `&self` to a `&str` slice. #[inline] + #[must_use] pub fn as_str(&self) -> &str { unsafe { &*(self as *const AsciiStr as *const str) } } /// Converts `&self` into a byte slice. #[inline] + #[must_use] pub fn as_bytes(&self) -> &[u8] { unsafe { &*(self as *const AsciiStr as *const [u8]) } } /// Returns the entire string as slice of `AsciiChar`s. #[inline] + #[must_use] pub const fn as_slice(&self) -> &[AsciiChar] { &self.slice } /// Returns the entire string as mutable slice of `AsciiChar`s. #[inline] + #[must_use] pub fn as_mut_slice(&mut self) -> &mut [AsciiChar] { &mut self.slice } @@ -54,6 +58,7 @@ impl AsciiStr { /// will end up pointing to garbage. Modifying the `AsciiStr` may cause it's buffer to be /// reallocated, which would also make any pointers to it invalid. #[inline] + #[must_use] pub const fn as_ptr(&self) -> *const AsciiChar { self.as_slice().as_ptr() } @@ -64,18 +69,23 @@ impl AsciiStr { /// will end up pointing to garbage. Modifying the `AsciiStr` may cause it's buffer to be /// reallocated, which would also make any pointers to it invalid. #[inline] + #[must_use] pub fn as_mut_ptr(&mut self) -> *mut AsciiChar { self.as_mut_slice().as_mut_ptr() } /// Copies the content of this `AsciiStr` into an owned `AsciiString`. #[cfg(feature = "std")] + #[must_use] pub fn to_ascii_string(&self) -> AsciiString { AsciiString::from(self.slice.to_vec()) } /// Converts anything that can represent a byte slice into an `AsciiStr`. /// + /// # Errors + /// If `bytes` contains a non-ascii byte, `Err` will be returned + /// /// # Examples /// ``` /// # use ascii::AsciiStr; @@ -102,6 +112,7 @@ impl AsciiStr { /// assert_eq!(foo.as_str(), "foo"); /// ``` #[inline] + #[must_use] pub unsafe fn from_ascii_unchecked(bytes: &[u8]) -> &AsciiStr { bytes.as_ascii_str_unchecked() } @@ -115,6 +126,7 @@ impl AsciiStr { /// assert_eq!(s.len(), 3); /// ``` #[inline] + #[must_use] pub fn len(&self) -> usize { self.slice.len() } @@ -130,12 +142,14 @@ impl AsciiStr { /// assert!(!full.is_empty()); /// ``` #[inline] + #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } /// Returns an iterator over the characters of the `AsciiStr`. #[inline] + #[must_use] pub fn chars(&self) -> Chars { Chars(self.slice.iter()) } @@ -143,6 +157,7 @@ impl AsciiStr { /// Returns an iterator over the characters of the `AsciiStr` which allows you to modify the /// value of each `AsciiChar`. #[inline] + #[must_use] pub fn chars_mut(&mut self) -> CharsMut { CharsMut(self.slice.iter_mut()) } @@ -158,6 +173,7 @@ impl AsciiStr { /// .collect::>(); /// assert_eq!(words, ["apple", "banana", "lemon"]); /// ``` + #[must_use] pub fn split(&self, on: AsciiChar) -> impl DoubleEndedIterator { Split { on, @@ -172,6 +188,7 @@ impl AsciiStr { /// /// The final line ending is optional. #[inline] + #[must_use] pub fn lines(&self) -> impl DoubleEndedIterator { Lines { string: self } } @@ -184,6 +201,7 @@ impl AsciiStr { /// let example = AsciiStr::from_ascii(" \twhite \tspace \t").unwrap(); /// assert_eq!("white \tspace", example.trim()); /// ``` + #[must_use] pub fn trim(&self) -> &Self { self.trim_start().trim_end() } @@ -196,8 +214,12 @@ impl AsciiStr { /// let example = AsciiStr::from_ascii(" \twhite \tspace \t").unwrap(); /// assert_eq!("white \tspace \t", example.trim_start()); /// ``` + #[must_use] pub fn trim_start(&self) -> &Self { - &self[self.chars().take_while(|ch| ch.is_whitespace()).count()..] + &self[self + .chars() + .take_while(|ascii| ascii.is_whitespace()) + .count()..] } /// Returns an ASCII string slice with trailing whitespace removed. @@ -208,6 +230,7 @@ impl AsciiStr { /// let example = AsciiStr::from_ascii(" \twhite \tspace \t").unwrap(); /// assert_eq!(" \twhite \tspace", example.trim_end()); /// ``` + #[must_use] pub fn trim_end(&self) -> &Self { let trimmed = self .chars() @@ -218,6 +241,7 @@ impl AsciiStr { } /// Compares two strings case-insensitively. + #[must_use] pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.len() == other.len() && self @@ -242,6 +266,7 @@ impl AsciiStr { /// Returns a copy of this string where letters 'a' to 'z' are mapped to 'A' to 'Z'. #[cfg(feature = "std")] + #[must_use] pub fn to_ascii_uppercase(&self) -> AsciiString { let mut ascii_string = self.to_ascii_string(); ascii_string.make_ascii_uppercase(); @@ -250,6 +275,7 @@ impl AsciiStr { /// Returns a copy of this string where letters 'A' to 'Z' are mapped to 'a' to 'z'. #[cfg(feature = "std")] + #[must_use] pub fn to_ascii_lowercase(&self) -> AsciiString { let mut ascii_string = self.to_ascii_string(); ascii_string.make_ascii_lowercase(); @@ -258,12 +284,14 @@ impl AsciiStr { /// Returns the first character if the string is not empty. #[inline] + #[must_use] pub fn first(&self) -> Option { self.slice.first().cloned() } /// Returns the last character if the string is not empty. #[inline] + #[must_use] pub fn last(&self) -> Option { self.slice.last().cloned() } @@ -510,6 +538,7 @@ impl<'a> IntoIterator for &'a mut AsciiStr { pub struct Chars<'a>(Iter<'a, AsciiChar>); impl<'a> Chars<'a> { /// Returns the ascii string slice with the remaining characters. + #[must_use] pub fn as_str(&self) -> &'a AsciiStr { self.0.as_slice().into() } @@ -541,6 +570,7 @@ impl<'a> ExactSizeIterator for Chars<'a> { pub struct CharsMut<'a>(IterMut<'a, AsciiChar>); impl<'a> CharsMut<'a> { /// Returns the ascii string slice with the remaining characters. + #[must_use] pub fn into_str(self) -> &'a mut AsciiStr { self.0.into_slice().into() } @@ -572,6 +602,7 @@ impl<'a> ExactSizeIterator for CharsMut<'a> { pub struct CharsRef<'a>(Iter<'a, AsciiChar>); impl<'a> CharsRef<'a> { /// Returns the ascii string slice with the remaining characters. + #[must_use] pub fn as_str(&self) -> &'a AsciiStr { self.0.as_slice().into() } @@ -605,6 +636,7 @@ struct Split<'a> { impl<'a> Iterator for Split<'a> { type Item = &'a AsciiStr; + #[allow(clippy::option_if_let_else)] // There are side effects with `else` so we don't use iterator adaptors fn next(&mut self) -> Option<&'a AsciiStr> { if !self.ended { let start: &AsciiStr = self.chars.as_str(); @@ -621,6 +653,7 @@ impl<'a> Iterator for Split<'a> { } } impl<'a> DoubleEndedIterator for Split<'a> { + #[allow(clippy::option_if_let_else)] // There are side effects with `else` so we don't use iterator adaptors fn next_back(&mut self) -> Option<&'a AsciiStr> { if !self.ended { let start: &AsciiStr = self.chars.as_str(); @@ -702,12 +735,14 @@ impl AsAsciiStrError { /// /// It is the maximum index such that `from_ascii(input[..index])` would return `Ok(_)`. #[inline] + #[must_use] pub const fn valid_up_to(self) -> usize { self.0 } #[cfg(not(feature = "std"))] /// Returns a description for this error, like `std::error::Error::description`. #[inline] + #[must_use] pub const fn description(&self) -> &'static str { ERRORMSG_STR } @@ -725,7 +760,7 @@ impl Error for AsAsciiStrError { } } -/// Convert slices of bytes or AsciiChar to `AsciiStr`. +/// Convert slices of bytes or [`AsciiChar`] to [`AsciiStr`]. // Could nearly replace this trait with SliceIndex, but its methods isn't even // on a path for stabilization. pub trait AsAsciiStr { @@ -734,6 +769,7 @@ pub trait AsAsciiStr { type Inner; /// Convert a subslice to an ASCII slice. /// + /// # Errors /// Returns `Err` if the range is out of bounds or if not all bytes in the /// slice are ASCII. The value in the error will be the index of the first /// non-ASCII byte or the end of the slice. @@ -752,6 +788,9 @@ pub trait AsAsciiStr { R: SliceIndex<[Self::Inner], Output = [Self::Inner]>; /// Convert to an ASCII slice. /// + /// # Errors + /// Returns `Err` if not all bytes are valid ascii values. + /// /// # Example /// ``` /// use ascii::{AsAsciiStr, AsciiChar}; @@ -778,7 +817,7 @@ pub trait AsAsciiStr { fn get_ascii(&self, index: usize) -> Option { self.slice_ascii(index..=index) .ok() - .and_then(|str| str.first()) + .and_then(AsciiStr::first) } /// Convert to an ASCII slice without checking for non-ASCII characters. /// @@ -787,13 +826,20 @@ pub trait AsAsciiStr { unsafe fn as_ascii_str_unchecked(&self) -> &AsciiStr; } -/// Convert mutable slices of bytes or AsciiChar to `AsciiStr`. +/// Convert mutable slices of bytes or [`AsciiChar`] to [`AsciiStr`]. pub trait AsMutAsciiStr: AsAsciiStr { /// Convert a subslice to an ASCII slice. + /// + /// # Errors + /// This function returns `Err` if range is out of bounds, or if + /// `self` contains non-ascii values fn slice_ascii_mut(&mut self, range: R) -> Result<&mut AsciiStr, AsAsciiStrError> where R: SliceIndex<[Self::Inner], Output = [Self::Inner]>; /// Convert to a mutable ASCII slice. + /// + /// # Errors + /// This function returns `Err` if `self` contains non-ascii values fn as_mut_ascii_str(&mut self) -> Result<&mut AsciiStr, AsAsciiStrError> { self.slice_ascii_mut(..) } @@ -928,6 +974,8 @@ impl AsMutAsciiStr for [AsciiChar] { impl AsAsciiStr for [u8] { type Inner = u8; + + #[allow(clippy::option_if_let_else)] // `if` needs to use past results, so it's more expressive like this fn slice_ascii(&self, range: R) -> Result<&AsciiStr, AsAsciiStrError> where R: SliceIndex<[u8], Output = [u8]>, @@ -958,6 +1006,7 @@ impl AsAsciiStr for [u8] { } } impl AsMutAsciiStr for [u8] { + #[allow(clippy::option_if_let_else)] // `if` needs to use past results, so it's more expressive like this fn slice_ascii_mut(&mut self, range: R) -> Result<&mut AsciiStr, AsAsciiStrError> where R: SliceIndex<[u8], Output = [u8]>, @@ -1079,7 +1128,8 @@ mod tests { assert_eq!(generic(ascii_str), Ok(ascii_str)); assert_eq!(generic(&"A"), Ok(ascii_str)); assert_eq!(generic(&ascii_str), Ok(ascii_str)); - assert_eq!(generic(&mut "A"), Ok(ascii_str)); + // Note: Not sure what this is supposed to be testing, as `generic` can only accept shared references + //assert_eq!(generic(&mut "A"), Ok(ascii_str)); } #[cfg(feature = "std")] @@ -1147,11 +1197,11 @@ mod tests { assert_eq!(cstr.get_ascii(5), None); assert_eq!(cstr.get_ascii(6), Some(AsciiChar::g)); assert_eq!(cstr.get_ascii(7), None); - let aslice = &[AsciiChar::X, AsciiChar::Y, AsciiChar::Z, AsciiChar::Null][..]; - let astr: &AsciiStr = aslice.as_ref(); - let cstr = CStr::from_bytes_with_nul(astr.as_bytes()).unwrap(); - assert_eq!(cstr.slice_ascii(..2), Ok(&astr[..2])); - assert_eq!(cstr.as_ascii_str(), Ok(&astr[..3])); + let ascii_slice = &[AsciiChar::X, AsciiChar::Y, AsciiChar::Z, AsciiChar::Null][..]; + let ascii_str: &AsciiStr = ascii_slice.as_ref(); + let cstr = CStr::from_bytes_with_nul(ascii_str.as_bytes()).unwrap(); + assert_eq!(cstr.slice_ascii(..2), Ok(&ascii_str[..2])); + assert_eq!(cstr.as_ascii_str(), Ok(&ascii_str[..3])); } #[test] @@ -1333,7 +1383,7 @@ mod tests { .as_ascii_str() .unwrap() .split(AsciiChar::from_ascii(needle).unwrap()) - .map(|a| a.as_str()); + .map(AsciiStr::as_str); loop { assert_eq!(asciis.size_hint(), strs.size_hint()); let (a, s) = (asciis.next(), strs.next()); diff --git a/src/ascii_string.rs b/src/ascii_string.rs index 5f0b644..a62abb6 100644 --- a/src/ascii_string.rs +++ b/src/ascii_string.rs @@ -40,6 +40,7 @@ impl AsciiString { /// let mut s = AsciiString::with_capacity(10); /// ``` #[inline] + #[must_use] pub fn with_capacity(capacity: usize) -> Self { AsciiString { vec: Vec::with_capacity(capacity), @@ -81,6 +82,7 @@ impl AsciiString { /// } /// ``` #[inline] + #[must_use] pub unsafe fn from_raw_parts(buf: *mut AsciiChar, length: usize, capacity: usize) -> Self { AsciiString { vec: Vec::from_raw_parts(buf, length, capacity), @@ -95,6 +97,7 @@ impl AsciiString { /// future of the `AsciiString`, as the rest of this library assumes that `AsciiString`s are /// ASCII encoded. #[inline] + #[must_use] pub unsafe fn from_ascii_unchecked(bytes: B) -> Self where B: Into>, @@ -111,7 +114,7 @@ impl AsciiString { /// Converts anything that can represent a byte buffer into an `AsciiString`. /// - /// # Failure + /// # Errors /// Returns the byte buffer if not all of the bytes are ASCII characters. /// /// # Examples @@ -161,6 +164,7 @@ impl AsciiString { /// assert!(s.capacity() >= 10); /// ``` #[inline] + #[must_use] pub fn capacity(&self) -> usize { self.vec.capacity() } @@ -201,6 +205,7 @@ impl AsciiString { /// assert!(s.capacity() >= 10); /// ``` #[inline] + pub fn reserve_exact(&mut self, additional: usize) { self.vec.reserve_exact(additional) } @@ -218,6 +223,7 @@ impl AsciiString { /// assert_eq!(s.capacity(), 3); /// ``` #[inline] + pub fn shrink_to_fit(&mut self) { self.vec.shrink_to_fit() } @@ -234,6 +240,7 @@ impl AsciiString { /// assert_eq!(s, "abc123"); /// ``` #[inline] + pub fn push(&mut self, ch: AsciiChar) { self.vec.push(ch) } @@ -251,6 +258,7 @@ impl AsciiString { /// assert_eq!(s, "he"); /// ``` #[inline] + pub fn truncate(&mut self, new_len: usize) { self.vec.truncate(new_len) } @@ -268,6 +276,7 @@ impl AsciiString { /// assert_eq!(s.pop(), None); /// ``` #[inline] + #[must_use] pub fn pop(&mut self) -> Option { self.vec.pop() } @@ -289,6 +298,7 @@ impl AsciiString { /// assert_eq!(s.remove(0).as_char(), 'o'); /// ``` #[inline] + #[must_use] pub fn remove(&mut self, idx: usize) -> AsciiChar { self.vec.remove(idx) } @@ -309,6 +319,7 @@ impl AsciiString { /// assert_eq!(s, "fobo"); /// ``` #[inline] + pub fn insert(&mut self, idx: usize, ch: AsciiChar) { self.vec.insert(idx, ch) } @@ -322,6 +333,7 @@ impl AsciiString { /// assert_eq!(s.len(), 3); /// ``` #[inline] + #[must_use] pub fn len(&self) -> usize { self.vec.len() } @@ -337,6 +349,7 @@ impl AsciiString { /// assert!(!s.is_empty()); /// ``` #[inline] + #[must_use] pub fn is_empty(&self) -> bool { self.len() == 0 } @@ -351,6 +364,7 @@ impl AsciiString { /// assert!(s.is_empty()); /// ``` #[inline] + pub fn clear(&mut self) { self.vec.clear() } @@ -649,11 +663,13 @@ pub struct FromAsciiError { impl FromAsciiError { /// Get the position of the first non-ASCII byte or character. #[inline] + #[must_use] pub fn ascii_error(&self) -> AsAsciiStrError { self.error } /// Get back the original, unmodified type. #[inline] + #[must_use] pub fn into_source(self) -> O { self.owner } @@ -672,6 +688,7 @@ impl fmt::Display for FromAsciiError { } impl Error for FromAsciiError { #[inline] + #[allow(deprecated)] // TODO: Remove deprecation once the earliest version we support deprecates this method. fn description(&self) -> &str { self.error.description() } @@ -686,6 +703,9 @@ pub trait IntoAsciiString: Sized { /// Convert to `AsciiString` without checking for non-ASCII characters. unsafe fn into_ascii_string_unchecked(self) -> AsciiString; /// Convert to `AsciiString`. + /// + /// # Errors + /// If `self` contains non-ascii characters, this will return `Err` fn into_ascii_string(self) -> Result>; } @@ -778,8 +798,8 @@ impl IntoAsciiString for CString { } }) .map(|mut s| { - let _nul = s.pop(); - debug_assert_eq!(_nul, Some(AsciiChar::Null)); + let nul = s.pop(); + debug_assert_eq!(nul, Some(AsciiChar::Null)); s }) } @@ -799,8 +819,8 @@ impl<'a> IntoAsciiString for &'a CStr { error, }) .map(|mut s| { - let _nul = s.pop(); - debug_assert_eq!(_nul, Some(AsciiChar::Null)); + let nul = s.pop(); + debug_assert_eq!(nul, Some(AsciiChar::Null)); s }) } @@ -879,7 +899,7 @@ mod tests { assert_eq!(ascii_str_unchecked.len(), 3); assert_eq!(ascii_str_unchecked.as_slice(), expected_chars); - let sparkle_heart_bytes = vec![240u8, 159, 146, 150]; + let sparkle_heart_bytes = vec![240_u8, 159, 146, 150]; let cstring = CString::new(sparkle_heart_bytes).unwrap(); let cstr = &*cstring; let ascii_err = cstr.into_ascii_string().unwrap_err(); diff --git a/src/lib.rs b/src/lib.rs index 4c77509..81eefdb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,21 @@ //! API changed significantly since then. #![cfg_attr(not(feature = "std"), no_std)] -#![allow(clippy::trivially_copy_pass_by_ref)] // for compatibility with methods on char and u8 +// Clippy lints +#![warn(clippy::pedantic)] +// Naming conventions sometimes go against this lint +#![allow(clippy::module_name_repetitions)] +// We need to get literal non-asciis for tests +#![allow(clippy::non_ascii_literal)] +// Sometimes it looks better to invert the order, such as when the `else` block is small +#![allow(clippy::if_not_else)] +// Shadowing is common and doesn't affect understanding +// TODO: Consider removing `shadow_unrelated`, as it can show some actual logic errors +#![allow(clippy::shadow_unrelated, clippy::shadow_reuse, clippy::shadow_same)] +// for compatibility with methods on char and u8 +#![allow(clippy::trivially_copy_pass_by_ref)] +// In preparation for feature `unsafe_block_in_unsafe_fn` (https://github.com/rust-lang/rust/issues/71668) +#![allow(unused_unsafe)] #[cfg(feature = "std")] extern crate core; diff --git a/tests.rs b/tests.rs index b9f3c9c..3454830 100644 --- a/tests.rs +++ b/tests.rs @@ -130,7 +130,7 @@ fn extend_from_iterator() { .unwrap() .split(AsciiChar::Space) .map(|case| { - if case.chars().all(|ch| ch.is_uppercase()) { + if case.chars().all(AsciiChar::is_uppercase) { Cow::from(case) } else { Cow::from(case.to_ascii_uppercase()) From 5fffc7e51a753147234819b3c9f8b839e06ca2cf Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sat, 19 Sep 2020 21:21:37 +0100 Subject: [PATCH 2/9] Added some clippy restriction lints. Changed most indexing to `get_unchecked` because of lint `clippy::indexing_slicing`. This should also improve performance, as the compiler doesn't need to emmit panic code. `clippy::option_if_let_else` is now allowed throughout the whole crate, as it doesn't add much. --- src/ascii_char.rs | 7 ++- src/ascii_str.rs | 103 ++++++++++++++++++++++++++++++-------------- src/ascii_string.rs | 2 + src/lib.rs | 11 ++++- 4 files changed, 89 insertions(+), 34 deletions(-) diff --git a/src/ascii_char.rs b/src/ascii_char.rs index 2444b81..49c1b21 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -353,7 +353,10 @@ impl AsciiChar { h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, CurlyBraceOpen, VerticalBar, CurlyBraceClose, Tilde, DEL, - ]; + ]; + + // We want to slice here and detect `const_err` from rustc if the slice is invalid + #[allow(clippy::indexing_slicing)] ALL[ch as usize] } @@ -688,6 +691,7 @@ impl AsciiChar { /// ``` #[inline] #[must_use] + #[allow(clippy::indexing_slicing)] // We're sure it'll either access one or the other, as `bool` is either `0` or `1` pub const fn to_ascii_uppercase(&self) -> Self { [*self, AsciiChar::new((*self as u8 & 0b101_1111) as char)][self.is_lowercase() as usize] } @@ -705,6 +709,7 @@ impl AsciiChar { /// ``` #[inline] #[must_use] + #[allow(clippy::indexing_slicing)] // We're sure it'll either access one or the other, as `bool` is either `0` or `1` pub const fn to_ascii_lowercase(&self) -> Self { [*self, AsciiChar::new(self.to_not_upper() as char)][self.is_uppercase() as usize] } diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 36f9f31..e047101 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -216,10 +216,13 @@ impl AsciiStr { /// ``` #[must_use] pub fn trim_start(&self) -> &Self { - &self[self + let whitespace_len = self .chars() - .take_while(|ascii| ascii.is_whitespace()) - .count()..] + .position(|ascii| !ascii.is_whitespace()) + .unwrap_or_else(|| self.len()); + + // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice. + unsafe { self.as_slice().get_unchecked(whitespace_len..).into() } } /// Returns an ASCII string slice with trailing whitespace removed. @@ -232,12 +235,19 @@ impl AsciiStr { /// ``` #[must_use] pub fn trim_end(&self) -> &Self { - let trimmed = self + // Number of whitespace characters counting from the end + let whitespace_len = self .chars() .rev() - .take_while(|ch| ch.is_whitespace()) - .count(); - &self[..self.len() - trimmed] + .position(|ascii| !ascii.is_whitespace()) + .unwrap_or_else(|| self.len()); + + // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice, and at least `0`, which is the whole slice. + unsafe { + self.as_slice() + .get_unchecked(..self.len() - whitespace_len) + .into() + } } /// Compares two strings case-insensitively. @@ -470,6 +480,7 @@ impl fmt::Debug for AsciiStr { macro_rules! impl_index { ($idx:ty) => { + #[allow(clippy::indexing_slicing)] // In `Index`, if it's out of bounds, panic is the default impl Index<$idx> for AsciiStr { type Output = AsciiStr; @@ -479,6 +490,7 @@ macro_rules! impl_index { } } + #[allow(clippy::indexing_slicing)] // In `IndexMut`, if it's out of bounds, panic is the default impl IndexMut<$idx> for AsciiStr { #[inline] fn index_mut(&mut self, index: $idx) -> &mut AsciiStr { @@ -495,6 +507,7 @@ impl_index! { RangeFull } impl_index! { RangeInclusive } impl_index! { RangeToInclusive } +#[allow(clippy::indexing_slicing)] // In `Index`, if it's out of bounds, panic is the default impl Index for AsciiStr { type Output = AsciiChar; @@ -504,6 +517,7 @@ impl Index for AsciiStr { } } +#[allow(clippy::indexing_slicing)] // In `IndexMut`, if it's out of bounds, panic is the default impl IndexMut for AsciiStr { #[inline] fn index_mut(&mut self, index: usize) -> &mut AsciiChar { @@ -636,13 +650,14 @@ struct Split<'a> { impl<'a> Iterator for Split<'a> { type Item = &'a AsciiStr; - #[allow(clippy::option_if_let_else)] // There are side effects with `else` so we don't use iterator adaptors fn next(&mut self) -> Option<&'a AsciiStr> { if !self.ended { let start: &AsciiStr = self.chars.as_str(); let split_on = self.on; - if let Some(at) = self.chars.position(|ch| ch == split_on) { - Some(&start[..at]) + + if let Some(at) = self.chars.position(|ascii| ascii == split_on) { + // SAFETY: `at` is guaranteed to be in bounds, as `position` returns `Ok(0..len)`. + Some(unsafe { start.as_slice().get_unchecked(..at).into() }) } else { self.ended = true; Some(start) @@ -653,13 +668,14 @@ impl<'a> Iterator for Split<'a> { } } impl<'a> DoubleEndedIterator for Split<'a> { - #[allow(clippy::option_if_let_else)] // There are side effects with `else` so we don't use iterator adaptors fn next_back(&mut self) -> Option<&'a AsciiStr> { if !self.ended { let start: &AsciiStr = self.chars.as_str(); let split_on = self.on; - if let Some(at) = self.chars.rposition(|ch| ch == split_on) { - Some(&start[at + 1..]) + + if let Some(at) = self.chars.rposition(|ascii| ascii == split_on) { + // SAFETY: `at` is guaranteed to be in bounds, as `rposition` returns `Ok(0..len)`, and slices `1..`, `2..`, etc... until `len..` inclusive, are valid. + Some(unsafe { start.as_slice().get_unchecked(at + 1..).into() }) } else { self.ended = true; Some(start) @@ -684,40 +700,65 @@ impl<'a> Iterator for Lines<'a> { .chars() .position(|chr| chr == AsciiChar::LineFeed) { - let line = if idx > 0 && self.string[idx - 1] == AsciiChar::CarriageReturn { - &self.string[..idx - 1] + // SAFETY: `idx` is guaranteed to be `1..len`, as we get it from `position` as `0..len` and make sure it's not `0`. + let line = if idx > 0 + && *unsafe { self.string.as_slice().get_unchecked(idx - 1) } + == AsciiChar::CarriageReturn + { + // SAFETY: As per above, `idx` is guaranteed to be `1..len` + unsafe { self.string.as_slice().get_unchecked(..idx - 1).into() } } else { - &self.string[..idx] + // SAFETY: As per above, `idx` is guaranteed to be `0..len` + unsafe { self.string.as_slice().get_unchecked(..idx).into() } }; - self.string = &self.string[idx + 1..]; + // SAFETY: As per above, `idx` is guaranteed to be `0..len`, so at the extreme, slicing `len..` is a valid empty slice. + self.string = unsafe { self.string.as_slice().get_unchecked(idx + 1..).into() }; Some(line) } else if self.string.is_empty() { None } else { let line = self.string; - self.string = &self.string[..0]; + // SAFETY: Slicing `..0` is always valid and yields an empty slice + self.string = unsafe { self.string.as_slice().get_unchecked(..0).into() }; Some(line) } } } + impl<'a> DoubleEndedIterator for Lines<'a> { fn next_back(&mut self) -> Option<&'a AsciiStr> { if self.string.is_empty() { return None; } - let mut i = self.string.len(); - if self.string[i - 1] == AsciiChar::LineFeed { - i -= 1; - if i > 0 && self.string[i - 1] == AsciiChar::CarriageReturn { - i -= 1; - } - } - self.string = &self.string[..i]; - while i > 0 && self.string[i - 1] != AsciiChar::LineFeed { - i -= 1; + + // If we end with `LF` / `CR/LF`, remove them + if let [slice @ .., AsciiChar::CarriageReturn, AsciiChar::LineFeed] + | [slice @ .., AsciiChar::LineFeed] = self.string.as_slice() + { + self.string = slice.into(); } - let line = &self.string[i..]; - self.string = &self.string[..i]; + + // SAFETY: This will never be `0`, as we remove any `LF` from the end, it is `1..len` + let lf_rev_pos = self + .string + .chars() + .rev() + .position(|ascii| ascii == AsciiChar::LineFeed) + .unwrap_or_else(|| self.string.len()); + + // SAFETY: As per above, `self.len() - lf_rev_pos` will be in range `0..len - 1`, so both indexes are correct. + let line = unsafe { + self.string + .as_slice() + .get_unchecked(self.string.len() - lf_rev_pos..) + .into() + }; + self.string = unsafe { + self.string + .as_slice() + .get_unchecked(..self.string.len() - lf_rev_pos) + .into() + }; Some(line) } } @@ -975,7 +1016,6 @@ impl AsMutAsciiStr for [AsciiChar] { impl AsAsciiStr for [u8] { type Inner = u8; - #[allow(clippy::option_if_let_else)] // `if` needs to use past results, so it's more expressive like this fn slice_ascii(&self, range: R) -> Result<&AsciiStr, AsAsciiStrError> where R: SliceIndex<[u8], Output = [u8]>, @@ -1006,7 +1046,6 @@ impl AsAsciiStr for [u8] { } } impl AsMutAsciiStr for [u8] { - #[allow(clippy::option_if_let_else)] // `if` needs to use past results, so it's more expressive like this fn slice_ascii_mut(&mut self, range: R) -> Result<&mut AsciiStr, AsAsciiStrError> where R: SliceIndex<[u8], Output = [u8]>, diff --git a/src/ascii_string.rs b/src/ascii_string.rs index a62abb6..fb08a21 100644 --- a/src/ascii_string.rs +++ b/src/ascii_string.rs @@ -619,6 +619,7 @@ impl<'a> AddAssign<&'a AsciiStr> for AsciiString { } } +#[allow(clippy::indexing_slicing)] // In `Index`, if it's out of bounds, panic is the default impl Index for AsciiString where AsciiStr: Index, @@ -631,6 +632,7 @@ where } } +#[allow(clippy::indexing_slicing)] // In `IndexMut`, if it's out of bounds, panic is the default impl IndexMut for AsciiString where AsciiStr: IndexMut, diff --git a/src/lib.rs b/src/lib.rs index 81eefdb..6161c2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,7 +30,12 @@ #![cfg_attr(not(feature = "std"), no_std)] // Clippy lints -#![warn(clippy::pedantic)] +#![warn( + clippy::pedantic, + clippy::decimal_literal_representation, + clippy::get_unwrap, + clippy::indexing_slicing +)] // Naming conventions sometimes go against this lint #![allow(clippy::module_name_repetitions)] // We need to get literal non-asciis for tests @@ -40,6 +45,10 @@ // Shadowing is common and doesn't affect understanding // TODO: Consider removing `shadow_unrelated`, as it can show some actual logic errors #![allow(clippy::shadow_unrelated, clippy::shadow_reuse, clippy::shadow_same)] +// A `if let` / `else` sometimes looks better than using iterator adaptors +#![allow(clippy::option_if_let_else)] +// In tests, we're fine with indexing, since a panic is a failure. +#![cfg_attr(test, allow(clippy::indexing_slicing))] // for compatibility with methods on char and u8 #![allow(clippy::trivially_copy_pass_by_ref)] // In preparation for feature `unsafe_block_in_unsafe_fn` (https://github.com/rust-lang/rust/issues/71668) From 26b0c90db7ab9ba7d12a84f836fcd39a69d79650 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sat, 19 Sep 2020 21:26:24 +0100 Subject: [PATCH 3/9] Changed closure arguments from `|ascii| ...` to `|ch| ...` to match with master --- src/ascii_str.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index e047101..e2d1534 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -218,7 +218,7 @@ impl AsciiStr { pub fn trim_start(&self) -> &Self { let whitespace_len = self .chars() - .position(|ascii| !ascii.is_whitespace()) + .position(|ch| !ch.is_whitespace()) .unwrap_or_else(|| self.len()); // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice. @@ -239,7 +239,7 @@ impl AsciiStr { let whitespace_len = self .chars() .rev() - .position(|ascii| !ascii.is_whitespace()) + .position(|ch| !ch.is_whitespace()) .unwrap_or_else(|| self.len()); // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice, and at least `0`, which is the whole slice. @@ -655,7 +655,7 @@ impl<'a> Iterator for Split<'a> { let start: &AsciiStr = self.chars.as_str(); let split_on = self.on; - if let Some(at) = self.chars.position(|ascii| ascii == split_on) { + if let Some(at) = self.chars.position(|ch| ch == split_on) { // SAFETY: `at` is guaranteed to be in bounds, as `position` returns `Ok(0..len)`. Some(unsafe { start.as_slice().get_unchecked(..at).into() }) } else { @@ -673,7 +673,7 @@ impl<'a> DoubleEndedIterator for Split<'a> { let start: &AsciiStr = self.chars.as_str(); let split_on = self.on; - if let Some(at) = self.chars.rposition(|ascii| ascii == split_on) { + if let Some(at) = self.chars.rposition(|ch| ch == split_on) { // SAFETY: `at` is guaranteed to be in bounds, as `rposition` returns `Ok(0..len)`, and slices `1..`, `2..`, etc... until `len..` inclusive, are valid. Some(unsafe { start.as_slice().get_unchecked(at + 1..).into() }) } else { @@ -743,7 +743,7 @@ impl<'a> DoubleEndedIterator for Lines<'a> { .string .chars() .rev() - .position(|ascii| ascii == AsciiChar::LineFeed) + .position(|ch| ch == AsciiChar::LineFeed) .unwrap_or_else(|| self.string.len()); // SAFETY: As per above, `self.len() - lf_rev_pos` will be in range `0..len - 1`, so both indexes are correct. From 8d239bec453532f4ae86a957dae56036e8dc5be6 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 20:13:38 +0100 Subject: [PATCH 4/9] Fixed `ascii_str::tests::generic_as_ascii_str`'s mutable reference tests and added tests within it for most expected types. --- src/ascii_str.rs | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index e2d1534..c770e3a 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -1155,20 +1155,42 @@ mod tests { use super::{AsAsciiStr, AsAsciiStrError, AsMutAsciiStr, AsciiStr}; use AsciiChar; + /// Ensures that common types, `str`, `[u8]`, `AsciiStr` and their + /// references, shared and mutable implement `AsAsciiStr`. #[test] fn generic_as_ascii_str() { + // Generic function to ensure `C` implements `AsAsciiStr` fn generic(c: &C) -> Result<&AsciiStr, AsAsciiStrError> { c.as_ascii_str() } + let arr = [AsciiChar::A]; - let ascii_str: &AsciiStr = arr.as_ref().into(); - assert_eq!(generic("A"), Ok(ascii_str)); - assert_eq!(generic(&b"A"[..]), Ok(ascii_str)); - assert_eq!(generic(ascii_str), Ok(ascii_str)); - assert_eq!(generic(&"A"), Ok(ascii_str)); - assert_eq!(generic(&ascii_str), Ok(ascii_str)); - // Note: Not sure what this is supposed to be testing, as `generic` can only accept shared references - //assert_eq!(generic(&mut "A"), Ok(ascii_str)); + let ascii_str = arr.as_ref().into(); + let mut mut_arr = arr; // Note: We need a second copy to prevent overlapping mutable borrows. + let mut_ascii_str = mut_arr.as_mut().into(); + let mut_arr_mut_ref: &mut [AsciiChar] = &mut [AsciiChar::A]; + let mut string = "A".to_string(); + let mut string2 = "A".to_string(); + let string_mut = string.as_mut(); + let string_mut_bytes = unsafe { string2.as_bytes_mut() }; // SAFETY: We don't modify it + + // Note: This is a trick because `rustfmt` doesn't support + // attributes on blocks yet. + #[rustfmt::skip] + let _ = [ + assert_eq!(generic::("A" ), Ok(ascii_str)), + assert_eq!(generic::<[u8] >(&b"A"[..] ), Ok(ascii_str)), + assert_eq!(generic::(ascii_str ), Ok(ascii_str)), + assert_eq!(generic::<[AsciiChar] >(&arr ), Ok(ascii_str)), + assert_eq!(generic::<&str >(&"A" ), Ok(ascii_str)), + assert_eq!(generic::<&[u8] >(&&b"A"[..] ), Ok(ascii_str)), + assert_eq!(generic::<&AsciiStr >(&ascii_str ), Ok(ascii_str)), + assert_eq!(generic::<&[AsciiChar] >(&&arr[..] ), Ok(ascii_str)), + assert_eq!(generic::<&mut str >(&string_mut ), Ok(ascii_str)), + assert_eq!(generic::<&mut [u8] >(&string_mut_bytes), Ok(ascii_str)), + assert_eq!(generic::<&mut AsciiStr >(&mut_ascii_str ), Ok(ascii_str)), + assert_eq!(generic::<&mut [AsciiChar]>(&mut_arr_mut_ref ), Ok(ascii_str)), + ]; } #[cfg(feature = "std")] From 627891cdd5df38f924f8b5534c90132defc648fb Mon Sep 17 00:00:00 2001 From: Zenithsiz <34359769+Zenithsiz@users.noreply.github.com> Date: Sun, 11 Oct 2020 20:48:00 +0100 Subject: [PATCH 5/9] Update src/ascii_str.rs Co-authored-by: Thomas Bahn --- src/ascii_str.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index c770e3a..504c8a4 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -719,7 +719,7 @@ impl<'a> Iterator for Lines<'a> { } else { let line = self.string; // SAFETY: Slicing `..0` is always valid and yields an empty slice - self.string = unsafe { self.string.as_slice().get_unchecked(..0).into() }; + self.string = unsafe { AsciiStr::from_ascii_unchecked(b"") }; Some(line) } } From c7e2c25a121ee2b32bd891048aaef28d19aaca73 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 21:12:33 +0100 Subject: [PATCH 6/9] Added missing `#[must_use]` to `AsciiString::new` now that it is a `const fn`. --- src/ascii_string.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ascii_string.rs b/src/ascii_string.rs index fb08a21..b5a66db 100644 --- a/src/ascii_string.rs +++ b/src/ascii_string.rs @@ -26,6 +26,7 @@ impl AsciiString { /// let mut s = AsciiString::new(); /// ``` #[inline] + #[must_use] pub const fn new() -> Self { AsciiString { vec: Vec::new() } } From 418de0020300ea1cf84227369e3a841e77df5462 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 22:40:26 +0100 Subject: [PATCH 7/9] Fixed issue regarding usage of subslice patterns. Fixed usage of inexistant `AsMut` impl for `String` (Only added in `1.43`). Extended test `ascii_str::tests::lines_iter_rev`. --- src/ascii_str.rs | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 504c8a4..268e33a 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -718,7 +718,7 @@ impl<'a> Iterator for Lines<'a> { None } else { let line = self.string; - // SAFETY: Slicing `..0` is always valid and yields an empty slice + // SAFETY: An empty string is a valid string. self.string = unsafe { AsciiStr::from_ascii_unchecked(b"") }; Some(line) } @@ -732,13 +732,27 @@ impl<'a> DoubleEndedIterator for Lines<'a> { } // If we end with `LF` / `CR/LF`, remove them - if let [slice @ .., AsciiChar::CarriageReturn, AsciiChar::LineFeed] - | [slice @ .., AsciiChar::LineFeed] = self.string.as_slice() - { - self.string = slice.into(); + if let Some(AsciiChar::LineFeed) = self.string.last() { + // SAFETY: `last()` returned `Some`, so our len is at least 1. + self.string = unsafe { + self.string + .as_slice() + .get_unchecked(..self.string.len() - 1) + .into() + }; + + if let Some(AsciiChar::CarriageReturn) = self.string.last() { + // SAFETY: `last()` returned `Some`, so our len is at least 1. + self.string = unsafe { + self.string + .as_slice() + .get_unchecked(..self.string.len() - 1) + .into() + }; + } } - // SAFETY: This will never be `0`, as we remove any `LF` from the end, it is `1..len` + // Get the position of the first `LF` from the end. let lf_rev_pos = self .string .chars() @@ -746,7 +760,9 @@ impl<'a> DoubleEndedIterator for Lines<'a> { .position(|ch| ch == AsciiChar::LineFeed) .unwrap_or_else(|| self.string.len()); - // SAFETY: As per above, `self.len() - lf_rev_pos` will be in range `0..len - 1`, so both indexes are correct. + // SAFETY: `lf_rev_pos` will be in range `0..=len`, so `len - lf_rev_pos` + // will be within `0..=len`, making it correct as a start and end + // point for the strings. let line = unsafe { self.string .as_slice() @@ -1171,7 +1187,7 @@ mod tests { let mut_arr_mut_ref: &mut [AsciiChar] = &mut [AsciiChar::A]; let mut string = "A".to_string(); let mut string2 = "A".to_string(); - let string_mut = string.as_mut(); + let string_mut = string.as_mut_str(); let string_mut_bytes = unsafe { string2.as_bytes_mut() }; // SAFETY: We don't modify it // Note: This is a trick because `rustfmt` doesn't support @@ -1427,6 +1443,15 @@ mod tests { assert_eq!(iter.next_back(), Some("baz".as_ascii_str().unwrap())); assert_eq!(iter.next_back(), Some("".as_ascii_str().unwrap())); assert_eq!(iter.next(), Some("bar".as_ascii_str().unwrap())); + + let empty_lines = b"\n\r\n\n\r\n"; + let mut iter_count = 0; + let ascii = AsciiStr::from_ascii(&empty_lines).unwrap(); + for line in ascii.lines().rev() { + iter_count += 1; + assert!(line.is_empty()); + } + assert_eq!(4, iter_count); } #[test] From 78d9f48de3a2ad474f8fff35006f7a4d74752f21 Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 22:48:45 +0100 Subject: [PATCH 8/9] Fixed usage of `String` in a `#[no_std]` environment in `ascii_str::tests`. --- src/ascii_str.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 268e33a..5d3c690 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -1184,11 +1184,10 @@ mod tests { let ascii_str = arr.as_ref().into(); let mut mut_arr = arr; // Note: We need a second copy to prevent overlapping mutable borrows. let mut_ascii_str = mut_arr.as_mut().into(); - let mut_arr_mut_ref: &mut [AsciiChar] = &mut [AsciiChar::A]; - let mut string = "A".to_string(); - let mut string2 = "A".to_string(); - let string_mut = string.as_mut_str(); - let string_mut_bytes = unsafe { string2.as_bytes_mut() }; // SAFETY: We don't modify it + let mut_arr_mut_ref: &mut [AsciiChar] = &mut [AsciiChar::A]; + let mut string_bytes = [b'A']; + let string_mut = unsafe { std::str::from_utf8_unchecked_mut(&mut string_bytes) }; // SAFETY: 'A' is a valid string. + let string_mut_bytes: &mut [u8] = &mut [b'A']; // Note: This is a trick because `rustfmt` doesn't support // attributes on blocks yet. From d1edb5600805af81508fea5f7bdfefc798920d6b Mon Sep 17 00:00:00 2001 From: Filipe Rodrigues Date: Sun, 11 Oct 2020 22:51:58 +0100 Subject: [PATCH 9/9] Fixed usage of `std` instead of `core` in a `ascii_str::tests::generic_as_ascii_str`. --- src/ascii_str.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 5d3c690..2d1fbbd 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -1186,7 +1186,7 @@ mod tests { let mut_ascii_str = mut_arr.as_mut().into(); let mut_arr_mut_ref: &mut [AsciiChar] = &mut [AsciiChar::A]; let mut string_bytes = [b'A']; - let string_mut = unsafe { std::str::from_utf8_unchecked_mut(&mut string_bytes) }; // SAFETY: 'A' is a valid string. + let string_mut = unsafe { core::str::from_utf8_unchecked_mut(&mut string_bytes) }; // SAFETY: 'A' is a valid string. let string_mut_bytes: &mut [u8] = &mut [b'A']; // Note: This is a trick because `rustfmt` doesn't support