From 7d91039ac06dc34795cb141b53b610e10cbc1340 Mon Sep 17 00:00:00 2001 From: William Venner Date: Tue, 5 Jul 2022 17:19:03 +0100 Subject: [PATCH 1/5] Add `BufRead::skip_until` --- library/std/src/io/mod.rs | 84 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index f4f2e3f243457..a5dab49c75dc4 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -1923,6 +1923,28 @@ fn read_until(r: &mut R, delim: u8, buf: &mut Vec) -> R } } +fn skip_until(r: &mut R, delim: u8) -> Result { + let mut read = 0; + loop { + let (done, used) = { + let available = match r.fill_buf() { + Ok(n) => n, + Err(ref e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + }; + match memchr::memchr(delim, available) { + Some(i) => (true, i + 1), + None => (false, available.len()), + } + }; + r.consume(used); + read += used; + if done || used == 0 { + return Ok(read); + } + } +} + /// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it /// to perform extra ways of reading. /// @@ -2126,6 +2148,68 @@ pub trait BufRead: Read { read_until(self, byte, buf) } + /// Skip all bytes until the delimiter `byte` or EOF is reached. + /// + /// This function will read (and discard) bytes from the underlying stream until the + /// delimiter or EOF is found. + /// + /// If successful, this function will return the total number of bytes read, + /// including the delimiter byte. + /// + /// This is useful for efficiently skipping data such as NUL-terminated strings + /// in binary file formats without buffering. + /// + /// This function is blocking and should be used carefully: it is possible for + /// an attacker to continuously send bytes without ever sending the delimiter + /// or EOF. 
+ /// + /// # Errors + /// + /// This function will ignore all instances of [`ErrorKind::Interrupted`] and + /// will otherwise return any errors returned by [`fill_buf`]. + /// + /// If an I/O error is encountered then all bytes read so far will have + /// been consumed and discarded, and the number of bytes skipped is not returned. + /// + /// [`fill_buf`]: BufRead::fill_buf + /// + /// # Examples + /// + /// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In + /// this example, we use [`Cursor`] to read some NUL-terminated information + /// about Ferris from a binary string, skipping the fun fact: + /// + /// ``` + /// #![feature(bufread_skip_until)] + /// + /// use std::io::{self, BufRead}; + /// + /// let mut cursor = io::Cursor::new(b"Ferris\0Likes long walks on the beach\0Crustacean\0"); + /// + /// // read name + /// let mut name = Vec::new(); + /// let num_bytes = cursor.read_until(b'\0', &mut name) + /// .expect("reading from cursor won't fail"); + /// assert_eq!(num_bytes, 7); + /// assert_eq!(name, b"Ferris\0"); + /// + /// // skip fun fact + /// let num_bytes = cursor.skip_until(b'\0') + /// .expect("reading from cursor won't fail"); + /// assert_eq!(num_bytes, 30); + /// + /// // read animal type + /// let mut animal = Vec::new(); + /// let num_bytes = cursor.read_until(b'\0', &mut animal) + /// .expect("reading from cursor won't fail"); + /// assert_eq!(num_bytes, 11); + /// assert_eq!(animal, b"Crustacean\0"); + /// ``` + #[unstable(feature = "bufread_skip_until", issue = "none")] + fn skip_until(&mut self, byte: u8) -> Result { + skip_until(self, byte) + } + /// Read all bytes until a newline (the `0xA` byte) is reached, and append /// them to the provided buffer. You do not need to clear the buffer before /// appending. 
From 4eea9763847250c89f535f6a8c3b0fd49d860e78 Mon Sep 17 00:00:00 2001 From: William Venner Date: Thu, 18 May 2023 18:57:15 +0100 Subject: [PATCH 2/5] Add `BufRead::skip_until` test --- library/std/src/io/buffered/tests.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/library/std/src/io/buffered/tests.rs b/library/std/src/io/buffered/tests.rs index 9d429e7090e83..ca5d8fb4b7f9a 100644 --- a/library/std/src/io/buffered/tests.rs +++ b/library/std/src/io/buffered/tests.rs @@ -400,6 +400,34 @@ fn test_read_until() { assert_eq!(v, []); } +#[test] +fn test_skip_until() { + let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore"; + let mut reader = BufReader::new(bytes); + + // read from the bytes, alternating between + // consuming `read\0`s and skipping `ignore\0`s + loop { + // consume `read\0` + let mut out = Vec::new(); + let read = reader.read_until(0, &mut out).unwrap(); + if read == 0 { + // eof + break; + } else { + assert_eq!(out, b"read\0"); + } + + // skip past `ignore\0` + reader.skip_until(0).unwrap(); + } + + // ensure we are at the end of the byte slice and that we can skip no further + // also ensure skip_until matches the behavior of read_until at EOF + let skipped = reader.skip_until(0).unwrap(); + assert_eq!(skipped, 0); +} + #[test] fn test_line_buffer() { let mut writer = LineWriter::new(Vec::new()); From 7c9ad34362ab545dd5be75637afd84b6287507a0 Mon Sep 17 00:00:00 2001 From: William Venner Date: Thu, 18 May 2023 18:59:36 +0100 Subject: [PATCH 3/5] Move `BufRead::skip_until` test to a more appropriate location --- library/std/src/io/buffered/tests.rs | 28 ---------------------------- library/std/src/io/tests.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/library/std/src/io/buffered/tests.rs b/library/std/src/io/buffered/tests.rs index ca5d8fb4b7f9a..9d429e7090e83 100644 --- a/library/std/src/io/buffered/tests.rs +++ b/library/std/src/io/buffered/tests.rs @@ 
-400,34 +400,6 @@ fn test_read_until() { assert_eq!(v, []); } -#[test] -fn test_skip_until() { - let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore"; - let mut reader = BufReader::new(bytes); - - // read from the bytes, alternating between - // consuming `read\0`s and skipping `ignore\0`s - loop { - // consume `read\0` - let mut out = Vec::new(); - let read = reader.read_until(0, &mut out).unwrap(); - if read == 0 { - // eof - break; - } else { - assert_eq!(out, b"read\0"); - } - - // skip past `ignore\0` - reader.skip_until(0).unwrap(); - } - - // ensure we are at the end of the byte slice and that we can skip no further - // also ensure skip_until matches the behavior of read_until at EOF - let skipped = reader.skip_until(0).unwrap(); - assert_eq!(skipped, 0); -} - #[test] fn test_line_buffer() { let mut writer = LineWriter::new(Vec::new()); diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index d5a8c93b0ce9f..9a9a790d77d9c 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -25,6 +25,34 @@ fn read_until() { assert_eq!(v, []); } +#[test] +fn skip_until() { + let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore"; + let mut reader = BufReader::new(bytes); + + // read from the bytes, alternating between + // consuming `read\0`s and skipping `ignore\0`s + loop { + // consume `read\0` + let mut out = Vec::new(); + let read = reader.read_until(0, &mut out).unwrap(); + if read == 0 { + // eof + break; + } else { + assert_eq!(out, b"read\0"); + } + + // skip past `ignore\0` + reader.skip_until(0).unwrap(); + } + + // ensure we are at the end of the byte slice and that we can skip no further + // also ensure skip_until matches the behavior of read_until at EOF + let skipped = reader.skip_until(0).unwrap(); + assert_eq!(skipped, 0); +} + #[test] fn split() { let buf = Cursor::new(&b"12"[..]); From 1d7d7654e686f0452f10e016ef78c3d4b191526e Mon Sep 17 00:00:00 2001 From: William Venner Date: Thu, 18 May 2023 
19:08:41 +0100 Subject: [PATCH 4/5] Add tracking issue ID for `bufread_skip_until` feature gate --- library/std/src/io/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index a5dab49c75dc4..2cbdd027036d8 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2205,7 +2205,7 @@ pub trait BufRead: Read { /// assert_eq!(num_bytes, 11); /// assert_eq!(animal, b"Crustacean\0"); /// ``` - #[unstable(feature = "bufread_skip_until", issue = "none")] + #[unstable(feature = "bufread_skip_until", issue = "111735")] fn skip_until(&mut self, byte: u8) -> Result { skip_until(self, byte) } From 7c1ab71f71087d88aade79925ae68a447397422f Mon Sep 17 00:00:00 2001 From: William Venner Date: Thu, 3 Aug 2023 09:52:57 +0100 Subject: [PATCH 5/5] Add assertion to test `skip_until` return value The extra `\0` in this commit is needed because the assertion on line 49 will fail otherwise (as `skip_until` stops reading on EOF and therefore does not read a trailing `\0`, returning 6 read bytes rather than the expected 7) --- library/std/src/io/tests.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/library/std/src/io/tests.rs b/library/std/src/io/tests.rs index 9a9a790d77d9c..4c5f86fe43163 100644 --- a/library/std/src/io/tests.rs +++ b/library/std/src/io/tests.rs @@ -27,7 +27,7 @@ fn read_until() { #[test] fn skip_until() { - let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore"; + let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore\0"; let mut reader = BufReader::new(bytes); // read from the bytes, alternating between @@ -41,10 +41,12 @@ fn skip_until() { break; } else { assert_eq!(out, b"read\0"); + assert_eq!(read, b"read\0".len()); } // skip past `ignore\0` - reader.skip_until(0).unwrap(); + let skipped = reader.skip_until(0).unwrap(); + assert_eq!(skipped, b"ignore\0".len()); } // ensure we are at the end of the byte slice and that we can skip no 
further