From 939d210de9f490f7e4014b11b7eae51dd801b596 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 18 Mar 2022 17:02:59 +0800 Subject: [PATCH 01/22] prepare for git-attribute file parsing (#301) --- git-attributes/src/parse/mod.rs | 2 + git-attributes/tests/attributes.rs | 176 +-------------------------- git-attributes/tests/parse/ignore.rs | 172 ++++++++++++++++++++++++++ git-attributes/tests/parse/mod.rs | 1 + 4 files changed, 176 insertions(+), 175 deletions(-) create mode 100644 git-attributes/tests/parse/ignore.rs create mode 100644 git-attributes/tests/parse/mod.rs diff --git a/git-attributes/src/parse/mod.rs b/git-attributes/src/parse/mod.rs index 03b01b5790e..ec13abd8271 100644 --- a/git-attributes/src/parse/mod.rs +++ b/git-attributes/src/parse/mod.rs @@ -1,5 +1,7 @@ pub mod ignore; +pub mod attributes {} + pub fn ignore(buf: &[u8]) -> ignore::Iter<'_> { ignore::Iter::new(buf) } diff --git a/git-attributes/tests/attributes.rs b/git-attributes/tests/attributes.rs index 05840a46a13..06f1a3c69d4 100644 --- a/git-attributes/tests/attributes.rs +++ b/git-attributes/tests/attributes.rs @@ -1,175 +1 @@ -mod parse { - mod ignore { - use git_attributes::ignore::pattern::Mode; - use git_testtools::fixture_path; - - #[test] - fn line_numbers_are_counted_correctly() { - let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); - let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect(); - assert_eq!( - actual, - vec![ - ("*.[oa]".into(), Mode::NO_SUB_DIR, 2), - ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), - ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), - ("/*".into(), Mode::empty(), 11), - ("/foo".into(), Mode::NEGATIVE, 12), - ("/foo/*".into(), Mode::empty(), 13), - ("/foo/bar".into(), Mode::NEGATIVE, 14) - ] - ); - } - - #[test] - fn line_endings_can_be_windows_or_unix() { - assert_eq!( - git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::>(), - vec![ - (r"unix".into(), Mode::NO_SUB_DIR, 1), - 
(r"windows".into(), Mode::NO_SUB_DIR, 2), - (r"last".into(), Mode::NO_SUB_DIR, 3) - ] - ); - } - - #[test] - fn mark_ends_with_pattern_specifically() { - assert_eq!( - git_attributes::parse::ignore(br"*literal").next(), - Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"**literal").next(), - Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)), - "double-asterisk won't allow for fast comparisons" - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera[l]").next(), - Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera?").next(), - Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera\?").next(), - Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)), - "for now we don't handle escapes properly like git seems to do" - ); - } - - #[test] - fn comments_are_ignored() { - assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); - } - - #[test] - fn backslashes_before_hashes_are_no_comments() { - assert_eq!( - git_attributes::parse::ignore(br"\#hello").next(), - Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { - assert_eq!( - git_attributes::parse::ignore(br"\hello\world").next(), - Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn leading_exclamation_mark_negates_pattern() { - assert_eq!( - git_attributes::parse::ignore(b"!hello").next(), - Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn leading_exclamation_marks_can_be_escaped_with_backslash() { - assert_eq!( - git_attributes::parse::ignore(br"\!hello").next(), - Some(("!hello".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn absence_of_sub_directories_are_marked() { - assert_eq!( - git_attributes::parse::ignore(br"a/b").next(), - Some(("a/b".into(), 
Mode::empty(), 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"ab").next(), - Some(("ab".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn trailing_slashes_are_marked_and_removed() { - assert_eq!( - git_attributes::parse::ignore(b"dir/").next(), - Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(b"dir///").next(), - Some(("dir//".into(), Mode::MUST_BE_DIR, 1)), - "but only the last slash is removed" - ); - } - - #[test] - fn trailing_spaces_are_ignored() { - assert_eq!( - git_attributes::parse::ignore(br"a ").next(), - Some(("a".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(b"a\t\t ").next(), - Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)), - "trailing tabs are not ignored" - ); - } - #[test] - fn trailing_spaces_can_be_escaped_to_be_literal() { - assert_eq!( - git_attributes::parse::ignore(br"a \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "a single escape in front of the last desired space is enough" - ); - assert_eq!( - git_attributes::parse::ignore(br"a b c ").next(), - Some(("a b c".into(), Mode::NO_SUB_DIR, 1)), - "spaces in the middle are fine" - ); - assert_eq!( - git_attributes::parse::ignore(br"a\ \ \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "one can also escape every single one" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "or just the one in the middle, losing the last actual space" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "escaping nothing also works as a whitespace protection" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \\\ ").next(), - Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), - "strange things like these work too" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \\ ").next(), - Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), - "strange things like these 
work as well" - ); - } - } -} +mod parse; diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs new file mode 100644 index 00000000000..ff3cfc52d7e --- /dev/null +++ b/git-attributes/tests/parse/ignore.rs @@ -0,0 +1,172 @@ +use git_attributes::ignore::pattern::Mode; +use git_testtools::fixture_path; + +#[test] +fn line_numbers_are_counted_correctly() { + let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); + let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect(); + assert_eq!( + actual, + vec![ + ("*.[oa]".into(), Mode::NO_SUB_DIR, 2), + ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), + ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), + ("/*".into(), Mode::empty(), 11), + ("/foo".into(), Mode::NEGATIVE, 12), + ("/foo/*".into(), Mode::empty(), 13), + ("/foo/bar".into(), Mode::NEGATIVE, 14) + ] + ); +} + +#[test] +fn line_endings_can_be_windows_or_unix() { + assert_eq!( + git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::>(), + vec![ + (r"unix".into(), Mode::NO_SUB_DIR, 1), + (r"windows".into(), Mode::NO_SUB_DIR, 2), + (r"last".into(), Mode::NO_SUB_DIR, 3) + ] + ); +} + +#[test] +fn mark_ends_with_pattern_specifically() { + assert_eq!( + git_attributes::parse::ignore(br"*literal").next(), + Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"**literal").next(), + Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)), + "double-asterisk won't allow for fast comparisons" + ); + assert_eq!( + git_attributes::parse::ignore(br"*litera[l]").next(), + Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"*litera?").next(), + Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"*litera\?").next(), + Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)), + "for now we don't handle escapes properly like git seems to do" 
+ ); +} + +#[test] +fn comments_are_ignored() { + assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); +} + +#[test] +fn backslashes_before_hashes_are_no_comments() { + assert_eq!( + git_attributes::parse::ignore(br"\#hello").next(), + Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { + assert_eq!( + git_attributes::parse::ignore(br"\hello\world").next(), + Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn leading_exclamation_mark_negates_pattern() { + assert_eq!( + git_attributes::parse::ignore(b"!hello").next(), + Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn leading_exclamation_marks_can_be_escaped_with_backslash() { + assert_eq!( + git_attributes::parse::ignore(br"\!hello").next(), + Some(("!hello".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn absence_of_sub_directories_are_marked() { + assert_eq!( + git_attributes::parse::ignore(br"a/b").next(), + Some(("a/b".into(), Mode::empty(), 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"ab").next(), + Some(("ab".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn trailing_slashes_are_marked_and_removed() { + assert_eq!( + git_attributes::parse::ignore(b"dir/").next(), + Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(b"dir///").next(), + Some(("dir//".into(), Mode::MUST_BE_DIR, 1)), + "but only the last slash is removed" + ); +} + +#[test] +fn trailing_spaces_are_ignored() { + assert_eq!( + git_attributes::parse::ignore(br"a ").next(), + Some(("a".into(), Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(b"a\t\t ").next(), + Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)), + "trailing tabs are not ignored" + ); +} + +#[test] +fn trailing_spaces_can_be_escaped_to_be_literal() { + assert_eq!( + git_attributes::parse::ignore(br"a \ ").next(), + 
Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "a single escape in front of the last desired space is enough" + ); + assert_eq!( + git_attributes::parse::ignore(br"a b c ").next(), + Some(("a b c".into(), Mode::NO_SUB_DIR, 1)), + "spaces in the middle are fine" + ); + assert_eq!( + git_attributes::parse::ignore(br"a\ \ \ ").next(), + Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "one can also escape every single one" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \ ").next(), + Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "or just the one in the middle, losing the last actual space" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \").next(), + Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "escaping nothing also works as a whitespace protection" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \\\ ").next(), + Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), + "strange things like these work too" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \\ ").next(), + Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), + "strange things like these work as well" + ); +} diff --git a/git-attributes/tests/parse/mod.rs b/git-attributes/tests/parse/mod.rs new file mode 100644 index 00000000000..5ae31c89f66 --- /dev/null +++ b/git-attributes/tests/parse/mod.rs @@ -0,0 +1 @@ +mod ignore; From 0c256d3a60b83ae20575f26ac1a9152fd30c7b29 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 18 Mar 2022 19:56:49 +0800 Subject: [PATCH 02/22] skip the BOM as well (#301) --- Cargo.lock | 7 +++++++ git-attributes/Cargo.toml | 1 + git-attributes/src/parse/ignore.rs | 3 ++- git-attributes/tests/parse/ignore.rs | 8 ++++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 1020d608940..38919a5fb84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1078,6 +1078,7 @@ dependencies = [ "bitflags", "bstr", "git-testtools", + "unicode-bom", ] [[package]] @@ -2824,6 +2825,12 @@ version = "0.3.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" +[[package]] +name = "unicode-bom" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63ec69f541d875b783ca40184d655f2927c95f0bffd486faa83cd3ac3529ec32" + [[package]] name = "unicode-normalization" version = "0.1.19" diff --git a/git-attributes/Cargo.toml b/git-attributes/Cargo.toml index 5e8ac460d10..3ef53f1df3a 100644 --- a/git-attributes/Cargo.toml +++ b/git-attributes/Cargo.toml @@ -15,6 +15,7 @@ doctest = false [dependencies] bstr = { version = "0.2.13", default-features = false, features = ["std"]} bitflags = "1.3.2" +unicode-bom = "1.1.4" [dev-dependencies] git-testtools = { path = "../tests/tools"} diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index b9ebd783ffd..f0a5b615c56 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -8,8 +8,9 @@ pub struct Iter<'a> { impl<'a> Iter<'a> { pub fn new(buf: &'a [u8]) -> Self { + let bom = unicode_bom::Bom::from(buf); Iter { - lines: buf.lines(), + lines: buf[bom.len()..].lines(), line_no: 0, } } diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index ff3cfc52d7e..7ad310a54e7 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -1,6 +1,14 @@ use git_attributes::ignore::pattern::Mode; use git_testtools::fixture_path; +#[test] +fn byte_order_marks_are_no_patterns() { + assert_eq!( + git_attributes::parse::ignore("\u{feff}hello".as_bytes()).next(), + Some((r"hello".into(), Mode::NO_SUB_DIR, 1)) + ); +} + #[test] fn line_numbers_are_counted_correctly() { let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); From 3c7fbec21d6424e311b9a27c08b8a9c08a7c59f8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Mar 2022 19:52:23 +0800 Subject: [PATCH 03/22] some more 
features to track after reading git-attributes docs (#301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …in greater depth. --- crate-status.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/crate-status.md b/crate-status.md index 242c9978ced..2a9d24d3ddc 100644 --- a/crate-status.md +++ b/crate-status.md @@ -53,6 +53,7 @@ * [x] pack only changed objects as derived from input * [x] base object compression * [ ] delta compression + * [ ] respect the `delta=false` attribute * [x] create 'thin' pack, i.e. deltas that are based on objects the other side has. * [x] parallel implementation that scales perfectly * [x] entries to pack data iterator @@ -209,9 +210,10 @@ Check out the [performance discussion][git-traverse-performance] as well. ### git-attributes -* [ ] parse git-ignore files (aka git-attributes without the attributes or negation) +* [x] parse git-ignore files (aka git-attributes without the attributes or negation) * [ ] parse git-attributes files * [ ] create an attributes stack, ideally one that includes 'ignored' status from .gitignore files. + * [ ] support for built-in `binary` macro for `-text -diff -merge` ### git-pathspec @@ -226,6 +228,15 @@ Check out the [performance discussion][git-traverse-performance] as well. - [ ] handle sparse directories - [ ] handle sparse index - [ ] linear scaling with multi-threading up to IO saturation + - supported attributes to affect working tree and index contents + - [ ] eol + - [ ] working-tree-encoding + - …more + - **filtering** + - [ ] `text` + - [ ] `ident` + - [ ] filter processes + - [ ] single-invocation clean/smudge filters * manage multiple worktrees * deal with exclude specifications, like .gitignore and other exclude files. @@ -384,6 +395,7 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/git-lock/README. 
### git-bundle * [ ] create a bundle from an archive + * [ ] respect `export-ignore` and `export-subst` * [ ] extract a branch from a bundle into a repository * [ ] API documentation * [ ] Some examples From 3f627954d2e992dd56eeee82a99f7ad41e619fb2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 22 Mar 2022 17:41:38 +0800 Subject: [PATCH 04/22] refactor (#301) --- git-attributes/src/parse/ignore.rs | 66 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index f0a5b615c56..be6212cbf51 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -20,40 +20,46 @@ impl<'a> Iterator for Iter<'a> { type Item = (BString, ignore::pattern::Mode, usize); fn next(&mut self) -> Option { - let mut res = None; - for mut line in self.lines.by_ref() { + for line in self.lines.by_ref() { self.line_no += 1; - let mut mode = ignore::pattern::Mode::empty(); - if line.is_empty() { - continue; - }; - if line.first() == Some(&b'#') { - continue; - } else if line.first() == Some(&b'!') { - mode |= ignore::pattern::Mode::NEGATIVE; - line = &line[1..]; - } else if line.first() == Some(&b'\\') { - let second = line.get(1); - if second == Some(&b'!') || second == Some(&b'#') { - line = &line[1..]; - } - } - let mut line = truncate_non_escaped_trailing_spaces(line); - if line.last() == Some(&b'/') { - mode |= ignore::pattern::Mode::MUST_BE_DIR; - line.pop(); - } - if !line.contains(&b'/') { - mode |= ignore::pattern::Mode::NO_SUB_DIR; + match parse_line(line) { + None => continue, + Some((line, flags)) => return Some((line, flags, self.line_no)), } - if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { - mode |= ignore::pattern::Mode::ENDS_WITH; - } - res = Some((line, mode, self.line_no)); - break; } - res + None + } +} + +#[inline] +fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::Mode)> { + 
let mut mode = ignore::pattern::Mode::empty(); + if line.is_empty() { + return None; + }; + if line.first() == Some(&b'#') { + return None; + } else if line.first() == Some(&b'!') { + mode |= ignore::pattern::Mode::NEGATIVE; + line = &line[1..]; + } else if line.first() == Some(&b'\\') { + let second = line.get(1); + if second == Some(&b'!') || second == Some(&b'#') { + line = &line[1..]; + } + } + let mut line = truncate_non_escaped_trailing_spaces(line); + if line.last() == Some(&b'/') { + mode |= ignore::pattern::Mode::MUST_BE_DIR; + line.pop(); + } + if !line.contains(&b'/') { + mode |= ignore::pattern::Mode::NO_SUB_DIR; + } + if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { + mode |= ignore::pattern::Mode::ENDS_WITH; } + Some((line, mode)) } /// We always copy just because that's ultimately needed anyway, not because we always have to. From ccc87defb4e739ccc1de8a0deae57233901f674d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 22 Mar 2022 18:20:01 +0800 Subject: [PATCH 05/22] API and first test for attributes parsing (#301) --- git-attributes/src/lib.rs | 18 +++++++++++ git-attributes/src/parse/attribute.rs | 43 +++++++++++++++++++++++++ git-attributes/src/parse/ignore.rs | 8 ++--- git-attributes/src/parse/mod.rs | 6 ++-- git-attributes/tests/parse/attribute.rs | 31 ++++++++++++++++++ git-attributes/tests/parse/mod.rs | 1 + 6 files changed, 100 insertions(+), 7 deletions(-) create mode 100644 git-attributes/src/parse/attribute.rs create mode 100644 git-attributes/tests/parse/attribute.rs diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs index ca3616728c5..efe597ba0a5 100644 --- a/git-attributes/src/lib.rs +++ b/git-attributes/src/lib.rs @@ -1,5 +1,23 @@ #![forbid(unsafe_code, rust_2018_idioms)] +use bstr::BStr; + +pub enum State<'a> { + /// The attribute is listed, or has the special value 'true' + Set, + /// The attribute has the special value 'false', or was prefixed with a `-` sign. 
+ Unset, + /// The attribute is set to the given value, which followed the `=` sign. + /// Note that values can be empty. + Value(&'a BStr), + /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. + Unspecified, +} + pub mod ignore; pub mod parse; + +pub fn parse(buf: &[u8]) -> parse::attribute::Lines<'_> { + parse::attribute::Lines::new(buf) +} diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs new file mode 100644 index 00000000000..78e976d85f8 --- /dev/null +++ b/git-attributes/src/parse/attribute.rs @@ -0,0 +1,43 @@ +use bstr::{BStr, BString, ByteSlice}; + +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +pub struct Iter<'a> { + _attrs: bstr::Split<'a>, +} + +impl<'a> Iterator for Iter<'a> { + type Item = (&'a BStr, crate::State<'a>); + + fn next(&mut self) -> Option { + todo!() + } +} + +impl<'a> Lines<'a> { + pub fn new(buf: &'a [u8]) -> Self { + let bom = unicode_bom::Bom::from(buf); + Lines { + lines: buf[bom.len()..].lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = (BString, crate::ignore::pattern::Mode, Iter<'a>, usize); + + fn next(&mut self) -> Option { + for line in self.lines.by_ref() { + self.line_no += 1; + if line.is_empty() || line.first() == Some(&b'#') { + continue; + } + todo!("parse line") + } + None + } +} diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index be6212cbf51..1b496bd8010 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -1,22 +1,22 @@ use crate::ignore; use bstr::{BString, ByteSlice}; -pub struct Iter<'a> { +pub struct Lines<'a> { lines: bstr::Lines<'a>, line_no: usize, } -impl<'a> Iter<'a> { +impl<'a> Lines<'a> { pub fn new(buf: &'a [u8]) -> Self { let bom = unicode_bom::Bom::from(buf); - Iter { + Lines { lines: buf[bom.len()..].lines(), line_no: 0, } } } -impl<'a> Iterator for Iter<'a> { +impl<'a> 
Iterator for Lines<'a> { type Item = (BString, ignore::pattern::Mode, usize); fn next(&mut self) -> Option { diff --git a/git-attributes/src/parse/mod.rs b/git-attributes/src/parse/mod.rs index ec13abd8271..5bcf9487b7a 100644 --- a/git-attributes/src/parse/mod.rs +++ b/git-attributes/src/parse/mod.rs @@ -1,7 +1,7 @@ pub mod ignore; -pub mod attributes {} +pub mod attribute; -pub fn ignore(buf: &[u8]) -> ignore::Iter<'_> { - ignore::Iter::new(buf) +pub fn ignore(buf: &[u8]) -> ignore::Lines<'_> { + ignore::Lines::new(buf) } diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs new file mode 100644 index 00000000000..aa5f70289fc --- /dev/null +++ b/git-attributes/tests/parse/attribute.rs @@ -0,0 +1,31 @@ +#[test] +#[ignore] +fn byte_order_marks_are_no_patterns() { + git_attributes::parse("\u{feff}hello".as_bytes()).next(); + todo!(); +} + +#[test] +#[ignore] +fn line_numbers_are_counted_correctly() { + todo!() +} + +#[test] +#[ignore] +fn line_endings_can_be_windows_or_unix() { + let _ = git_attributes::parse(b"unix\nwindows\r\nlast").collect::>(); + todo!() +} + +#[test] +fn comments_are_ignored() { + assert!(git_attributes::parse(b"# hello world").next().is_none()); +} + +#[test] +#[ignore] +fn backslashes_before_hashes_are_part_of_the_path() { + git_attributes::parse(br"\#hello").next(); + todo!(); +} diff --git a/git-attributes/tests/parse/mod.rs b/git-attributes/tests/parse/mod.rs index 5ae31c89f66..3a142578d0d 100644 --- a/git-attributes/tests/parse/mod.rs +++ b/git-attributes/tests/parse/mod.rs @@ -1 +1,2 @@ +mod attribute; mod ignore; From 311db977049216928bba66201620c3a08d05f07f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 23 Mar 2022 18:58:09 +0800 Subject: [PATCH 06/22] part of line handling implemented, but test still fails for good reason (#301) --- Cargo.lock | 2 + etc/check-package-size.sh | 2 +- git-attributes/Cargo.toml | 6 +++ git-attributes/src/lib.rs | 2 + git-attributes/src/parse/attribute.rs 
| 58 ++++++++++++++++++++++--- git-attributes/src/parse/ignore.rs | 2 +- git-attributes/tests/parse/attribute.rs | 34 ++++++++++++++- 7 files changed, 96 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 38919a5fb84..fe6489cdfbd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1078,6 +1078,8 @@ dependencies = [ "bitflags", "bstr", "git-testtools", + "quick-error", + "serde", "unicode-bom", ] diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index fe0836492c9..f1147e8707a 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -19,7 +19,7 @@ echo "in root: gitoxide CLI" (enter cargo-smart-release && indent cargo diet -n --package-size-limit 85KB) (enter git-actor && indent cargo diet -n --package-size-limit 5KB) (enter git-pathspec && indent cargo diet -n --package-size-limit 5KB) -(enter git-attributes && indent cargo diet -n --package-size-limit 5KB) +(enter git-attributes && indent cargo diet -n --package-size-limit 10KB) (enter git-index && indent cargo diet -n --package-size-limit 30KB) (enter git-worktree && indent cargo diet -n --package-size-limit 20KB) (enter git-revision && indent cargo diet -n --package-size-limit 10KB) diff --git a/git-attributes/Cargo.toml b/git-attributes/Cargo.toml index 3ef53f1df3a..9bed5fa03fc 100644 --- a/git-attributes/Cargo.toml +++ b/git-attributes/Cargo.toml @@ -10,12 +10,18 @@ edition = "2018" [lib] doctest = false +[features] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
+serde1 = ["serde", "bstr/serde1"] + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] bstr = { version = "0.2.13", default-features = false, features = ["std"]} bitflags = "1.3.2" unicode-bom = "1.1.4" +quick-error = "2.0.0" +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} [dev-dependencies] git-testtools = { path = "../tests/tools"} diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs index efe597ba0a5..755541c9499 100644 --- a/git-attributes/src/lib.rs +++ b/git-attributes/src/lib.rs @@ -2,6 +2,8 @@ use bstr::BStr; +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub enum State<'a> { /// The attribute is listed, or has the special value 'true' Set, diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 78e976d85f8..6091e6843f2 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -1,4 +1,20 @@ use bstr::{BStr, BString, ByteSlice}; +use std::borrow::Cow; + +mod error { + use bstr::BString; + use quick_error::quick_error; + + quick_error! 
{ + #[derive(Debug)] + pub enum Error { + PatternNegation { line_number: usize, line: BString } { + display("Line {} has a negative pattern, for literal characters use \\!: {}", line_number, line) + } + } + } +} +pub use error::Error; pub struct Lines<'a> { lines: bstr::Lines<'a>, @@ -6,14 +22,23 @@ pub struct Lines<'a> { } pub struct Iter<'a> { - _attrs: bstr::Split<'a>, + attrs: bstr::Split<'a>, +} + +impl<'a> Iter<'a> { + pub fn new(attrs: &'a [u8]) -> Self { + Iter { + attrs: attrs.as_bstr().split_str(b" "), + } + } } impl<'a> Iterator for Iter<'a> { - type Item = (&'a BStr, crate::State<'a>); + type Item = Result<(&'a BStr, crate::State<'a>), Error>; fn next(&mut self) -> Option { - todo!() + let _attr = self.attrs.next().filter(|a| !a.is_empty())?; + todo!("parse attribute") } } @@ -28,16 +53,35 @@ impl<'a> Lines<'a> { } impl<'a> Iterator for Lines<'a> { - type Item = (BString, crate::ignore::pattern::Mode, Iter<'a>, usize); + type Item = Result<(BString, crate::ignore::pattern::Mode, Iter<'a>, usize), Error>; fn next(&mut self) -> Option { for line in self.lines.by_ref() { self.line_no += 1; - if line.is_empty() || line.first() == Some(&b'#') { - continue; + match parse_line(line) { + None => continue, + Some(res) => return Some(res.map(|(line, flags, attrs)| (line, flags, attrs, self.line_no))), } - todo!("parse line") } None } } + +fn parse_line(line: &[u8]) -> Option), Error>> { + if line.is_empty() { + return None; + } + + let (line, attrs): (Cow<'_, _>, _) = if line.starts_with(b"\"") { + todo!("unquote, need length of consumed bytes to know where attrs start") + } else { + let mut tokens = line.splitn(2, |n| *n == b' '); + ( + tokens.next().expect("at least a line").into(), + tokens.next().unwrap_or_default(), + ) + }; + + let (pattern, flags) = super::ignore::parse_line(line.as_ref())?; + Ok((pattern, flags, Iter::new(attrs))).into() +} diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index 1b496bd8010..96298798c4d 
100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -32,7 +32,7 @@ impl<'a> Iterator for Lines<'a> { } #[inline] -fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::Mode)> { +pub(crate) fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::Mode)> { let mut mode = ignore::pattern::Mode::empty(); if line.is_empty() { return None; diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index aa5f70289fc..1baa49e03be 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -1,3 +1,7 @@ +use bstr::{BStr, BString}; +use git_attributes::ignore::pattern::Mode; +use git_attributes::{ignore, parse}; + #[test] #[ignore] fn byte_order_marks_are_no_patterns() { @@ -19,13 +23,41 @@ fn line_endings_can_be_windows_or_unix() { } #[test] -fn comments_are_ignored() { +fn comment_lines_are_ignored() { assert!(git_attributes::parse(b"# hello world").next().is_none()); } +#[test] +#[ignore] +fn comment_cannot_be_escaped_like_gitignore() { + assert_eq!(line(r"\#hello"), (r"\#hello".into(), Mode::empty(), vec![], 0)); +} + #[test] #[ignore] fn backslashes_before_hashes_are_part_of_the_path() { git_attributes::parse(br"\#hello").next(); todo!(); } + +type ExpandedAttribute<'a> = ( + BString, + ignore::pattern::Mode, + Vec<(&'a BStr, git_attributes::State<'a>)>, + usize, +); + +fn line(input: &str) -> ExpandedAttribute { + let mut lines = git_attributes::parse(input.as_bytes()); + let res = expand(lines.next().unwrap()).unwrap(); + assert!(lines.next().is_none(), "expected only one line"); + res +} + +fn expand( + input: Result<(BString, ignore::pattern::Mode, parse::attribute::Iter<'_>, usize), parse::attribute::Error>, +) -> Result, parse::attribute::Error> { + let (pattern, mode, attrs, line_no) = input?; + let attrs = attrs.collect::, _>>()?; + Ok((pattern, mode, attrs, line_no)) +} From 81d2bf2ec5f571245d56eb853306d07ede3010a2 Mon 
Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 12:27:50 +0800 Subject: [PATCH 07/22] all path-related tests are green (#301) --- git-attributes/src/parse/attribute.rs | 13 +++- .../tests/fixtures/attributes/various.txt | 11 ++++ git-attributes/tests/parse/attribute.rs | 64 +++++++++++++++---- git-odb/src/alternate/mod.rs | 2 +- 4 files changed, 74 insertions(+), 16 deletions(-) create mode 100644 git-attributes/tests/fixtures/attributes/various.txt diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 6091e6843f2..82471d014a6 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -14,6 +14,7 @@ mod error { } } } +use crate::ignore; pub use error::Error; pub struct Lines<'a> { @@ -60,7 +61,17 @@ impl<'a> Iterator for Lines<'a> { self.line_no += 1; match parse_line(line) { None => continue, - Some(res) => return Some(res.map(|(line, flags, attrs)| (line, flags, attrs, self.line_no))), + Some(Ok((pattern, flags, attrs))) => { + return Some(if flags.contains(ignore::pattern::Mode::NEGATIVE) { + Err(Error::PatternNegation { + line: line.into(), + line_number: self.line_no, + }) + } else { + Ok((pattern, flags, attrs, self.line_no)) + }) + } + Some(Err(err)) => return Some(Err(err)), } } None diff --git a/git-attributes/tests/fixtures/attributes/various.txt b/git-attributes/tests/fixtures/attributes/various.txt new file mode 100644 index 00000000000..7974f710569 --- /dev/null +++ b/git-attributes/tests/fixtures/attributes/various.txt @@ -0,0 +1,11 @@ +# no attribute for now +*.[oa] + +# comment +*.html + +# other comment +\!foo.html + +\#a/path +/* diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 1baa49e03be..29d25f52e3b 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -1,25 +1,38 @@ use bstr::{BStr, BString}; use git_attributes::ignore::pattern::Mode; use 
git_attributes::{ignore, parse}; +use git_testtools::fixture_path; #[test] -#[ignore] fn byte_order_marks_are_no_patterns() { - git_attributes::parse("\u{feff}hello".as_bytes()).next(); - todo!(); + assert_eq!(line("\u{feff}hello"), (r"hello".into(), Mode::NO_SUB_DIR, vec![], 1)); } #[test] -#[ignore] fn line_numbers_are_counted_correctly() { - todo!() + let ignore = std::fs::read(fixture_path("attributes/various.txt")).unwrap(); + assert_eq!( + try_lines(&String::from_utf8(ignore).unwrap()).unwrap(), + vec![ + (r"*.[oa]".into(), Mode::NO_SUB_DIR, vec![], 2), + (r"*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, vec![], 5), + (r"!foo.html".into(), Mode::NO_SUB_DIR, vec![], 8), + (r"#a/path".into(), Mode::empty(), vec![], 10), + (r"/*".into(), Mode::empty(), vec![], 11), + ] + ); } #[test] -#[ignore] fn line_endings_can_be_windows_or_unix() { - let _ = git_attributes::parse(b"unix\nwindows\r\nlast").collect::>(); - todo!() + assert_eq!( + try_lines("unix\nwindows\r\nlast").unwrap(), + vec![ + (r"unix".into(), Mode::NO_SUB_DIR, vec![], 1), + (r"windows".into(), Mode::NO_SUB_DIR, vec![], 2), + (r"last".into(), Mode::NO_SUB_DIR, vec![], 3) + ] + ); } #[test] @@ -28,16 +41,28 @@ fn comment_lines_are_ignored() { } #[test] -#[ignore] -fn comment_cannot_be_escaped_like_gitignore() { - assert_eq!(line(r"\#hello"), (r"\#hello".into(), Mode::empty(), vec![], 0)); +fn comment_can_be_escaped_like_gitignore() { + assert_eq!( + line(r"\#hello"), + (r"#hello".into(), Mode::NO_SUB_DIR, vec![], 1), + "undocumented, but definitely works" + ); +} + +#[test] +fn esclamation_marks_must_be_escaped_or_error_unlike_gitignore() { + assert_eq!(line(r"\!hello"), (r"!hello".into(), Mode::NO_SUB_DIR, vec![], 1)); + assert!(matches!( + try_line(r"!hello"), + Err(parse::attribute::Error::PatternNegation { line_number: 1, .. 
}) + )); } #[test] #[ignore] -fn backslashes_before_hashes_are_part_of_the_path() { - git_attributes::parse(br"\#hello").next(); - todo!(); +fn attributes_are_parsed_behind_various_whitespace_characters() { + // see https://github.com/git/git/blob/master/attr.c#L280:L280 + todo!() } type ExpandedAttribute<'a> = ( @@ -47,6 +72,13 @@ type ExpandedAttribute<'a> = ( usize, ); +fn try_line(input: &str) -> Result { + let mut lines = git_attributes::parse(input.as_bytes()); + let res = expand(lines.next().unwrap())?; + assert!(lines.next().is_none(), "expected only one line"); + Ok(res) +} + fn line(input: &str) -> ExpandedAttribute { let mut lines = git_attributes::parse(input.as_bytes()); let res = expand(lines.next().unwrap()).unwrap(); @@ -54,6 +86,10 @@ fn line(input: &str) -> ExpandedAttribute { res } +fn try_lines(input: &str) -> Result, parse::attribute::Error> { + git_attributes::parse(input.as_bytes()).map(|l| expand(l)).collect() +} + fn expand( input: Result<(BString, ignore::pattern::Mode, parse::attribute::Iter<'_>, usize), parse::attribute::Error>, ) -> Result, parse::attribute::Error> { diff --git a/git-odb/src/alternate/mod.rs b/git-odb/src/alternate/mod.rs index 1fd6eb95a08..1f0d4ffbcbf 100644 --- a/git-odb/src/alternate/mod.rs +++ b/git-odb/src/alternate/mod.rs @@ -20,7 +20,7 @@ use std::{fs, io, path::PathBuf}; /// pub mod parse; -#[allow(missing_docs)] +/// pub mod unquote; /// Returned by [`resolve()`] From 0d1aaf00160f98e40fb92fd401c67f59da2475fc Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 16:07:16 +0800 Subject: [PATCH 08/22] add empty git-quote crate (#301) --- Cargo.lock | 4 ++++ Cargo.toml | 1 + README.md | 1 + crate-status.md | 6 ++++++ etc/check-package-size.sh | 1 + git-quote/CHANGELOG.md | 10 ++++++++++ git-quote/Cargo.toml | 15 +++++++++++++++ git-quote/src/lib.rs | 1 + 8 files changed, 39 insertions(+) create mode 100644 git-quote/CHANGELOG.md create mode 100644 git-quote/Cargo.toml create mode 100644 
git-quote/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index fe6489cdfbd..ac99e989454 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1329,6 +1329,10 @@ dependencies = [ "serde", ] +[[package]] +name = "git-quote" +version = "0.0.0" + [[package]] name = "git-ref" version = "0.12.0" diff --git a/Cargo.toml b/Cargo.toml index 81166c1fcd4..58511e809a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -133,6 +133,7 @@ members = [ "git-features", "git-commitgraph", "git-chunk", + "git-quote", "git-object", "git-diff", "git-traverse", diff --git a/README.md b/README.md index f2e28f87bb0..a5d5ec4095f 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ Follow linked crate name for detailed status. Please note that all crates follow * [git-bitmap](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-bitmap) * [git-revision](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-revision) * [git-attributes](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-attributes) + * [git-quote](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-quote) * **idea** * [git-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-pathspec) * [git-subomdule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-submodule) diff --git a/crate-status.md b/crate-status.md index 2a9d24d3ddc..494598be814 100644 --- a/crate-status.md +++ b/crate-status.md @@ -214,6 +214,12 @@ Check out the [performance discussion][git-traverse-performance] as well. * [ ] parse git-attributes files * [ ] create an attributes stack, ideally one that includes 'ignored' status from .gitignore files. 
* [ ] support for built-in `binary` macro for `-text -diff -merge` + +### git-quote + +* **ansi-c** + * [x] quote + * [ ] unquote ### git-pathspec diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index f1147e8707a..008e0776a1a 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -22,6 +22,7 @@ echo "in root: gitoxide CLI" (enter git-attributes && indent cargo diet -n --package-size-limit 10KB) (enter git-index && indent cargo diet -n --package-size-limit 30KB) (enter git-worktree && indent cargo diet -n --package-size-limit 20KB) +(enter git-quote && indent cargo diet -n --package-size-limit 5KB) (enter git-revision && indent cargo diet -n --package-size-limit 10KB) (enter git-bitmap && indent cargo diet -n --package-size-limit 5KB) (enter git-tempfile && indent cargo diet -n --package-size-limit 25KB) diff --git a/git-quote/CHANGELOG.md b/git-quote/CHANGELOG.md new file mode 100644 index 00000000000..ea8f0d10c8a --- /dev/null +++ b/git-quote/CHANGELOG.md @@ -0,0 +1,10 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +An empty crate without any content to reserve the name for the gitoxide project. 
diff --git a/git-quote/Cargo.toml b/git-quote/Cargo.toml new file mode 100644 index 00000000000..2a9cd64364f --- /dev/null +++ b/git-quote/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "git-quote" +version = "0.0.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A WIP crate of the gitoxide project dealing with various quotations used by git" +authors = ["Sebastian Thiel "] +edition = "2018" + +[lib] +doctest = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/git-quote/src/lib.rs b/git-quote/src/lib.rs new file mode 100644 index 00000000000..d7a83e4f525 --- /dev/null +++ b/git-quote/src/lib.rs @@ -0,0 +1 @@ +#![forbid(unsafe_code, rust_2018_idioms)] From 1be8f14128b673ea3399bc04b0a6747de9d6d404 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 16:19:21 +0800 Subject: [PATCH 09/22] Add ansic::undo (#301) --- Cargo.lock | 5 ++ git-quote/CHANGELOG.md | 2 +- git-quote/Cargo.toml | 3 ++ git-quote/src/ansi_c.rs | 110 +++++++++++++++++++++++++++++++++++++++ git-quote/src/lib.rs | 3 ++ git-quote/tests/quote.rs | 29 +++++++++++ 6 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 git-quote/src/ansi_c.rs create mode 100644 git-quote/tests/quote.rs diff --git a/Cargo.lock b/Cargo.lock index ac99e989454..0e969dfc371 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1332,6 +1332,11 @@ dependencies = [ [[package]] name = "git-quote" version = "0.0.0" +dependencies = [ + "bstr", + "btoi", + "quick-error", +] [[package]] name = "git-ref" diff --git a/git-quote/CHANGELOG.md b/git-quote/CHANGELOG.md index ea8f0d10c8a..cc1a6cca004 100644 --- a/git-quote/CHANGELOG.md +++ b/git-quote/CHANGELOG.md @@ -7,4 +7,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased -An empty crate without any content to reserve the name for the gitoxide project. 
+Initial release with ansi_c unquoting capability. diff --git a/git-quote/Cargo.toml b/git-quote/Cargo.toml index 2a9cd64364f..1ea868d0a95 100644 --- a/git-quote/Cargo.toml +++ b/git-quote/Cargo.toml @@ -13,3 +13,6 @@ doctest = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bstr = { version = "0.2.13", default-features = false, features = ["std"]} +quick-error = "2.0.0" +btoi = "0.4.2" diff --git a/git-quote/src/ansi_c.rs b/git-quote/src/ansi_c.rs new file mode 100644 index 00000000000..b5b5d7f1b80 --- /dev/null +++ b/git-quote/src/ansi_c.rs @@ -0,0 +1,110 @@ +pub mod undo { + use bstr::{BStr, BString}; + use quick_error::quick_error; + + quick_error! { + #[derive(Debug)] + pub enum Error { + InvalidInput { message: String, input: BString } { + display("{}: {:?}", message, input) + } + UnsupportedEscapeByte { byte: u8, input: BString } { + display("Invalid escaped value {} in input {:?}", byte, input) + } + } + } + + impl Error { + pub(crate) fn new(message: impl ToString, input: &BStr) -> Error { + Error::InvalidInput { + message: message.to_string(), + input: input.into(), + } + } + } +} + +use std::{borrow::Cow, io::Read}; + +use bstr::{BStr, BString, ByteSlice}; + +/// Unquote the given ansi-c quoted `input` string. +/// +/// The `input` is returned unaltered if it doesn't start with a `"` character to indicate +/// quotation, otherwise a new unqoted string will always be allocated. +/// +/// See [the tests][tests] for quotation examples. 
+/// +/// [tests]: https://github.com/Byron/gitoxide/blob/e355b4ad133075152312816816af5ce72cf79cff/git-odb/src/alternate/unquote.rs#L110-L118 +pub fn undo(input: &BStr) -> Result, undo::Error> { + if !input.starts_with(b"\"") { + return Ok(input.into()); + } + if input.len() < 2 { + return Err(undo::Error::new("Input must be surrounded by double quotes", input)); + } + let original = input.as_bstr(); + let mut input = &input[1..]; + let mut out = BString::default(); + fn consume_one_past(input: &mut &BStr, position: usize) -> Result { + *input = input + .get(position + 1..) + .ok_or_else(|| undo::Error::new("Unexpected end of input", input))? + .as_bstr(); + let next = input[0]; + *input = input.get(1..).unwrap_or_default().as_bstr(); + Ok(next) + } + loop { + match input.find_byteset(b"\"\\") { + Some(position) => { + out.extend_from_slice(&input[..position]); + match input[position] { + b'"' => break, + b'\\' => { + let next = consume_one_past(&mut input, position)?; + match next { + b'n' => out.push(b'\n'), + b'r' => out.push(b'\r'), + b't' => out.push(b'\t'), + b'a' => out.push(7), + b'b' => out.push(8), + b'v' => out.push(0xb), + b'f' => out.push(0xc), + b'"' => out.push(b'"'), + b'\\' => out.push(b'\\'), + b'0' | b'1' | b'2' | b'3' => { + let mut buf = [next; 3]; + input + .get(..2) + .ok_or_else(|| { + undo::Error::new( + "Unexpected end of input when fetching two more octal bytes", + input, + ) + })? 
+ .read_exact(&mut buf[1..]) + .expect("impossible to fail as numbers match"); + let byte = btoi::btou_radix(&buf, 8).map_err(|e| undo::Error::new(e, original))?; + out.push(byte); + input = &input[2..]; + } + _ => { + return Err(undo::Error::UnsupportedEscapeByte { + byte: next, + input: original.into(), + }) + } + } + } + _ => unreachable!("cannot find character that we didn't search for"), + } + } + None => { + out.extend_from_slice(input); + break; + } + } + } + Ok(out.into()) +} diff --git a/git-quote/src/lib.rs b/git-quote/src/lib.rs index d7a83e4f525..d2d266b56e0 100644 --- a/git-quote/src/lib.rs +++ b/git-quote/src/lib.rs @@ -1 +1,4 @@ #![forbid(unsafe_code, rust_2018_idioms)] + +/// +pub mod ansi_c; diff --git a/git-quote/tests/quote.rs b/git-quote/tests/quote.rs new file mode 100644 index 00000000000..b4db2166e5d --- /dev/null +++ b/git-quote/tests/quote.rs @@ -0,0 +1,29 @@ +mod ansi_c { + mod undo { + use bstr::ByteSlice; + + macro_rules! test { + ($name:ident, $input:literal, $expected:literal) => { + #[test] + fn $name() { + assert_eq!( + git_quote::ansi_c::undo($input.as_bytes().as_bstr()).expect("valid input"), + std::borrow::Cow::Borrowed($expected.as_bytes().as_bstr()) + ); + } + }; + } + + test!(unquoted_remains_unchanged, "hello", "hello"); + test!(empty_surrounded_by_quotes, "\"\"", ""); + test!(surrounded_only_by_quotes, "\"hello\"", "hello"); + test!(typical_escapes, r#""\n\r\t""#, b"\n\r\t"); + test!(untypical_escapes, r#""\a\b\f\v""#, b"\x07\x08\x0c\x0b"); + test!(literal_escape_and_double_quote, r#""\"\\""#, br#""\"#); + test!( + unicode_byte_escapes_by_number, + r#""\346\277\261\351\207\216\t\347\264\224""#, + "濱野\t純" + ); + } +} From 8e49aa6090c1c361e3ddd44798754c44c179ab49 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 16:22:34 +0800 Subject: [PATCH 10/22] use git-quote crate in git-odb alternate parsing (#301) --- Cargo.lock | 4 +- git-odb/Cargo.toml | 2 +- git-odb/src/alternate/mod.rs | 2 - 
git-odb/src/alternate/parse.rs | 6 +- git-odb/src/alternate/unquote.rs | 129 ------------------------------- git-quote/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 139 deletions(-) delete mode 100644 git-odb/src/alternate/unquote.rs diff --git a/Cargo.lock b/Cargo.lock index 0e969dfc371..7b1517d2aac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1241,13 +1241,13 @@ name = "git-odb" version = "0.27.0" dependencies = [ "arc-swap", - "btoi", "filetime", "git-actor", "git-features", "git-hash", "git-object", "git-pack", + "git-quote", "git-testtools", "parking_lot 0.12.0", "pretty_assertions", @@ -1331,7 +1331,7 @@ dependencies = [ [[package]] name = "git-quote" -version = "0.0.0" +version = "0.1.0" dependencies = [ "bstr", "btoi", diff --git a/git-odb/Cargo.toml b/git-odb/Cargo.toml index 67981c6d82d..fb053eade2e 100644 --- a/git-odb/Cargo.toml +++ b/git-odb/Cargo.toml @@ -32,11 +32,11 @@ all-features = true [dependencies] git-features = { version = "^0.19.1", path = "../git-features", features = ["rustsha1", "walkdir", "zlib", "crc32", "bstr"] } git-hash = { version = "^0.9.2", path = "../git-hash" } +git-quote = { version = "^0.1.0", path = "../git-quote" } git-object = { version = "^0.17.1", path = "../git-object" } git-pack = { version = "^0.17.0", path = "../git-pack" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} -btoi = "0.4.2" tempfile = "3.1.0" thiserror = "1.0.26" parking_lot = { version = "0.12.0" } diff --git a/git-odb/src/alternate/mod.rs b/git-odb/src/alternate/mod.rs index 1f0d4ffbcbf..ffcbb16a9c2 100644 --- a/git-odb/src/alternate/mod.rs +++ b/git-odb/src/alternate/mod.rs @@ -20,8 +20,6 @@ use std::{fs, io, path::PathBuf}; /// pub mod parse; -/// -pub mod unquote; /// Returned by [`resolve()`] #[derive(thiserror::Error, Debug)] diff --git a/git-odb/src/alternate/parse.rs b/git-odb/src/alternate/parse.rs index 4f134c28a29..fd4e4fe7f32 100644 --- a/git-odb/src/alternate/parse.rs +++ 
b/git-odb/src/alternate/parse.rs @@ -2,8 +2,6 @@ use std::{borrow::Cow, path::PathBuf}; use git_object::bstr::ByteSlice; -use crate::alternate::unquote; - /// Returned as part of [`crate::alternate::Error::Parse`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] @@ -11,7 +9,7 @@ pub enum Error { #[error("Could not obtain an object path for the alternate directory '{}'", String::from_utf8_lossy(.0))] PathConversion(Vec), #[error("Could not unquote alternate path")] - Unquote(#[from] unquote::Error), + Unquote(#[from] git_quote::ansi_c::undo::Error), } pub(crate) fn content(input: &[u8]) -> Result, Error> { @@ -23,7 +21,7 @@ pub(crate) fn content(input: &[u8]) -> Result, Error> { } out.push( git_features::path::from_bstr(if line.starts_with(b"\"") { - unquote::ansi_c(line)? + git_quote::ansi_c::undo(line)? } else { Cow::Borrowed(line) }) diff --git a/git-odb/src/alternate/unquote.rs b/git-odb/src/alternate/unquote.rs deleted file mode 100644 index 8c3e84e42aa..00000000000 --- a/git-odb/src/alternate/unquote.rs +++ /dev/null @@ -1,129 +0,0 @@ -use std::{borrow::Cow, io::Read}; - -use git_object::bstr::{BStr, BString, ByteSlice}; - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("{message}: {:?}", String::from_utf8_lossy(.input))] - InvalidInput { message: String, input: Vec }, - #[error("Invalid escaped value {byte} in input {:?}", String::from_utf8_lossy(.input))] - UnsupportedEscapeByte { byte: u8, input: Vec }, -} - -impl Error { - fn new(message: impl ToString, input: &BStr) -> Error { - Error::InvalidInput { - message: message.to_string(), - input: input.to_vec(), - } - } -} - -/// Unquote the given ansi-c quoted `input` string. -/// -/// The `input` is returned unaltered if it doesn't start with a `"` character to indicate -/// quotation, otherwise a new unqoted string will always be allocated. -/// -/// See [the tests][tests] for quotation examples. 
-/// -/// [tests]: https://github.com/Byron/gitoxide/blob/e355b4ad133075152312816816af5ce72cf79cff/git-odb/src/alternate/unquote.rs#L110-L118 -pub fn ansi_c(input: &BStr) -> Result, Error> { - if !input.starts_with(b"\"") { - return Ok(input.into()); - } - if input.len() < 2 { - return Err(Error::new("Input must be surrounded by double quotes", input)); - } - let original = input.as_bstr(); - let mut input = &input[1..]; - let mut out = BString::default(); - fn consume_one_past(input: &mut &BStr, position: usize) -> Result { - *input = input - .get(position + 1..) - .ok_or_else(|| Error::new("Unexpected end of input", input))? - .as_bstr(); - let next = input[0]; - *input = input.get(1..).unwrap_or_default().as_bstr(); - Ok(next) - } - loop { - match input.find_byteset(b"\"\\") { - Some(position) => { - out.extend_from_slice(&input[..position]); - match input[position] { - b'"' => break, - b'\\' => { - let next = consume_one_past(&mut input, position)?; - match next { - b'n' => out.push(b'\n'), - b'r' => out.push(b'\r'), - b't' => out.push(b'\t'), - b'a' => out.push(7), - b'b' => out.push(8), - b'v' => out.push(0xb), - b'f' => out.push(0xc), - b'"' => out.push(b'"'), - b'\\' => out.push(b'\\'), - b'0' | b'1' | b'2' | b'3' => { - let mut buf = [next; 3]; - input - .get(..2) - .ok_or_else(|| { - Error::new("Unexpected end of input when fetching two more octal bytes", input) - })? - .read_exact(&mut buf[1..]) - .expect("impossible to fail as numbers match"); - let byte = btoi::btou_radix(&buf, 8).map_err(|e| Error::new(e, original))?; - out.push(byte); - input = &input[2..]; - } - _ => { - return Err(Error::UnsupportedEscapeByte { - byte: next, - input: original.to_vec(), - }) - } - } - } - _ => unreachable!("cannot find character that we didn't search for"), - } - } - None => { - out.extend_from_slice(input); - break; - } - } - } - Ok(out.into()) -} - -#[cfg(test)] -mod tests { - use git_object::bstr::ByteSlice; - - use super::*; - - macro_rules! 
test { - ($name:ident, $input:literal, $expected:literal) => { - #[test] - fn $name() { - assert_eq!( - ansi_c($input.as_bytes().as_bstr()).expect("valid input"), - std::borrow::Cow::Borrowed($expected.as_bytes().as_bstr()) - ); - } - }; - } - - test!(unquoted_remains_unchanged, "hello", "hello"); - test!(empty_surrounded_by_quotes, "\"\"", ""); - test!(surrounded_only_by_quotes, "\"hello\"", "hello"); - test!(typical_escapes, r#""\n\r\t""#, b"\n\r\t"); - test!(untypical_escapes, r#""\a\b\f\v""#, b"\x07\x08\x0c\x0b"); - test!(literal_escape_and_double_quote, r#""\"\\""#, br#""\"#); - test!( - unicode_byte_escapes_by_number, - r#""\346\277\261\351\207\216\t\347\264\224""#, - "濱野\t純" - ); -} diff --git a/git-quote/Cargo.toml b/git-quote/Cargo.toml index 1ea868d0a95..8547a6578b5 100644 --- a/git-quote/Cargo.toml +++ b/git-quote/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "git-quote" -version = "0.0.0" +version = "0.1.0" repository = "https://github.com/Byron/gitoxide" license = "MIT/Apache-2.0" description = "A WIP crate of the gitoxide project dealing with various quotations used by git" From a8f6c4d9e039be7fe82899ed281edb37e17e2a77 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 16:23:03 +0800 Subject: [PATCH 11/22] Release git-quote v0.1.0 --- git-quote/CHANGELOG.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/git-quote/CHANGELOG.md b/git-quote/CHANGELOG.md index cc1a6cca004..51e396c6aa0 100644 --- a/git-quote/CHANGELOG.md +++ b/git-quote/CHANGELOG.md @@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased +## 0.1.0 (2022-03-24) Initial release with ansi_c unquoting capability. + +### Commit Statistics + + + + - 3 commits contributed to the release. 
+ - 0 commits were understood as [conventional](https://www.conventionalcommits.org). + - 1 unique issue was worked on: [#301](https://github.com/Byron/gitoxide/issues/301) + +### Commit Details + + + +
view details + + * **[#301](https://github.com/Byron/gitoxide/issues/301)** + - use git-quote crate in git-odb alternate parsing ([`8e49aa6`](https://github.com/Byron/gitoxide/commit/8e49aa6090c1c361e3ddd44798754c44c179ab49)) + - Add ansic::undo ([`1be8f14`](https://github.com/Byron/gitoxide/commit/1be8f14128b673ea3399bc04b0a6747de9d6d404)) + - add empty git-quote crate ([`0d1aaf0`](https://github.com/Byron/gitoxide/commit/0d1aaf00160f98e40fb92fd401c67f59da2475fc)) +
+ From 32b063477bc12b6b823de3dc390c3dd51012ba20 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 16:24:18 +0800 Subject: [PATCH 12/22] thanks clippy --- git-attributes/tests/parse/attribute.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 29d25f52e3b..4ec31791aa1 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -87,7 +87,7 @@ fn line(input: &str) -> ExpandedAttribute { } fn try_lines(input: &str) -> Result, parse::attribute::Error> { - git_attributes::parse(input.as_bytes()).map(|l| expand(l)).collect() + git_attributes::parse(input.as_bytes()).map(expand).collect() } fn expand( From 22c776badd1ea26a2b1ece84fd8c551784c72212 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 17:58:12 +0800 Subject: [PATCH 13/22] validate out-of-quote portions can be passed (#301) --- git-quote/tests/quote.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/git-quote/tests/quote.rs b/git-quote/tests/quote.rs index b4db2166e5d..20ee11f8959 100644 --- a/git-quote/tests/quote.rs +++ b/git-quote/tests/quote.rs @@ -1,13 +1,14 @@ mod ansi_c { mod undo { use bstr::ByteSlice; + use git_quote::ansi_c; macro_rules! 
test { ($name:ident, $input:literal, $expected:literal) => { #[test] fn $name() { assert_eq!( - git_quote::ansi_c::undo($input.as_bytes().as_bstr()).expect("valid input"), + ansi_c::undo($input.as_bytes().as_bstr()).expect("valid input"), std::borrow::Cow::Borrowed($expected.as_bytes().as_bstr()) ); } @@ -25,5 +26,18 @@ mod ansi_c { r#""\346\277\261\351\207\216\t\347\264\224""#, "濱野\t純" ); + test!( + exclamation_and_tilde_survive_an_escape_with_double_escaping, + r#""\\!\\#hello there/file.ext""#, + r"\!\#hello there/file.ext" + ); + + #[test] + fn out_of_quote_characters_can_be_passed_and_will_not_be_consumed() { + assert_eq!( + ansi_c::undo(br#""hello there" out of quote"#.as_bstr()).expect("valid input"), + std::borrow::Cow::Borrowed(b"hello there".as_bstr()) + ); + } } } From a052d79674ccfe8693994150ccbe965792579491 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 18:21:05 +0800 Subject: [PATCH 14/22] feat!: `ansi_c::unquote()` returns the amount of consumed bytes. (#301) That way it's possible to continue parsing past the quoted string. --- git-quote/src/ansi_c.rs | 16 +++++++++++----- git-quote/tests/quote.rs | 33 +++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/git-quote/src/ansi_c.rs b/git-quote/src/ansi_c.rs index b5b5d7f1b80..d91f93b280e 100644 --- a/git-quote/src/ansi_c.rs +++ b/git-quote/src/ansi_c.rs @@ -28,23 +28,25 @@ use std::{borrow::Cow, io::Read}; use bstr::{BStr, BString, ByteSlice}; -/// Unquote the given ansi-c quoted `input` string. +/// Unquote the given ansi-c quoted `input` string, returning it and all of the consumed bytes. /// /// The `input` is returned unaltered if it doesn't start with a `"` character to indicate -/// quotation, otherwise a new unqoted string will always be allocated. +/// quotation, otherwise a new unquoted string will always be allocated. 
+/// The amount of consumed bytes allow to pass strings that start with a quote, and skip all quoted text for additional processing /// /// See [the tests][tests] for quotation examples. /// /// [tests]: https://github.com/Byron/gitoxide/blob/e355b4ad133075152312816816af5ce72cf79cff/git-odb/src/alternate/unquote.rs#L110-L118 -pub fn undo(input: &BStr) -> Result, undo::Error> { +pub fn undo(input: &BStr) -> Result<(Cow<'_, BStr>, usize), undo::Error> { if !input.starts_with(b"\"") { - return Ok(input.into()); + return Ok((input.into(), input.len())); } if input.len() < 2 { return Err(undo::Error::new("Input must be surrounded by double quotes", input)); } let original = input.as_bstr(); let mut input = &input[1..]; + let mut consumed = 1; let mut out = BString::default(); fn consume_one_past(input: &mut &BStr, position: usize) -> Result { *input = input @@ -59,10 +61,12 @@ pub fn undo(input: &BStr) -> Result, undo::Error> { match input.find_byteset(b"\"\\") { Some(position) => { out.extend_from_slice(&input[..position]); + consumed += position + 1; match input[position] { b'"' => break, b'\\' => { let next = consume_one_past(&mut input, position)?; + consumed += 1; match next { b'n' => out.push(b'\n'), b'r' => out.push(b'\r'), @@ -88,6 +92,7 @@ pub fn undo(input: &BStr) -> Result, undo::Error> { let byte = btoi::btou_radix(&buf, 8).map_err(|e| undo::Error::new(e, original))?; out.push(byte); input = &input[2..]; + consumed += 2; } _ => { return Err(undo::Error::UnsupportedEscapeByte { @@ -102,9 +107,10 @@ pub fn undo(input: &BStr) -> Result, undo::Error> { } None => { out.extend_from_slice(input); + consumed += input.len(); break; } } } - Ok(out.into()) + Ok((out.into(), consumed)) } diff --git a/git-quote/tests/quote.rs b/git-quote/tests/quote.rs index 20ee11f8959..370f5baa1d5 100644 --- a/git-quote/tests/quote.rs +++ b/git-quote/tests/quote.rs @@ -4,40 +4,45 @@ mod ansi_c { use git_quote::ansi_c; macro_rules! 
test { - ($name:ident, $input:literal, $expected:literal) => { + ($name:ident, $input:literal, $expected:literal, $consumed:literal) => { #[test] fn $name() { assert_eq!( ansi_c::undo($input.as_bytes().as_bstr()).expect("valid input"), - std::borrow::Cow::Borrowed($expected.as_bytes().as_bstr()) + ( + std::borrow::Cow::Borrowed($expected.as_bytes().as_bstr()), + $consumed + ) ); } }; } - test!(unquoted_remains_unchanged, "hello", "hello"); - test!(empty_surrounded_by_quotes, "\"\"", ""); - test!(surrounded_only_by_quotes, "\"hello\"", "hello"); - test!(typical_escapes, r#""\n\r\t""#, b"\n\r\t"); - test!(untypical_escapes, r#""\a\b\f\v""#, b"\x07\x08\x0c\x0b"); - test!(literal_escape_and_double_quote, r#""\"\\""#, br#""\"#); + test!(unquoted_remains_unchanged, "hello", "hello", 5); + test!(empty_surrounded_by_quotes, "\"\"", "", 2); + test!(surrounded_only_by_quotes, "\"hello\"", "hello", 7); + test!(typical_escapes, r#""\n\r\t""#, b"\n\r\t", 8); + test!(untypical_escapes, r#""\a\b\f\v""#, b"\x07\x08\x0c\x0b", 10); + test!(literal_escape_and_double_quote, r#""\"\\""#, br#""\"#, 6); test!( unicode_byte_escapes_by_number, r#""\346\277\261\351\207\216\t\347\264\224""#, - "濱野\t純" + "濱野\t純", + 40 ); test!( exclamation_and_tilde_survive_an_escape_with_double_escaping, r#""\\!\\#hello there/file.ext""#, - r"\!\#hello there/file.ext" + r"\!\#hello there/file.ext", + 28 ); #[test] fn out_of_quote_characters_can_be_passed_and_will_not_be_consumed() { - assert_eq!( - ansi_c::undo(br#""hello there" out of quote"#.as_bstr()).expect("valid input"), - std::borrow::Cow::Borrowed(b"hello there".as_bstr()) - ); + let input = br#""hello there" out of quote"#.as_bstr(); + let (unquoted, consumed) = ansi_c::undo(input).expect("valid input"); + assert_eq!(unquoted, std::borrow::Cow::Borrowed(b"hello there".as_bstr())); + assert_eq!(&input[consumed..], " out of quote"); } } } From ba486297114154c334e0c38cd883504608973f3c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 
2022 18:22:03 +0800 Subject: [PATCH 15/22] adapt to changes in git-quote (#301) --- git-odb/src/alternate/parse.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-odb/src/alternate/parse.rs b/git-odb/src/alternate/parse.rs index fd4e4fe7f32..a244076fdcb 100644 --- a/git-odb/src/alternate/parse.rs +++ b/git-odb/src/alternate/parse.rs @@ -21,7 +21,7 @@ pub(crate) fn content(input: &[u8]) -> Result, Error> { } out.push( git_features::path::from_bstr(if line.starts_with(b"\"") { - git_quote::ansi_c::undo(line)? + git_quote::ansi_c::undo(line)?.0 } else { Cow::Borrowed(line) }) From 8ec7b30f6bfaab8273c1007f16a7a1375fe46239 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 24 Mar 2022 18:39:06 +0800 Subject: [PATCH 16/22] A first stab at unquoting ansi_c style patterns (#301) --- Cargo.lock | 1 + git-attributes/Cargo.toml | 2 ++ git-attributes/src/parse/attribute.rs | 20 +++++++++++++++----- git-attributes/tests/parse/attribute.rs | 4 ++++ 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7b1517d2aac..2e0f5c7719c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1077,6 +1077,7 @@ version = "0.0.0" dependencies = [ "bitflags", "bstr", + "git-quote", "git-testtools", "quick-error", "serde", diff --git a/git-attributes/Cargo.toml b/git-attributes/Cargo.toml index 9bed5fa03fc..cb743b74d2e 100644 --- a/git-attributes/Cargo.toml +++ b/git-attributes/Cargo.toml @@ -17,6 +17,8 @@ serde1 = ["serde", "bstr/serde1"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +git-quote = { version = "^0.1.0", path = "../git-quote" } + bstr = { version = "0.2.13", default-features = false, features = ["std"]} bitflags = "1.3.2" unicode-bom = "1.1.4" diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 82471d014a6..82414943526 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ 
-11,6 +11,11 @@ mod error { PatternNegation { line_number: usize, line: BString } { display("Line {} has a negative pattern, for literal characters use \\!: {}", line_number, line) } + Unquote(err: git_quote::ansi_c::undo::Error) { + display("Could not unquote attributes line") + from() + source(err) + } } } } @@ -27,9 +32,9 @@ pub struct Iter<'a> { } impl<'a> Iter<'a> { - pub fn new(attrs: &'a [u8]) -> Self { + pub fn new(attrs: &'a BStr) -> Self { Iter { - attrs: attrs.as_bstr().split_str(b" "), + attrs: attrs.split_str(b" "), } } } @@ -83,13 +88,18 @@ fn parse_line(line: &[u8]) -> Option, _) = if line.starts_with(b"\"") { - todo!("unquote, need length of consumed bytes to know where attrs start") + let (unquoted, consumed) = match git_quote::ansi_c::undo(line) { + Ok(res) => res, + Err(err) => return Some(Err(err.into())), + }; + (unquoted, &line[consumed..]) } else { let mut tokens = line.splitn(2, |n| *n == b' '); ( - tokens.next().expect("at least a line").into(), - tokens.next().unwrap_or_default(), + tokens.next().expect("at least a line").as_bstr().into(), + tokens.next().unwrap_or_default().as_bstr(), ) }; diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 4ec31791aa1..437156d78fe 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -6,6 +6,10 @@ use git_testtools::fixture_path; #[test] fn byte_order_marks_are_no_patterns() { assert_eq!(line("\u{feff}hello"), (r"hello".into(), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!( + line("\u{feff}\"hello\""), + (r"hello".into(), Mode::NO_SUB_DIR, vec![], 1) + ); } #[test] From 93bf1189902f3a6bff3ea5922bf62006b983e5b5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 25 Mar 2022 09:44:17 +0800 Subject: [PATCH 17/22] add quote tests (#301) --- git-attributes/src/parse/attribute.rs | 3 +++ git-attributes/src/parse/ignore.rs | 7 ++++--- git-attributes/tests/parse/attribute.rs | 26 +++++++++++++++++++++++-- 3 files 
changed, 31 insertions(+), 5 deletions(-) diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 82414943526..fbed928d73d 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -64,6 +64,9 @@ impl<'a> Iterator for Lines<'a> { fn next(&mut self) -> Option { for line in self.lines.by_ref() { self.line_no += 1; + if line.first() == Some(&b'#') { + continue; + } match parse_line(line) { None => continue, Some(Ok((pattern, flags, attrs))) => { diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index 96298798c4d..fde352d7342 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -22,6 +22,9 @@ impl<'a> Iterator for Lines<'a> { fn next(&mut self) -> Option { for line in self.lines.by_ref() { self.line_no += 1; + if line.first() == Some(&b'#') { + continue; + } match parse_line(line) { None => continue, Some((line, flags)) => return Some((line, flags, self.line_no)), @@ -37,9 +40,7 @@ pub(crate) fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::M if line.is_empty() { return None; }; - if line.first() == Some(&b'#') { - return None; - } else if line.first() == Some(&b'!') { + if line.first() == Some(&b'!') { mode |= ignore::pattern::Mode::NEGATIVE; line = &line[1..]; } else if line.first() == Some(&b'\\') { diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 437156d78fe..034dd01fc94 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -42,15 +42,17 @@ fn line_endings_can_be_windows_or_unix() { #[test] fn comment_lines_are_ignored() { assert!(git_attributes::parse(b"# hello world").next().is_none()); + assert!(git_attributes::parse(b"# \"hello world\"").next().is_none()); } #[test] -fn comment_can_be_escaped_like_gitignore() { +fn comment_can_be_escaped_like_gitignore_or_quoted() { assert_eq!( 
line(r"\#hello"), (r"#hello".into(), Mode::NO_SUB_DIR, vec![], 1), "undocumented, but definitely works" ); + assert_eq!(line("\"# hello\""), (r"# hello".into(), Mode::NO_SUB_DIR, vec![], 1)); } #[test] @@ -60,6 +62,26 @@ fn esclamation_marks_must_be_escaped_or_error_unlike_gitignore() { try_line(r"!hello"), Err(parse::attribute::Error::PatternNegation { line_number: 1, .. }) )); + assert!( + matches!( + try_line(r#""!hello""#), + Err(parse::attribute::Error::PatternNegation { line_number: 1, .. }), + ), + "even in quotes they trigger…" + ); + assert_eq!( + line(r#""\\!hello""#), + (r"!hello".into(), Mode::NO_SUB_DIR, vec![], 1), + "…and must be escaped" + ); +} + +#[test] +fn invalid_escapes_in_quotes_are_an_error() { + assert!(matches!( + try_line(r#""\!hello""#), + Err(parse::attribute::Error::Unquote(_)), + ),); } #[test] @@ -85,7 +107,7 @@ fn try_line(input: &str) -> Result { fn line(input: &str) -> ExpandedAttribute { let mut lines = git_attributes::parse(input.as_bytes()); - let res = expand(lines.next().unwrap()).unwrap(); + let res = expand(lines.next().expect("single line")).unwrap(); assert!(lines.next().is_none(), "expected only one line"); res } From 3409a66a0b8f279d5c10ef4a948824e7809394da Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 25 Mar 2022 10:33:21 +0800 Subject: [PATCH 18/22] very basic parsing of attributes (#301) --- git-attributes/src/parse/attribute.rs | 28 ++++++----- git-attributes/tests/parse/attribute.rs | 65 ++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 19 deletions(-) diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index fbed928d73d..819e9c7f387 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -28,14 +28,12 @@ pub struct Lines<'a> { } pub struct Iter<'a> { - attrs: bstr::Split<'a>, + attrs: bstr::Fields<'a>, } impl<'a> Iter<'a> { pub fn new(attrs: &'a BStr) -> Self { - Iter { - attrs: attrs.split_str(b" "), - } + Iter 
{ attrs: attrs.fields() } } } @@ -43,8 +41,8 @@ impl<'a> Iterator for Iter<'a> { type Item = Result<(&'a BStr, crate::State<'a>), Error>; fn next(&mut self) -> Option { - let _attr = self.attrs.next().filter(|a| !a.is_empty())?; - todo!("parse attribute") + let attr = self.attrs.next().filter(|a| !a.is_empty())?; + Some(Ok((attr.as_bstr(), crate::State::Set))) } } @@ -64,6 +62,7 @@ impl<'a> Iterator for Lines<'a> { fn next(&mut self) -> Option { for line in self.lines.by_ref() { self.line_no += 1; + let line = skip_blanks(line.into()); if line.first() == Some(&b'#') { continue; } @@ -86,12 +85,11 @@ impl<'a> Iterator for Lines<'a> { } } -fn parse_line(line: &[u8]) -> Option), Error>> { +fn parse_line(line: &BStr) -> Option), Error>> { if line.is_empty() { return None; } - let line = line.as_bstr(); let (line, attrs): (Cow<'_, _>, _) = if line.starts_with(b"\"") { let (unquoted, consumed) = match git_quote::ansi_c::undo(line) { Ok(res) => res, @@ -99,13 +97,17 @@ fn parse_line(line: &[u8]) -> Option &BStr { + line.find_not_byteset(BLANKS).map(|pos| &line[pos..]).unwrap_or(line) +} diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 034dd01fc94..39991109707 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -1,6 +1,6 @@ -use bstr::{BStr, BString}; +use bstr::{BStr, BString, ByteSlice}; use git_attributes::ignore::pattern::Mode; -use git_attributes::{ignore, parse}; +use git_attributes::{ignore, parse, State}; use git_testtools::fixture_path; #[test] @@ -43,6 +43,16 @@ fn line_endings_can_be_windows_or_unix() { fn comment_lines_are_ignored() { assert!(git_attributes::parse(b"# hello world").next().is_none()); assert!(git_attributes::parse(b"# \"hello world\"").next().is_none()); + assert!( + git_attributes::parse(b" \t\r# \"hello world\"").next().is_none(), + "also behind leading whitespace" + ); +} + +#[test] +fn leading_whitespace_is_ignored() { + assert_eq!(line(" 
\r\tp"), (r"p".into(), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!(line(" \r\t\"p\""), (r"p".into(), Mode::NO_SUB_DIR, vec![], 1)); } #[test] @@ -56,7 +66,7 @@ fn comment_can_be_escaped_like_gitignore_or_quoted() { } #[test] -fn esclamation_marks_must_be_escaped_or_error_unlike_gitignore() { +fn exclamation_marks_must_be_escaped_or_error_unlike_gitignore() { assert_eq!(line(r"\!hello"), (r"!hello".into(), Mode::NO_SUB_DIR, vec![], 1)); assert!(matches!( try_line(r"!hello"), @@ -72,7 +82,7 @@ fn esclamation_marks_must_be_escaped_or_error_unlike_gitignore() { assert_eq!( line(r#""\\!hello""#), (r"!hello".into(), Mode::NO_SUB_DIR, vec![], 1), - "…and must be escaped" + "…and must be double-escaped, once to get through quote, then to get through parse ignore line" ); } @@ -86,9 +96,48 @@ fn invalid_escapes_in_quotes_are_an_error() { #[test] #[ignore] +fn custom_macros_can_be_defined() { + todo!("name validation, leave rejecting them based on location to the caller") +} + +#[test] fn attributes_are_parsed_behind_various_whitespace_characters() { - // see https://github.com/git/git/blob/master/attr.c#L280:L280 - todo!() + assert_eq!( + line(r#"p a b"#), + ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind space" + ); + assert_eq!( + line(r#""p" a b"#), + ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind space" + ); + assert_eq!( + line("p\ta\tb"), + ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind tab" + ); + assert_eq!( + line("\"p\"\ta\tb"), + ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind tab" + ); + assert_eq!( + line("p \t a \t b"), + ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind a mix of space and tab" + ); + assert_eq!( + line("\"p\" \t a \t b"), + ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind a mix of space and tab" + ); +} + +#[test] +fn trailing_whitespace_in_attributes_is_ignored() { + assert_eq!(line("p a \r\t"), 
("p".into(), Mode::NO_SUB_DIR, vec![set("a")], 1),); + assert_eq!(line("\"p\" a \r\t"), ("p".into(), Mode::NO_SUB_DIR, vec![set("a")], 1),); } type ExpandedAttribute<'a> = ( @@ -98,6 +147,10 @@ type ExpandedAttribute<'a> = ( usize, ); +fn set(attr: &str) -> (&BStr, State) { + (attr.as_bytes().as_bstr(), State::Set) +} + fn try_line(input: &str) -> Result { let mut lines = git_attributes::parse(input.as_bytes()); let res = expand(lines.next().unwrap())?; From 96b0fcad1229ad2563e5e628d24289207a165005 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 25 Mar 2022 11:13:50 +0800 Subject: [PATCH 19/22] parse all kinds of attributes, lacking name validation (#301) --- git-attributes/src/parse/attribute.rs | 21 +++++++++- git-attributes/tests/parse/attribute.rs | 53 +++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 819e9c7f387..5879246e30a 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -42,10 +42,29 @@ impl<'a> Iterator for Iter<'a> { fn next(&mut self) -> Option { let attr = self.attrs.next().filter(|a| !a.is_empty())?; - Some(Ok((attr.as_bstr(), crate::State::Set))) + parse_attr(attr).into() } } +fn parse_attr(attr: &[u8]) -> Result<(&BStr, crate::State<'_>), Error> { + let mut tokens = attr.splitn(2, |b| *b == b'='); + let attr = tokens.next().expect("attr itself").as_bstr(); + let possibly_value = tokens.next(); + let (attr, state) = if attr.first() == Some(&b'-') { + (&attr[1..], crate::State::Unset) + } else if attr.first() == Some(&b'!') { + (&attr[1..], crate::State::Unspecified) + } else { + ( + attr, + possibly_value + .map(|v| crate::State::Value(v.as_bstr())) + .unwrap_or(crate::State::Set), + ) + }; + Ok((attr, state)) +} + impl<'a> Lines<'a> { pub fn new(buf: &'a [u8]) -> Self { let bom = unicode_bom::Bom::from(buf); diff --git a/git-attributes/tests/parse/attribute.rs 
b/git-attributes/tests/parse/attribute.rs index 39991109707..3bb61d24ac0 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -134,6 +134,47 @@ fn attributes_are_parsed_behind_various_whitespace_characters() { ); } +#[test] +fn attributes_come_in_different_flavors_due_to_prefixes() { + assert_eq!( + line(r#"p set -unset !unspecified -set"#), + ( + "p".into(), + Mode::NO_SUB_DIR, + vec![set("set"), unset("unset"), unspecified("unspecified"), unset("set")], + 1 + ), + "the parser doesn't care about double-mentions either" + ); +} + +#[test] +fn attributes_can_have_values() { + assert_eq!( + line(r#"p a=one b=2 c=你好 "#), + ( + "p".into(), + Mode::NO_SUB_DIR, + vec![value("a", "one"), value("b", "2"), value("c", "你好")], + 1 + ), + "only non-whitespace ascii values are allowed, no escaping or anything fancy is possible there" + ); +} + +#[test] +fn attributes_see_state_adjustments_over_value_assignments() { + assert_eq!( + line(r#"p set -unset=a !unspecified=b"#), + ( + "p".into(), + Mode::NO_SUB_DIR, + vec![set("set"), unset("unset"), unspecified("unspecified")], + 1 + ) + ); +} + #[test] fn trailing_whitespace_in_attributes_is_ignored() { assert_eq!(line("p a \r\t"), ("p".into(), Mode::NO_SUB_DIR, vec![set("a")], 1),); @@ -151,6 +192,18 @@ fn set(attr: &str) -> (&BStr, State) { (attr.as_bytes().as_bstr(), State::Set) } +fn unset(attr: &str) -> (&BStr, State) { + (attr.as_bytes().as_bstr(), State::Unset) +} + +fn unspecified(attr: &str) -> (&BStr, State) { + (attr.as_bytes().as_bstr(), State::Unspecified) +} + +fn value<'a, 'b>(attr: &'a str, value: &'b str) -> (&'a BStr, State<'b>) { + (attr.as_bytes().as_bstr(), State::Value(value.as_bytes().as_bstr())) +} + fn try_line(input: &str) -> Result { let mut lines = git_attributes::parse(input.as_bytes()); let res = expand(lines.next().unwrap())?; From 65c416bef3323250d0fb82085049ea68adae8001 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 25 Mar 2022 11:58:28 
+0800 Subject: [PATCH 20/22] attribute name validation (#301) --- git-attributes/src/parse/attribute.rs | 77 +++++++++++++++++-------- git-attributes/tests/parse/attribute.rs | 22 +++++++ 2 files changed, 74 insertions(+), 25 deletions(-) diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 5879246e30a..00fdfc0b598 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -11,6 +11,9 @@ mod error { PatternNegation { line_number: usize, line: BString } { display("Line {} has a negative pattern, for literal characters use \\!: {}", line_number, line) } + AttributeName { line_number: usize, attribute: BString } { + display("Line {} has non-ascii characters or starts with '-': {}", line_number, attribute) + } Unquote(err: git_quote::ansi_c::undo::Error) { display("Could not unquote attributes line") from() @@ -29,42 +32,63 @@ pub struct Lines<'a> { pub struct Iter<'a> { attrs: bstr::Fields<'a>, + line_no: usize, } impl<'a> Iter<'a> { - pub fn new(attrs: &'a BStr) -> Self { - Iter { attrs: attrs.fields() } + pub fn new(attrs: &'a BStr, line_no: usize) -> Self { + Iter { + attrs: attrs.fields(), + line_no, + } + } + + fn parse_attr(&self, attr: &'a [u8]) -> Result<(&'a BStr, crate::State<'a>), Error> { + let mut tokens = attr.splitn(2, |b| *b == b'='); + let attr = tokens.next().expect("attr itself").as_bstr(); + let possibly_value = tokens.next(); + let (attr, state) = if attr.first() == Some(&b'-') { + (&attr[1..], crate::State::Unset) + } else if attr.first() == Some(&b'!') { + (&attr[1..], crate::State::Unspecified) + } else { + ( + attr, + possibly_value + .map(|v| crate::State::Value(v.as_bstr())) + .unwrap_or(crate::State::Set), + ) + }; + if !attr_valid(attr) { + return Err(Error::AttributeName { + line_number: self.line_no, + attribute: attr.into(), + }); + } + Ok((attr, state)) } } +fn attr_valid(attr: &BStr) -> bool { + if attr.first() == Some(&b'-') { + return false; + } + + 
attr.bytes().all(|b| match b { + b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' => true, + _ => false, + }) +} + impl<'a> Iterator for Iter<'a> { type Item = Result<(&'a BStr, crate::State<'a>), Error>; fn next(&mut self) -> Option { let attr = self.attrs.next().filter(|a| !a.is_empty())?; - parse_attr(attr).into() + self.parse_attr(attr).into() } } -fn parse_attr(attr: &[u8]) -> Result<(&BStr, crate::State<'_>), Error> { - let mut tokens = attr.splitn(2, |b| *b == b'='); - let attr = tokens.next().expect("attr itself").as_bstr(); - let possibly_value = tokens.next(); - let (attr, state) = if attr.first() == Some(&b'-') { - (&attr[1..], crate::State::Unset) - } else if attr.first() == Some(&b'!') { - (&attr[1..], crate::State::Unspecified) - } else { - ( - attr, - possibly_value - .map(|v| crate::State::Value(v.as_bstr())) - .unwrap_or(crate::State::Set), - ) - }; - Ok((attr, state)) -} - impl<'a> Lines<'a> { pub fn new(buf: &'a [u8]) -> Self { let bom = unicode_bom::Bom::from(buf); @@ -85,7 +109,7 @@ impl<'a> Iterator for Lines<'a> { if line.first() == Some(&b'#') { continue; } - match parse_line(line) { + match parse_line(line, self.line_no) { None => continue, Some(Ok((pattern, flags, attrs))) => { return Some(if flags.contains(ignore::pattern::Mode::NEGATIVE) { @@ -104,7 +128,10 @@ impl<'a> Iterator for Lines<'a> { } } -fn parse_line(line: &BStr) -> Option), Error>> { +fn parse_line( + line: &BStr, + line_number: usize, +) -> Option), Error>> { if line.is_empty() { return None; } @@ -122,7 +149,7 @@ fn parse_line(line: &BStr) -> Option Date: Fri, 25 Mar 2022 12:12:03 +0800 Subject: [PATCH 21/22] prepare for macro support (#301) --- git-attributes/src/lib.rs | 4 +- git-attributes/src/parse/attribute.rs | 15 ++++- git-attributes/src/parse/mod.rs | 3 +- git-attributes/tests/parse/attribute.rs | 88 +++++++++++++------------ 4 files changed, 62 insertions(+), 48 deletions(-) diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs index 
755541c9499..1e429b1a1d1 100644 --- a/git-attributes/src/lib.rs +++ b/git-attributes/src/lib.rs @@ -20,6 +20,6 @@ pub mod ignore; pub mod parse; -pub fn parse(buf: &[u8]) -> parse::attribute::Lines<'_> { - parse::attribute::Lines::new(buf) +pub fn parse(buf: &[u8]) -> parse::Lines<'_> { + parse::Lines::new(buf) } diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 00fdfc0b598..8c14eeb1700 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -1,6 +1,15 @@ use bstr::{BStr, BString, ByteSlice}; use std::borrow::Cow; +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Kind { + /// A pattern to match paths against + Pattern(BString), + /// The name of the macro to define, always a valid attribute name + Macro(BString), +} + mod error { use bstr::BString; use quick_error::quick_error; @@ -100,7 +109,7 @@ impl<'a> Lines<'a> { } impl<'a> Iterator for Lines<'a> { - type Item = Result<(BString, crate::ignore::pattern::Mode, Iter<'a>, usize), Error>; + type Item = Result<(Kind, crate::ignore::pattern::Mode, Iter<'a>, usize), Error>; fn next(&mut self) -> Option { for line in self.lines.by_ref() { @@ -131,7 +140,7 @@ impl<'a> Iterator for Lines<'a> { fn parse_line( line: &BStr, line_number: usize, -) -> Option), Error>> { +) -> Option), Error>> { if line.is_empty() { return None; } @@ -149,7 +158,7 @@ fn parse_line( }; let (pattern, flags) = super::ignore::parse_line(line.as_ref())?; - Ok((pattern, flags, Iter::new(attrs, line_number))).into() + Ok((Kind::Pattern(pattern), flags, Iter::new(attrs, line_number))).into() } const BLANKS: &[u8] = b" \t\r"; diff --git a/git-attributes/src/parse/mod.rs b/git-attributes/src/parse/mod.rs index 5bcf9487b7a..5bbd86f5bd4 100644 --- a/git-attributes/src/parse/mod.rs +++ b/git-attributes/src/parse/mod.rs @@ -1,6 +1,7 @@ pub mod ignore; -pub mod 
attribute; +mod attribute; +pub use attribute::{Error, Iter, Kind, Lines}; pub fn ignore(buf: &[u8]) -> ignore::Lines<'_> { ignore::Lines::new(buf) diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 675c801ec50..38065c83e16 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -1,14 +1,14 @@ -use bstr::{BStr, BString, ByteSlice}; +use bstr::{BStr, ByteSlice}; use git_attributes::ignore::pattern::Mode; use git_attributes::{ignore, parse, State}; use git_testtools::fixture_path; #[test] fn byte_order_marks_are_no_patterns() { - assert_eq!(line("\u{feff}hello"), (r"hello".into(), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!(line("\u{feff}hello"), (pattern(r"hello"), Mode::NO_SUB_DIR, vec![], 1)); assert_eq!( line("\u{feff}\"hello\""), - (r"hello".into(), Mode::NO_SUB_DIR, vec![], 1) + (pattern(r"hello"), Mode::NO_SUB_DIR, vec![], 1) ); } @@ -18,11 +18,11 @@ fn line_numbers_are_counted_correctly() { assert_eq!( try_lines(&String::from_utf8(ignore).unwrap()).unwrap(), vec![ - (r"*.[oa]".into(), Mode::NO_SUB_DIR, vec![], 2), - (r"*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, vec![], 5), - (r"!foo.html".into(), Mode::NO_SUB_DIR, vec![], 8), - (r"#a/path".into(), Mode::empty(), vec![], 10), - (r"/*".into(), Mode::empty(), vec![], 11), + (pattern(r"*.[oa]"), Mode::NO_SUB_DIR, vec![], 2), + (pattern(r"*.html"), Mode::NO_SUB_DIR | Mode::ENDS_WITH, vec![], 5), + (pattern(r"!foo.html"), Mode::NO_SUB_DIR, vec![], 8), + (pattern(r"#a/path"), Mode::empty(), vec![], 10), + (pattern(r"/*"), Mode::empty(), vec![], 11), ] ); } @@ -32,9 +32,9 @@ fn line_endings_can_be_windows_or_unix() { assert_eq!( try_lines("unix\nwindows\r\nlast").unwrap(), vec![ - (r"unix".into(), Mode::NO_SUB_DIR, vec![], 1), - (r"windows".into(), Mode::NO_SUB_DIR, vec![], 2), - (r"last".into(), Mode::NO_SUB_DIR, vec![], 3) + (pattern(r"unix"), Mode::NO_SUB_DIR, vec![], 1), + (pattern(r"windows"), 
Mode::NO_SUB_DIR, vec![], 2), + (pattern(r"last"), Mode::NO_SUB_DIR, vec![], 3) ] ); } @@ -51,47 +51,44 @@ fn comment_lines_are_ignored() { #[test] fn leading_whitespace_is_ignored() { - assert_eq!(line(" \r\tp"), (r"p".into(), Mode::NO_SUB_DIR, vec![], 1)); - assert_eq!(line(" \r\t\"p\""), (r"p".into(), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!(line(" \r\tp"), (pattern(r"p"), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!(line(" \r\t\"p\""), (pattern(r"p"), Mode::NO_SUB_DIR, vec![], 1)); } #[test] fn comment_can_be_escaped_like_gitignore_or_quoted() { assert_eq!( line(r"\#hello"), - (r"#hello".into(), Mode::NO_SUB_DIR, vec![], 1), + (pattern(r"#hello"), Mode::NO_SUB_DIR, vec![], 1), "undocumented, but definitely works" ); - assert_eq!(line("\"# hello\""), (r"# hello".into(), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!(line("\"# hello\""), (pattern(r"# hello"), Mode::NO_SUB_DIR, vec![], 1)); } #[test] fn exclamation_marks_must_be_escaped_or_error_unlike_gitignore() { - assert_eq!(line(r"\!hello"), (r"!hello".into(), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!(line(r"\!hello"), (pattern(r"!hello"), Mode::NO_SUB_DIR, vec![], 1)); assert!(matches!( try_line(r"!hello"), - Err(parse::attribute::Error::PatternNegation { line_number: 1, .. }) + Err(parse::Error::PatternNegation { line_number: 1, .. }) )); assert!( matches!( try_line(r#""!hello""#), - Err(parse::attribute::Error::PatternNegation { line_number: 1, .. }), + Err(parse::Error::PatternNegation { line_number: 1, .. 
}), ), "even in quotes they trigger…" ); assert_eq!( line(r#""\\!hello""#), - (r"!hello".into(), Mode::NO_SUB_DIR, vec![], 1), + (pattern(r"!hello"), Mode::NO_SUB_DIR, vec![], 1), "…and must be double-escaped, once to get through quote, then to get through parse ignore line" ); } #[test] fn invalid_escapes_in_quotes_are_an_error() { - assert!(matches!( - try_line(r#""\!hello""#), - Err(parse::attribute::Error::Unquote(_)), - ),); + assert!(matches!(try_line(r#""\!hello""#), Err(parse::Error::Unquote(_)),),); } #[test] @@ -104,19 +101,19 @@ fn custom_macros_can_be_defined() { fn attribute_names_must_not_begin_with_dash_and_must_be_ascii_only() { assert!(matches!( try_line(r"p !-a"), - Err(parse::attribute::Error::AttributeName { line_number: 1, .. }) + Err(parse::Error::AttributeName { line_number: 1, .. }) )); assert!( matches!( try_line(r#"p !!a"#), - Err(parse::attribute::Error::AttributeName { line_number: 1, .. }) + Err(parse::Error::AttributeName { line_number: 1, .. }) ), "exclamation marks aren't allowed either" ); assert!( matches!( try_line(r#"p 你好"#), - Err(parse::attribute::Error::AttributeName { line_number: 1, .. }) + Err(parse::Error::AttributeName { line_number: 1, .. 
}) ), "nor is utf-8 encoded characters - gitoxide could consider to relax this when established" ); @@ -126,32 +123,32 @@ fn attribute_names_must_not_begin_with_dash_and_must_be_ascii_only() { fn attributes_are_parsed_behind_various_whitespace_characters() { assert_eq!( line(r#"p a b"#), - ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), "behind space" ); assert_eq!( line(r#""p" a b"#), - ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), "behind space" ); assert_eq!( line("p\ta\tb"), - ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), "behind tab" ); assert_eq!( line("\"p\"\ta\tb"), - ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), "behind tab" ); assert_eq!( line("p \t a \t b"), - ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), "behind a mix of space and tab" ); assert_eq!( line("\"p\" \t a \t b"), - ("p".into(), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), "behind a mix of space and tab" ); } @@ -161,7 +158,7 @@ fn attributes_come_in_different_flavors_due_to_prefixes() { assert_eq!( line(r#"p set -unset !unspecified -set"#), ( - "p".into(), + pattern("p"), Mode::NO_SUB_DIR, vec![set("set"), unset("unset"), unspecified("unspecified"), unset("set")], 1 @@ -175,7 +172,7 @@ fn attributes_can_have_values() { assert_eq!( line(r#"p a=one b=2 c=你好 "#), ( - "p".into(), + pattern("p"), Mode::NO_SUB_DIR, vec![value("a", "one"), value("b", "2"), value("c", "你好")], 1 @@ -189,7 +186,7 @@ fn attributes_see_state_adjustments_over_value_assignments() { assert_eq!( line(r#"p set -unset=a !unspecified=b"#), ( - "p".into(), + 
pattern("p"), Mode::NO_SUB_DIR, vec![set("set"), unset("unset"), unspecified("unspecified")], 1 @@ -199,12 +196,15 @@ fn attributes_see_state_adjustments_over_value_assignments() { #[test] fn trailing_whitespace_in_attributes_is_ignored() { - assert_eq!(line("p a \r\t"), ("p".into(), Mode::NO_SUB_DIR, vec![set("a")], 1),); - assert_eq!(line("\"p\" a \r\t"), ("p".into(), Mode::NO_SUB_DIR, vec![set("a")], 1),); + assert_eq!(line("p a \r\t"), (pattern("p"), Mode::NO_SUB_DIR, vec![set("a")], 1),); + assert_eq!( + line("\"p\" a \r\t"), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a")], 1), + ); } type ExpandedAttribute<'a> = ( - BString, + parse::Kind, ignore::pattern::Mode, Vec<(&'a BStr, git_attributes::State<'a>)>, usize, @@ -226,7 +226,11 @@ fn value<'a, 'b>(attr: &'a str, value: &'b str) -> (&'a BStr, State<'b>) { (attr.as_bytes().as_bstr(), State::Value(value.as_bytes().as_bstr())) } -fn try_line(input: &str) -> Result { +fn pattern(name: &str) -> parse::Kind { + parse::Kind::Pattern(name.into()) +} + +fn try_line(input: &str) -> Result { let mut lines = git_attributes::parse(input.as_bytes()); let res = expand(lines.next().unwrap())?; assert!(lines.next().is_none(), "expected only one line"); @@ -240,13 +244,13 @@ fn line(input: &str) -> ExpandedAttribute { res } -fn try_lines(input: &str) -> Result, parse::attribute::Error> { +fn try_lines(input: &str) -> Result, parse::Error> { git_attributes::parse(input.as_bytes()).map(expand).collect() } fn expand( - input: Result<(BString, ignore::pattern::Mode, parse::attribute::Iter<'_>, usize), parse::attribute::Error>, -) -> Result, parse::attribute::Error> { + input: Result<(parse::Kind, ignore::pattern::Mode, parse::Iter<'_>, usize), parse::Error>, +) -> Result, parse::Error> { let (pattern, mode, attrs, line_no) = input?; let attrs = attrs.collect::, _>>()?; Ok((pattern, mode, attrs, line_no)) From 365a8f08134a023bac7b78f3eee7baff410ba4cb Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 25 Mar 2022 12:12:46 
+0800 Subject: [PATCH 22/22] thanks clippy --- git-attributes/src/parse/attribute.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index 8c14eeb1700..0e4e668d571 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -83,9 +83,9 @@ fn attr_valid(attr: &BStr) -> bool { return false; } - attr.bytes().all(|b| match b { - b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' => true, - _ => false, + attr.bytes().all(|b| { + matches!(b, + b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9') }) }