diff --git a/Cargo.lock b/Cargo.lock
index d898db32a5e..a4d78cf2bdf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1073,6 +1073,11 @@ dependencies = [
 [[package]]
 name = "git-attributes"
 version = "0.0.0"
+dependencies = [
+ "bitflags",
+ "bstr",
+ "git-testtools",
+]
 
 [[package]]
 name = "git-bitmap"
@@ -2014,9 +2019,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
 
 [[package]]
 name = "openssl-src"
-version = "111.17.0+1.1.1m"
+version = "111.18.0+1.1.1n"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05d6a336abd10814198f66e2a91ccd7336611f30334119ca8ce300536666fcf4"
+checksum = "7897a926e1e8d00219127dc020130eca4292e5ca666dd592480d72c3eca2ff6c"
 dependencies = [
  "cc",
 ]
diff --git a/git-attributes/Cargo.toml b/git-attributes/Cargo.toml
index 88a87c456e4..5e8ac460d10 100644
--- a/git-attributes/Cargo.toml
+++ b/git-attributes/Cargo.toml
@@ -13,3 +13,8 @@ doctest = false
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+bstr = { version = "0.2.13", default-features = false, features = ["std"]}
+bitflags = "1.3.2"
+
+[dev-dependencies]
+git-testtools = { path = "../tests/tools"}
diff --git a/git-attributes/src/ignore.rs b/git-attributes/src/ignore.rs
new file mode 100644
index 00000000000..1bde72dc586
--- /dev/null
+++ b/git-attributes/src/ignore.rs
@@ -0,0 +1,19 @@
+/// Types describing a single pattern of an ignore file.
+pub mod pattern {
+    use bitflags::bitflags;
+
+    bitflags! {
+        /// Properties of a pattern which are determined while parsing it.
+        pub struct Mode: u32 {
+            /// The pattern does not contain a sub-directory, that is it contains no slash
+            /// after removing the trailing one.
+            const NO_SUB_DIR = 1 << 0;
+            /// A pattern of the form `*literal`, meaning that a match has to end with `literal`.
+            const ENDS_WITH = 1 << 1;
+            /// The pattern must match a directory, and not a file.
+            const MUST_BE_DIR = 1 << 2;
+            /// The pattern started with an unescaped `!`, which is not part of the pattern text.
+            const NEGATIVE = 1 << 3;
+        }
+    }
+}
diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs
index d7a83e4f525..ca3616728c5 100644
--- a/git-attributes/src/lib.rs
+++ b/git-attributes/src/lib.rs
@@ -1 +1,5 @@
 #![forbid(unsafe_code, rust_2018_idioms)]
+
+pub mod ignore;
+
+pub mod parse;
diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs
new file mode 100644
index 00000000000..b9ebd783ffd
--- /dev/null
+++ b/git-attributes/src/parse/ignore.rs
@@ -0,0 +1,95 @@
+use crate::ignore;
+use bstr::{BString, ByteSlice};
+
+/// An iterator over the patterns in a buffer of ignore-file lines, see [`Iter::new()`].
+pub struct Iter<'a> {
+    lines: bstr::Lines<'a>,
+    line_no: usize,
+}
+
+impl<'a> Iter<'a> {
+    /// Create an iterator which parses `buf` line by line.
+    pub fn new(buf: &'a [u8]) -> Self {
+        Iter {
+            lines: buf.lines(),
+            line_no: 0,
+        }
+    }
+}
+
+impl<'a> Iterator for Iter<'a> {
+    /// The pattern text, the mode derived from it, and the 1-based number of the line it is on.
+    type Item = (BString, ignore::pattern::Mode, usize);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        for mut line in self.lines.by_ref() {
+            self.line_no += 1;
+            // Blank lines, including those consisting only of (to be ignored) trailing spaces,
+            // as well as comments yield no pattern, but they still count as line.
+            if line.iter().all(|&b| b == b' ') || line.first() == Some(&b'#') {
+                continue;
+            }
+            let mut mode = ignore::pattern::Mode::empty();
+            if line.first() == Some(&b'!') {
+                mode |= ignore::pattern::Mode::NEGATIVE;
+                line = &line[1..];
+            } else if line.first() == Some(&b'\\') {
+                // A backslash escapes a leading `!` or `#`, otherwise it stays in the pattern.
+                let second = line.get(1);
+                if second == Some(&b'!') || second == Some(&b'#') {
+                    line = &line[1..];
+                }
+            }
+            let mut line = truncate_non_escaped_trailing_spaces(line);
+            if line.last() == Some(&b'/') {
+                mode |= ignore::pattern::Mode::MUST_BE_DIR;
+                line.pop();
+            }
+            if !line.contains(&b'/') {
+                mode |= ignore::pattern::Mode::NO_SUB_DIR;
+            }
+            // `*literal`: nothing but literal text may follow the leading asterisk.
+            if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() {
+                mode |= ignore::pattern::Mode::ENDS_WITH;
+            }
+            return Some((line, mode, self.line_no));
+        }
+        None
+    }
+}
+
+/// Return a copy of `buf` without trailing spaces, except for those escaped with a backslash.
+///
+/// We always copy just because that's ultimately needed anyway, not because we always have to.
+fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString {
+    match buf.rfind_not_byteset(br"\ ") {
+        Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace
+        None => buf.into(), // only backslashes and spaces, or empty: kept as is
+        Some(start_of_non_space) => {
+            // This seems a bit strange but attempts to recreate the git implementation while
+            // actually removing the escape characters before spaces. We leave other backslashes
+            // for escapes to be handled by `glob/globset`.
+            let mut res: BString = buf[..start_of_non_space + 1].into();
+
+            let mut trailing_bytes = buf[start_of_non_space + 1..].iter();
+            let mut bare_spaces = 0;
+            while let Some(b) = trailing_bytes.next() {
+                match b {
+                    b' ' => {
+                        bare_spaces += 1;
+                    }
+                    b'\\' => {
+                        res.extend(std::iter::repeat(b' ').take(bare_spaces));
+                        bare_spaces = 0;
+                        // Skip what follows, like git does, but keep spaces if possible.
+                        if trailing_bytes.next() == Some(&b' ') {
+                            res.push(b' ');
+                        }
+                    }
+                    _ => unreachable!("BUG: this must be either backslash or space"),
+                }
+            }
+            res
+        }
+    }
+}
diff --git a/git-attributes/src/parse/mod.rs b/git-attributes/src/parse/mod.rs
new file mode 100644
index 00000000000..03b01b5790e
--- /dev/null
+++ b/git-attributes/src/parse/mod.rs
@@ -0,0 +1,6 @@
+pub mod ignore;
+
+/// Return an iterator over the patterns in `buf`, the content of an ignore file.
+pub fn ignore(buf: &[u8]) -> ignore::Iter<'_> {
+    ignore::Iter::new(buf)
+}
diff --git a/git-attributes/tests/attributes.rs b/git-attributes/tests/attributes.rs
new file mode 100644
index 00000000000..05840a46a13
--- /dev/null
+++ b/git-attributes/tests/attributes.rs
@@ -0,0 +1,185 @@
+mod parse {
+    mod ignore {
+        use git_attributes::ignore::pattern::Mode;
+        use git_testtools::fixture_path;
+
+        #[test]
+        fn line_numbers_are_counted_correctly() {
+            let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap();
+            let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect();
+            assert_eq!(
+                actual,
+                vec![
+                    ("*.[oa]".into(), Mode::NO_SUB_DIR, 2),
+                    ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5),
+                    ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8),
+                    ("/*".into(), Mode::empty(), 11),
+                    ("/foo".into(), Mode::NEGATIVE, 12),
+                    ("/foo/*".into(), Mode::empty(), 13),
+                    ("/foo/bar".into(), Mode::NEGATIVE, 14)
+                ]
+            );
+        }
+
+        #[test]
+        fn line_endings_can_be_windows_or_unix() {
+            assert_eq!(
+                git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::<Vec<_>>(),
+                vec![
+                    (r"unix".into(), Mode::NO_SUB_DIR, 1),
+                    (r"windows".into(), Mode::NO_SUB_DIR, 2),
+                    (r"last".into(), Mode::NO_SUB_DIR, 3)
+                ]
+            );
+        }
+
+        #[test]
+        fn mark_ends_with_pattern_specifically() {
+            assert_eq!(
+                git_attributes::parse::ignore(br"*literal").next(),
+                Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1))
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"**literal").next(),
+                Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)),
+                "double-asterisk won't allow for fast comparisons"
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"*litera[l]").next(),
+                Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1))
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"*litera?").next(),
+                Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1))
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"*litera\?").next(),
+                Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)),
+                "for now we don't handle escapes properly like git seems to do"
+            );
+        }
+
+        #[test]
+        fn comments_are_ignored() {
+            assert!(git_attributes::parse::ignore(b"# hello world").next().is_none());
+        }
+
+        #[test]
+        fn lines_with_only_spaces_are_ignored() {
+            assert!(git_attributes::parse::ignore(b"   ").next().is_none());
+            assert_eq!(
+                git_attributes::parse::ignore(b"   \nhello").next(),
+                Some(("hello".into(), Mode::NO_SUB_DIR, 2)),
+                "they are still counted as line though"
+            );
+        }
+
+        #[test]
+        fn backslashes_before_hashes_are_no_comments() {
+            assert_eq!(
+                git_attributes::parse::ignore(br"\#hello").next(),
+                Some((r"#hello".into(), Mode::NO_SUB_DIR, 1))
+            );
+        }
+
+        #[test]
+        fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() {
+            assert_eq!(
+                git_attributes::parse::ignore(br"\hello\world").next(),
+                Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1))
+            );
+        }
+
+        #[test]
+        fn leading_exclamation_mark_negates_pattern() {
+            assert_eq!(
+                git_attributes::parse::ignore(b"!hello").next(),
+                Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1))
+            );
+        }
+
+        #[test]
+        fn leading_exclamation_marks_can_be_escaped_with_backslash() {
+            assert_eq!(
+                git_attributes::parse::ignore(br"\!hello").next(),
+                Some(("!hello".into(), Mode::NO_SUB_DIR, 1))
+            );
+        }
+
+        #[test]
+        fn absence_of_sub_directories_are_marked() {
+            assert_eq!(
+                git_attributes::parse::ignore(br"a/b").next(),
+                Some(("a/b".into(), Mode::empty(), 1))
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"ab").next(),
+                Some(("ab".into(), Mode::NO_SUB_DIR, 1))
+            );
+        }
+
+        #[test]
+        fn trailing_slashes_are_marked_and_removed() {
+            assert_eq!(
+                git_attributes::parse::ignore(b"dir/").next(),
+                Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1))
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(b"dir///").next(),
+                Some(("dir//".into(), Mode::MUST_BE_DIR, 1)),
+                "but only the last slash is removed"
+            );
+        }
+
+        #[test]
+        fn trailing_spaces_are_ignored() {
+            assert_eq!(
+                git_attributes::parse::ignore(br"a   ").next(),
+                Some(("a".into(), Mode::NO_SUB_DIR, 1))
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(b"a\t\t  ").next(),
+                Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)),
+                "trailing tabs are not ignored"
+            );
+        }
+        #[test]
+        fn trailing_spaces_can_be_escaped_to_be_literal() {
+            assert_eq!(
+                git_attributes::parse::ignore(br"a  \ ").next(),
+                Some(("a   ".into(), Mode::NO_SUB_DIR, 1)),
+                "a single escape in front of the last desired space is enough"
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"a  b  c ").next(),
+                Some(("a  b  c".into(), Mode::NO_SUB_DIR, 1)),
+                "spaces in the middle are fine"
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"a\ \ \ ").next(),
+                Some(("a   ".into(), Mode::NO_SUB_DIR, 1)),
+                "one can also escape every single one"
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"a \  ").next(),
+                Some(("a  ".into(), Mode::NO_SUB_DIR, 1)),
+                "or just the one in the middle, losing the last actual space"
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"a   \").next(),
+                Some(("a   ".into(), Mode::NO_SUB_DIR, 1)),
+                "escaping nothing also works as a whitespace protection"
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"a   \\\ ").next(),
+                Some((r"a    ".into(), Mode::NO_SUB_DIR, 1)),
+                "strange things like these work too"
+            );
+            assert_eq!(
+                git_attributes::parse::ignore(br"a   \\ ").next(),
+                Some((r"a   ".into(), Mode::NO_SUB_DIR, 1)),
+                "strange things like these work as well"
+            );
+        }
+    }
+}
diff --git a/git-attributes/tests/fixtures/ignore/various.txt b/git-attributes/tests/fixtures/ignore/various.txt
new file mode 100644
index 00000000000..214e46f2d54
--- /dev/null
+++ b/git-attributes/tests/fixtures/ignore/various.txt
@@ -0,0 +1,14 @@
+# ignore objects and archives, anywhere in the tree.
+*.[oa]
+
+# ignore generated html files,
+*.html
+
+# except foo.html which is maintained by hand
+!foo.html
+
+# exclude everything except directory foo/bar
+/*
+!/foo
+/foo/*
+!/foo/bar