diff --git a/Cargo.lock b/Cargo.lock index 1020d608940..2e0f5c7719c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1077,7 +1077,11 @@ version = "0.0.0" dependencies = [ "bitflags", "bstr", + "git-quote", "git-testtools", + "quick-error", + "serde", + "unicode-bom", ] [[package]] @@ -1238,13 +1242,13 @@ name = "git-odb" version = "0.27.0" dependencies = [ "arc-swap", - "btoi", "filetime", "git-actor", "git-features", "git-hash", "git-object", "git-pack", + "git-quote", "git-testtools", "parking_lot 0.12.0", "pretty_assertions", @@ -1326,6 +1330,15 @@ dependencies = [ "serde", ] +[[package]] +name = "git-quote" +version = "0.1.0" +dependencies = [ + "bstr", + "btoi", + "quick-error", +] + [[package]] name = "git-ref" version = "0.12.0" @@ -2824,6 +2837,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" +[[package]] +name = "unicode-bom" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63ec69f541d875b783ca40184d655f2927c95f0bffd486faa83cd3ac3529ec32" + [[package]] name = "unicode-normalization" version = "0.1.19" diff --git a/Cargo.toml b/Cargo.toml index 81166c1fcd4..58511e809a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -133,6 +133,7 @@ members = [ "git-features", "git-commitgraph", "git-chunk", + "git-quote", "git-object", "git-diff", "git-traverse", diff --git a/README.md b/README.md index f2e28f87bb0..a5d5ec4095f 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ Follow linked crate name for detailed status. 
Please note that all crates follow * [git-bitmap](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-bitmap) * [git-revision](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-revision) * [git-attributes](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-attributes) + * [git-quote](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-quote) * **idea** * [git-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-pathspec) * [git-subomdule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-submodule) diff --git a/crate-status.md b/crate-status.md index 242c9978ced..494598be814 100644 --- a/crate-status.md +++ b/crate-status.md @@ -53,6 +53,7 @@ * [x] pack only changed objects as derived from input * [x] base object compression * [ ] delta compression + * [ ] respect the `delta=false` attribute * [x] create 'thin' pack, i.e. deltas that are based on objects the other side has. * [x] parallel implementation that scales perfectly * [x] entries to pack data iterator @@ -209,9 +210,16 @@ Check out the [performance discussion][git-traverse-performance] as well. ### git-attributes -* [ ] parse git-ignore files (aka git-attributes without the attributes or negation) +* [x] parse git-ignore files (aka git-attributes without the attributes or negation) * [ ] parse git-attributes files * [ ] create an attributes stack, ideally one that includes 'ignored' status from .gitignore files. + * [ ] support for built-in `binary` macro for `-text -diff -merge` + +### git-quote + +* **ansi-c** + * [ ] quote + * [x] unquote ### git-pathspec @@ -226,6 +234,15 @@ Check out the [performance discussion][git-traverse-performance] as well. 
- [ ] handle sparse directories - [ ] handle sparse index - [ ] linear scaling with multi-threading up to IO saturation + - supported attributes to affect working tree and index contents + - [ ] eol + - [ ] working-tree-encoding + - …more + - **filtering** + - [ ] `text` + - [ ] `ident` + - [ ] filter processes + - [ ] single-invocation clean/smudge filters * manage multiple worktrees * deal with exclude specifications, like .gitignore and other exclude files. @@ -384,6 +401,7 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/git-lock/README. ### git-bundle * [ ] create a bundle from an archive + * [ ] respect `export-ignore` and `export-subst` * [ ] extract a branch from a bundle into a repository * [ ] API documentation * [ ] Some examples diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index fe0836492c9..008e0776a1a 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -19,9 +19,10 @@ echo "in root: gitoxide CLI" (enter cargo-smart-release && indent cargo diet -n --package-size-limit 85KB) (enter git-actor && indent cargo diet -n --package-size-limit 5KB) (enter git-pathspec && indent cargo diet -n --package-size-limit 5KB) -(enter git-attributes && indent cargo diet -n --package-size-limit 5KB) +(enter git-attributes && indent cargo diet -n --package-size-limit 10KB) (enter git-index && indent cargo diet -n --package-size-limit 30KB) (enter git-worktree && indent cargo diet -n --package-size-limit 20KB) +(enter git-quote && indent cargo diet -n --package-size-limit 5KB) (enter git-revision && indent cargo diet -n --package-size-limit 10KB) (enter git-bitmap && indent cargo diet -n --package-size-limit 5KB) (enter git-tempfile && indent cargo diet -n --package-size-limit 25KB) diff --git a/git-attributes/Cargo.toml b/git-attributes/Cargo.toml index 5e8ac460d10..cb743b74d2e 100644 --- a/git-attributes/Cargo.toml +++ b/git-attributes/Cargo.toml @@ -10,11 +10,20 @@ edition = "2018" [lib] doctest = false 
+[features] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. +serde1 = ["serde", "bstr/serde1"] + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +git-quote = { version = "^0.1.0", path = "../git-quote" } + bstr = { version = "0.2.13", default-features = false, features = ["std"]} bitflags = "1.3.2" +unicode-bom = "1.1.4" +quick-error = "2.0.0" +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} [dev-dependencies] git-testtools = { path = "../tests/tools"} diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs index ca3616728c5..1e429b1a1d1 100644 --- a/git-attributes/src/lib.rs +++ b/git-attributes/src/lib.rs @@ -1,5 +1,25 @@ #![forbid(unsafe_code, rust_2018_idioms)] +use bstr::BStr; + +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum State<'a> { + /// The attribute is listed, or has the special value 'true' + Set, + /// The attribute has the special value 'false', or was prefixed with a `-` sign. + Unset, + /// The attribute is set to the given value, which followed the `=` sign. + /// Note that values can be empty. + Value(&'a BStr), + /// The attribute isn't mentioned with a given path or is explicitly set to `Unspecified` using the `!` sign. 
+ Unspecified, +} + pub mod ignore; pub mod parse; + +pub fn parse(buf: &[u8]) -> parse::Lines<'_> { + parse::Lines::new(buf) +} diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs new file mode 100644 index 00000000000..0e4e668d571 --- /dev/null +++ b/git-attributes/src/parse/attribute.rs @@ -0,0 +1,168 @@ +use bstr::{BStr, BString, ByteSlice}; +use std::borrow::Cow; + +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub enum Kind { + /// A pattern to match paths against + Pattern(BString), + /// The name of the macro to define, always a valid attribute name + Macro(BString), +} + +mod error { + use bstr::BString; + use quick_error::quick_error; + + quick_error! { + #[derive(Debug)] + pub enum Error { + PatternNegation { line_number: usize, line: BString } { + display("Line {} has a negative pattern, for literal characters use \\!: {}", line_number, line) + } + AttributeName { line_number: usize, attribute: BString } { + display("Line {} has non-ascii characters or starts with '-': {}", line_number, attribute) + } + Unquote(err: git_quote::ansi_c::undo::Error) { + display("Could not unquote attributes line") + from() + source(err) + } + } + } +} +use crate::ignore; +pub use error::Error; + +pub struct Lines<'a> { + lines: bstr::Lines<'a>, + line_no: usize, +} + +pub struct Iter<'a> { + attrs: bstr::Fields<'a>, + line_no: usize, +} + +impl<'a> Iter<'a> { + pub fn new(attrs: &'a BStr, line_no: usize) -> Self { + Iter { + attrs: attrs.fields(), + line_no, + } + } + + fn parse_attr(&self, attr: &'a [u8]) -> Result<(&'a BStr, crate::State<'a>), Error> { + let mut tokens = attr.splitn(2, |b| *b == b'='); + let attr = tokens.next().expect("attr itself").as_bstr(); + let possibly_value = tokens.next(); + let (attr, state) = if attr.first() == Some(&b'-') { + (&attr[1..], crate::State::Unset) + } else if attr.first() == Some(&b'!') { + 
(&attr[1..], crate::State::Unspecified) + } else { + ( + attr, + possibly_value + .map(|v| crate::State::Value(v.as_bstr())) + .unwrap_or(crate::State::Set), + ) + }; + if !attr_valid(attr) { + return Err(Error::AttributeName { + line_number: self.line_no, + attribute: attr.into(), + }); + } + Ok((attr, state)) + } +} + +fn attr_valid(attr: &BStr) -> bool { + if attr.first() == Some(&b'-') { + return false; + } + + attr.bytes().all(|b| { + matches!(b, + b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9') + }) +} + +impl<'a> Iterator for Iter<'a> { + type Item = Result<(&'a BStr, crate::State<'a>), Error>; + + fn next(&mut self) -> Option { + let attr = self.attrs.next().filter(|a| !a.is_empty())?; + self.parse_attr(attr).into() + } +} + +impl<'a> Lines<'a> { + pub fn new(buf: &'a [u8]) -> Self { + let bom = unicode_bom::Bom::from(buf); + Lines { + lines: buf[bom.len()..].lines(), + line_no: 0, + } + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = Result<(Kind, crate::ignore::pattern::Mode, Iter<'a>, usize), Error>; + + fn next(&mut self) -> Option { + for line in self.lines.by_ref() { + self.line_no += 1; + let line = skip_blanks(line.into()); + if line.first() == Some(&b'#') { + continue; + } + match parse_line(line, self.line_no) { + None => continue, + Some(Ok((pattern, flags, attrs))) => { + return Some(if flags.contains(ignore::pattern::Mode::NEGATIVE) { + Err(Error::PatternNegation { + line: line.into(), + line_number: self.line_no, + }) + } else { + Ok((pattern, flags, attrs, self.line_no)) + }) + } + Some(Err(err)) => return Some(Err(err)), + } + } + None + } +} + +fn parse_line( + line: &BStr, + line_number: usize, +) -> Option), Error>> { + if line.is_empty() { + return None; + } + + let (line, attrs): (Cow<'_, _>, _) = if line.starts_with(b"\"") { + let (unquoted, consumed) = match git_quote::ansi_c::undo(line) { + Ok(res) => res, + Err(err) => return Some(Err(err.into())), + }; + (unquoted, &line[consumed..]) + } else { + 
line.find_byteset(BLANKS) + .map(|pos| (line[..pos].as_bstr().into(), line[pos..].as_bstr())) + .unwrap_or((line.into(), [].as_bstr())) + }; + + let (pattern, flags) = super::ignore::parse_line(line.as_ref())?; + Ok((Kind::Pattern(pattern), flags, Iter::new(attrs, line_number))).into() +} + +const BLANKS: &[u8] = b" \t\r"; + +fn skip_blanks(line: &BStr) -> &BStr { + line.find_not_byteset(BLANKS).map(|pos| &line[pos..]).unwrap_or(line) +} diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index b9ebd783ffd..fde352d7342 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -1,58 +1,66 @@ use crate::ignore; use bstr::{BString, ByteSlice}; -pub struct Iter<'a> { +pub struct Lines<'a> { lines: bstr::Lines<'a>, line_no: usize, } -impl<'a> Iter<'a> { +impl<'a> Lines<'a> { pub fn new(buf: &'a [u8]) -> Self { - Iter { - lines: buf.lines(), + let bom = unicode_bom::Bom::from(buf); + Lines { + lines: buf[bom.len()..].lines(), line_no: 0, } } } -impl<'a> Iterator for Iter<'a> { +impl<'a> Iterator for Lines<'a> { type Item = (BString, ignore::pattern::Mode, usize); fn next(&mut self) -> Option { - let mut res = None; - for mut line in self.lines.by_ref() { + for line in self.lines.by_ref() { self.line_no += 1; - let mut mode = ignore::pattern::Mode::empty(); - if line.is_empty() { - continue; - }; if line.first() == Some(&b'#') { continue; - } else if line.first() == Some(&b'!') { - mode |= ignore::pattern::Mode::NEGATIVE; - line = &line[1..]; - } else if line.first() == Some(&b'\\') { - let second = line.get(1); - if second == Some(&b'!') || second == Some(&b'#') { - line = &line[1..]; - } - } - let mut line = truncate_non_escaped_trailing_spaces(line); - if line.last() == Some(&b'/') { - mode |= ignore::pattern::Mode::MUST_BE_DIR; - line.pop(); } - if !line.contains(&b'/') { - mode |= ignore::pattern::Mode::NO_SUB_DIR; + match parse_line(line) { + None => continue, + Some((line, flags)) => return 
Some((line, flags, self.line_no)), } - if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { - mode |= ignore::pattern::Mode::ENDS_WITH; - } - res = Some((line, mode, self.line_no)); - break; } - res + None + } +} + +#[inline] +pub(crate) fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::Mode)> { + let mut mode = ignore::pattern::Mode::empty(); + if line.is_empty() { + return None; + }; + if line.first() == Some(&b'!') { + mode |= ignore::pattern::Mode::NEGATIVE; + line = &line[1..]; + } else if line.first() == Some(&b'\\') { + let second = line.get(1); + if second == Some(&b'!') || second == Some(&b'#') { + line = &line[1..]; + } + } + let mut line = truncate_non_escaped_trailing_spaces(line); + if line.last() == Some(&b'/') { + mode |= ignore::pattern::Mode::MUST_BE_DIR; + line.pop(); + } + if !line.contains(&b'/') { + mode |= ignore::pattern::Mode::NO_SUB_DIR; + } + if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { + mode |= ignore::pattern::Mode::ENDS_WITH; } + Some((line, mode)) } /// We always copy just because that's ultimately needed anyway, not because we always have to. 
diff --git a/git-attributes/src/parse/mod.rs b/git-attributes/src/parse/mod.rs index 03b01b5790e..5bbd86f5bd4 100644 --- a/git-attributes/src/parse/mod.rs +++ b/git-attributes/src/parse/mod.rs @@ -1,5 +1,8 @@ pub mod ignore; -pub fn ignore(buf: &[u8]) -> ignore::Iter<'_> { - ignore::Iter::new(buf) +mod attribute; +pub use attribute::{Error, Iter, Kind, Lines}; + +pub fn ignore(buf: &[u8]) -> ignore::Lines<'_> { + ignore::Lines::new(buf) } diff --git a/git-attributes/tests/attributes.rs b/git-attributes/tests/attributes.rs index 05840a46a13..06f1a3c69d4 100644 --- a/git-attributes/tests/attributes.rs +++ b/git-attributes/tests/attributes.rs @@ -1,175 +1 @@ -mod parse { - mod ignore { - use git_attributes::ignore::pattern::Mode; - use git_testtools::fixture_path; - - #[test] - fn line_numbers_are_counted_correctly() { - let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); - let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect(); - assert_eq!( - actual, - vec![ - ("*.[oa]".into(), Mode::NO_SUB_DIR, 2), - ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), - ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), - ("/*".into(), Mode::empty(), 11), - ("/foo".into(), Mode::NEGATIVE, 12), - ("/foo/*".into(), Mode::empty(), 13), - ("/foo/bar".into(), Mode::NEGATIVE, 14) - ] - ); - } - - #[test] - fn line_endings_can_be_windows_or_unix() { - assert_eq!( - git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::>(), - vec![ - (r"unix".into(), Mode::NO_SUB_DIR, 1), - (r"windows".into(), Mode::NO_SUB_DIR, 2), - (r"last".into(), Mode::NO_SUB_DIR, 3) - ] - ); - } - - #[test] - fn mark_ends_with_pattern_specifically() { - assert_eq!( - git_attributes::parse::ignore(br"*literal").next(), - Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"**literal").next(), - Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)), - "double-asterisk won't allow for fast 
comparisons" - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera[l]").next(), - Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera?").next(), - Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera\?").next(), - Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)), - "for now we don't handle escapes properly like git seems to do" - ); - } - - #[test] - fn comments_are_ignored() { - assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); - } - - #[test] - fn backslashes_before_hashes_are_no_comments() { - assert_eq!( - git_attributes::parse::ignore(br"\#hello").next(), - Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { - assert_eq!( - git_attributes::parse::ignore(br"\hello\world").next(), - Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn leading_exclamation_mark_negates_pattern() { - assert_eq!( - git_attributes::parse::ignore(b"!hello").next(), - Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn leading_exclamation_marks_can_be_escaped_with_backslash() { - assert_eq!( - git_attributes::parse::ignore(br"\!hello").next(), - Some(("!hello".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn absence_of_sub_directories_are_marked() { - assert_eq!( - git_attributes::parse::ignore(br"a/b").next(), - Some(("a/b".into(), Mode::empty(), 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"ab").next(), - Some(("ab".into(), Mode::NO_SUB_DIR, 1)) - ); - } - - #[test] - fn trailing_slashes_are_marked_and_removed() { - assert_eq!( - git_attributes::parse::ignore(b"dir/").next(), - Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(b"dir///").next(), - Some(("dir//".into(), Mode::MUST_BE_DIR, 1)), - "but only the 
last slash is removed" - ); - } - - #[test] - fn trailing_spaces_are_ignored() { - assert_eq!( - git_attributes::parse::ignore(br"a ").next(), - Some(("a".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(b"a\t\t ").next(), - Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)), - "trailing tabs are not ignored" - ); - } - #[test] - fn trailing_spaces_can_be_escaped_to_be_literal() { - assert_eq!( - git_attributes::parse::ignore(br"a \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "a single escape in front of the last desired space is enough" - ); - assert_eq!( - git_attributes::parse::ignore(br"a b c ").next(), - Some(("a b c".into(), Mode::NO_SUB_DIR, 1)), - "spaces in the middle are fine" - ); - assert_eq!( - git_attributes::parse::ignore(br"a\ \ \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "one can also escape every single one" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "or just the one in the middle, losing the last actual space" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "escaping nothing also works as a whitespace protection" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \\\ ").next(), - Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), - "strange things like these work too" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \\ ").next(), - Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), - "strange things like these work as well" - ); - } - } -} +mod parse; diff --git a/git-attributes/tests/fixtures/attributes/various.txt b/git-attributes/tests/fixtures/attributes/various.txt new file mode 100644 index 00000000000..7974f710569 --- /dev/null +++ b/git-attributes/tests/fixtures/attributes/various.txt @@ -0,0 +1,11 @@ +# no attribute for now +*.[oa] + +# comment +*.html + +# other comment +\!foo.html + +\#a/path +/* diff --git a/git-attributes/tests/parse/attribute.rs 
b/git-attributes/tests/parse/attribute.rs new file mode 100644 index 00000000000..38065c83e16 --- /dev/null +++ b/git-attributes/tests/parse/attribute.rs @@ -0,0 +1,257 @@ +use bstr::{BStr, ByteSlice}; +use git_attributes::ignore::pattern::Mode; +use git_attributes::{ignore, parse, State}; +use git_testtools::fixture_path; + +#[test] +fn byte_order_marks_are_no_patterns() { + assert_eq!(line("\u{feff}hello"), (pattern(r"hello"), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!( + line("\u{feff}\"hello\""), + (pattern(r"hello"), Mode::NO_SUB_DIR, vec![], 1) + ); +} + +#[test] +fn line_numbers_are_counted_correctly() { + let ignore = std::fs::read(fixture_path("attributes/various.txt")).unwrap(); + assert_eq!( + try_lines(&String::from_utf8(ignore).unwrap()).unwrap(), + vec![ + (pattern(r"*.[oa]"), Mode::NO_SUB_DIR, vec![], 2), + (pattern(r"*.html"), Mode::NO_SUB_DIR | Mode::ENDS_WITH, vec![], 5), + (pattern(r"!foo.html"), Mode::NO_SUB_DIR, vec![], 8), + (pattern(r"#a/path"), Mode::empty(), vec![], 10), + (pattern(r"/*"), Mode::empty(), vec![], 11), + ] + ); +} + +#[test] +fn line_endings_can_be_windows_or_unix() { + assert_eq!( + try_lines("unix\nwindows\r\nlast").unwrap(), + vec![ + (pattern(r"unix"), Mode::NO_SUB_DIR, vec![], 1), + (pattern(r"windows"), Mode::NO_SUB_DIR, vec![], 2), + (pattern(r"last"), Mode::NO_SUB_DIR, vec![], 3) + ] + ); +} + +#[test] +fn comment_lines_are_ignored() { + assert!(git_attributes::parse(b"# hello world").next().is_none()); + assert!(git_attributes::parse(b"# \"hello world\"").next().is_none()); + assert!( + git_attributes::parse(b" \t\r# \"hello world\"").next().is_none(), + "also behind leading whitespace" + ); +} + +#[test] +fn leading_whitespace_is_ignored() { + assert_eq!(line(" \r\tp"), (pattern(r"p"), Mode::NO_SUB_DIR, vec![], 1)); + assert_eq!(line(" \r\t\"p\""), (pattern(r"p"), Mode::NO_SUB_DIR, vec![], 1)); +} + +#[test] +fn comment_can_be_escaped_like_gitignore_or_quoted() { + assert_eq!( + line(r"\#hello"), + 
(pattern(r"#hello"), Mode::NO_SUB_DIR, vec![], 1), + "undocumented, but definitely works" + ); + assert_eq!(line("\"# hello\""), (pattern(r"# hello"), Mode::NO_SUB_DIR, vec![], 1)); +} + +#[test] +fn exclamation_marks_must_be_escaped_or_error_unlike_gitignore() { + assert_eq!(line(r"\!hello"), (pattern(r"!hello"), Mode::NO_SUB_DIR, vec![], 1)); + assert!(matches!( + try_line(r"!hello"), + Err(parse::Error::PatternNegation { line_number: 1, .. }) + )); + assert!( + matches!( + try_line(r#""!hello""#), + Err(parse::Error::PatternNegation { line_number: 1, .. }), + ), + "even in quotes they trigger…" + ); + assert_eq!( + line(r#""\\!hello""#), + (pattern(r"!hello"), Mode::NO_SUB_DIR, vec![], 1), + "…and must be double-escaped, once to get through quote, then to get through parse ignore line" + ); +} + +#[test] +fn invalid_escapes_in_quotes_are_an_error() { + assert!(matches!(try_line(r#""\!hello""#), Err(parse::Error::Unquote(_)),),); +} + +#[test] +#[ignore] +fn custom_macros_can_be_defined() { + todo!("name validation, leave rejecting them based on location to the caller") +} + +#[test] +fn attribute_names_must_not_begin_with_dash_and_must_be_ascii_only() { + assert!(matches!( + try_line(r"p !-a"), + Err(parse::Error::AttributeName { line_number: 1, .. }) + )); + assert!( + matches!( + try_line(r#"p !!a"#), + Err(parse::Error::AttributeName { line_number: 1, .. }) + ), + "exclamation marks aren't allowed either" + ); + assert!( + matches!( + try_line(r#"p 你好"#), + Err(parse::Error::AttributeName { line_number: 1, .. 
}) + ), + "nor is utf-8 encoded characters - gitoxide could consider to relax this when established" + ); +} + +#[test] +fn attributes_are_parsed_behind_various_whitespace_characters() { + assert_eq!( + line(r#"p a b"#), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind space" + ); + assert_eq!( + line(r#""p" a b"#), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind space" + ); + assert_eq!( + line("p\ta\tb"), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind tab" + ); + assert_eq!( + line("\"p\"\ta\tb"), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind tab" + ); + assert_eq!( + line("p \t a \t b"), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind a mix of space and tab" + ); + assert_eq!( + line("\"p\" \t a \t b"), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a"), set("b")], 1), + "behind a mix of space and tab" + ); +} + +#[test] +fn attributes_come_in_different_flavors_due_to_prefixes() { + assert_eq!( + line(r#"p set -unset !unspecified -set"#), + ( + pattern("p"), + Mode::NO_SUB_DIR, + vec![set("set"), unset("unset"), unspecified("unspecified"), unset("set")], + 1 + ), + "the parser doesn't care about double-mentions either" + ); +} + +#[test] +fn attributes_can_have_values() { + assert_eq!( + line(r#"p a=one b=2 c=你好 "#), + ( + pattern("p"), + Mode::NO_SUB_DIR, + vec![value("a", "one"), value("b", "2"), value("c", "你好")], + 1 + ), + "only non-whitespace ascii values are allowed, no escaping or anything fancy is possible there" + ); +} + +#[test] +fn attributes_see_state_adjustments_over_value_assignments() { + assert_eq!( + line(r#"p set -unset=a !unspecified=b"#), + ( + pattern("p"), + Mode::NO_SUB_DIR, + vec![set("set"), unset("unset"), unspecified("unspecified")], + 1 + ) + ); +} + +#[test] +fn trailing_whitespace_in_attributes_is_ignored() { + assert_eq!(line("p a \r\t"), (pattern("p"), Mode::NO_SUB_DIR, vec![set("a")], 1),); + 
assert_eq!( + line("\"p\" a \r\t"), + (pattern("p"), Mode::NO_SUB_DIR, vec![set("a")], 1), + ); +} + +type ExpandedAttribute<'a> = ( + parse::Kind, + ignore::pattern::Mode, + Vec<(&'a BStr, git_attributes::State<'a>)>, + usize, +); + +fn set(attr: &str) -> (&BStr, State) { + (attr.as_bytes().as_bstr(), State::Set) +} + +fn unset(attr: &str) -> (&BStr, State) { + (attr.as_bytes().as_bstr(), State::Unset) +} + +fn unspecified(attr: &str) -> (&BStr, State) { + (attr.as_bytes().as_bstr(), State::Unspecified) +} + +fn value<'a, 'b>(attr: &'a str, value: &'b str) -> (&'a BStr, State<'b>) { + (attr.as_bytes().as_bstr(), State::Value(value.as_bytes().as_bstr())) +} + +fn pattern(name: &str) -> parse::Kind { + parse::Kind::Pattern(name.into()) +} + +fn try_line(input: &str) -> Result { + let mut lines = git_attributes::parse(input.as_bytes()); + let res = expand(lines.next().unwrap())?; + assert!(lines.next().is_none(), "expected only one line"); + Ok(res) +} + +fn line(input: &str) -> ExpandedAttribute { + let mut lines = git_attributes::parse(input.as_bytes()); + let res = expand(lines.next().expect("single line")).unwrap(); + assert!(lines.next().is_none(), "expected only one line"); + res +} + +fn try_lines(input: &str) -> Result, parse::Error> { + git_attributes::parse(input.as_bytes()).map(expand).collect() +} + +fn expand( + input: Result<(parse::Kind, ignore::pattern::Mode, parse::Iter<'_>, usize), parse::Error>, +) -> Result, parse::Error> { + let (pattern, mode, attrs, line_no) = input?; + let attrs = attrs.collect::, _>>()?; + Ok((pattern, mode, attrs, line_no)) +} diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs new file mode 100644 index 00000000000..7ad310a54e7 --- /dev/null +++ b/git-attributes/tests/parse/ignore.rs @@ -0,0 +1,180 @@ +use git_attributes::ignore::pattern::Mode; +use git_testtools::fixture_path; + +#[test] +fn byte_order_marks_are_no_patterns() { + assert_eq!( + 
git_attributes::parse::ignore("\u{feff}hello".as_bytes()).next(), + Some((r"hello".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn line_numbers_are_counted_correctly() { + let ignore = std::fs::read(fixture_path("ignore/various.txt")).unwrap(); + let actual: Vec<_> = git_attributes::parse::ignore(&ignore).collect(); + assert_eq!( + actual, + vec![ + ("*.[oa]".into(), Mode::NO_SUB_DIR, 2), + ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), + ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), + ("/*".into(), Mode::empty(), 11), + ("/foo".into(), Mode::NEGATIVE, 12), + ("/foo/*".into(), Mode::empty(), 13), + ("/foo/bar".into(), Mode::NEGATIVE, 14) + ] + ); +} + +#[test] +fn line_endings_can_be_windows_or_unix() { + assert_eq!( + git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::>(), + vec![ + (r"unix".into(), Mode::NO_SUB_DIR, 1), + (r"windows".into(), Mode::NO_SUB_DIR, 2), + (r"last".into(), Mode::NO_SUB_DIR, 3) + ] + ); +} + +#[test] +fn mark_ends_with_pattern_specifically() { + assert_eq!( + git_attributes::parse::ignore(br"*literal").next(), + Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"**literal").next(), + Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)), + "double-asterisk won't allow for fast comparisons" + ); + assert_eq!( + git_attributes::parse::ignore(br"*litera[l]").next(), + Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"*litera?").next(), + Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"*litera\?").next(), + Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)), + "for now we don't handle escapes properly like git seems to do" + ); +} + +#[test] +fn comments_are_ignored() { + assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); +} + +#[test] +fn backslashes_before_hashes_are_no_comments() { + assert_eq!( + 
git_attributes::parse::ignore(br"\#hello").next(), + Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { + assert_eq!( + git_attributes::parse::ignore(br"\hello\world").next(), + Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn leading_exclamation_mark_negates_pattern() { + assert_eq!( + git_attributes::parse::ignore(b"!hello").next(), + Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn leading_exclamation_marks_can_be_escaped_with_backslash() { + assert_eq!( + git_attributes::parse::ignore(br"\!hello").next(), + Some(("!hello".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn absence_of_sub_directories_are_marked() { + assert_eq!( + git_attributes::parse::ignore(br"a/b").next(), + Some(("a/b".into(), Mode::empty(), 1)) + ); + assert_eq!( + git_attributes::parse::ignore(br"ab").next(), + Some(("ab".into(), Mode::NO_SUB_DIR, 1)) + ); +} + +#[test] +fn trailing_slashes_are_marked_and_removed() { + assert_eq!( + git_attributes::parse::ignore(b"dir/").next(), + Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(b"dir///").next(), + Some(("dir//".into(), Mode::MUST_BE_DIR, 1)), + "but only the last slash is removed" + ); +} + +#[test] +fn trailing_spaces_are_ignored() { + assert_eq!( + git_attributes::parse::ignore(br"a ").next(), + Some(("a".into(), Mode::NO_SUB_DIR, 1)) + ); + assert_eq!( + git_attributes::parse::ignore(b"a\t\t ").next(), + Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)), + "trailing tabs are not ignored" + ); +} + +#[test] +fn trailing_spaces_can_be_escaped_to_be_literal() { + assert_eq!( + git_attributes::parse::ignore(br"a \ ").next(), + Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "a single escape in front of the last desired space is enough" + ); + assert_eq!( + git_attributes::parse::ignore(br"a b c ").next(), + Some(("a b c".into(), 
Mode::NO_SUB_DIR, 1)), + "spaces in the middle are fine" + ); + assert_eq!( + git_attributes::parse::ignore(br"a\ \ \ ").next(), + Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "one can also escape every single one" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \ ").next(), + Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "or just the one in the middle, losing the last actual space" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \").next(), + Some(("a ".into(), Mode::NO_SUB_DIR, 1)), + "escaping nothing also works as a whitespace protection" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \\\ ").next(), + Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), + "strange things like these work too" + ); + assert_eq!( + git_attributes::parse::ignore(br"a \\ ").next(), + Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), + "strange things like these work as well" + ); +} diff --git a/git-attributes/tests/parse/mod.rs b/git-attributes/tests/parse/mod.rs new file mode 100644 index 00000000000..3a142578d0d --- /dev/null +++ b/git-attributes/tests/parse/mod.rs @@ -0,0 +1,2 @@ +mod attribute; +mod ignore; diff --git a/git-odb/Cargo.toml b/git-odb/Cargo.toml index 67981c6d82d..fb053eade2e 100644 --- a/git-odb/Cargo.toml +++ b/git-odb/Cargo.toml @@ -32,11 +32,11 @@ all-features = true [dependencies] git-features = { version = "^0.19.1", path = "../git-features", features = ["rustsha1", "walkdir", "zlib", "crc32", "bstr"] } git-hash = { version = "^0.9.2", path = "../git-hash" } +git-quote = { version = "^0.1.0", path = "../git-quote" } git-object = { version = "^0.17.1", path = "../git-object" } git-pack = { version = "^0.17.0", path = "../git-pack" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} -btoi = "0.4.2" tempfile = "3.1.0" thiserror = "1.0.26" parking_lot = { version = "0.12.0" } diff --git a/git-odb/src/alternate/mod.rs b/git-odb/src/alternate/mod.rs index 1fd6eb95a08..ffcbb16a9c2 100644 --- 
a/git-odb/src/alternate/mod.rs +++ b/git-odb/src/alternate/mod.rs @@ -20,8 +20,6 @@ use std::{fs, io, path::PathBuf}; /// pub mod parse; -#[allow(missing_docs)] -pub mod unquote; /// Returned by [`resolve()`] #[derive(thiserror::Error, Debug)] diff --git a/git-odb/src/alternate/parse.rs b/git-odb/src/alternate/parse.rs index 4f134c28a29..a244076fdcb 100644 --- a/git-odb/src/alternate/parse.rs +++ b/git-odb/src/alternate/parse.rs @@ -2,8 +2,6 @@ use std::{borrow::Cow, path::PathBuf}; use git_object::bstr::ByteSlice; -use crate::alternate::unquote; - /// Returned as part of [`crate::alternate::Error::Parse`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] @@ -11,7 +9,7 @@ pub enum Error { #[error("Could not obtain an object path for the alternate directory '{}'", String::from_utf8_lossy(.0))] PathConversion(Vec<u8>), #[error("Could not unquote alternate path")] - Unquote(#[from] unquote::Error), + Unquote(#[from] git_quote::ansi_c::undo::Error), } pub(crate) fn content(input: &[u8]) -> Result<Vec<PathBuf>, Error> { @@ -23,7 +21,7 @@ pub(crate) fn content(input: &[u8]) -> Result<Vec<PathBuf>, Error> { } out.push( git_features::path::from_bstr(if line.starts_with(b"\"") { - unquote::ansi_c(line)? + git_quote::ansi_c::undo(line)?.0 } else { Cow::Borrowed(line) }) diff --git a/git-quote/CHANGELOG.md b/git-quote/CHANGELOG.md new file mode 100644 index 00000000000..51e396c6aa0 --- /dev/null +++ b/git-quote/CHANGELOG.md @@ -0,0 +1,31 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## 0.1.0 (2022-03-24) + +Initial release with ansi_c unquoting capability. + +### Commit Statistics + + + + - 3 commits contributed to the release. + - 0 commits were understood as [conventional](https://www.conventionalcommits.org). 
+ - 1 unique issue was worked on: [#301](https://github.com/Byron/gitoxide/issues/301) + +### Commit Details + + + +
<details><summary>view details</summary> + + * **[#301](https://github.com/Byron/gitoxide/issues/301)** + - use git-quote crate in git-odb alternate parsing ([`8e49aa6`](https://github.com/Byron/gitoxide/commit/8e49aa6090c1c361e3ddd44798754c44c179ab49)) + - Add ansic::undo ([`1be8f14`](https://github.com/Byron/gitoxide/commit/1be8f14128b673ea3399bc04b0a6747de9d6d404)) + - add empty git-quote crate ([`0d1aaf0`](https://github.com/Byron/gitoxide/commit/0d1aaf00160f98e40fb92fd401c67f59da2475fc)) +</details>
+ diff --git a/git-quote/Cargo.toml b/git-quote/Cargo.toml new file mode 100644 index 00000000000..8547a6578b5 --- /dev/null +++ b/git-quote/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "git-quote" +version = "0.1.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A WIP crate of the gitoxide project dealing with various quotations used by git" +authors = ["Sebastian Thiel "] +edition = "2018" + +[lib] +doctest = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bstr = { version = "0.2.13", default-features = false, features = ["std"]} +quick-error = "2.0.0" +btoi = "0.4.2" diff --git a/git-odb/src/alternate/unquote.rs b/git-quote/src/ansi_c.rs similarity index 55% rename from git-odb/src/alternate/unquote.rs rename to git-quote/src/ansi_c.rs index 8c3e84e42aa..d91f93b280e 100644 --- a/git-odb/src/alternate/unquote.rs +++ b/git-quote/src/ansi_c.rs @@ -1,46 +1,57 @@ -use std::{borrow::Cow, io::Read}; - -use git_object::bstr::{BStr, BString, ByteSlice}; +pub mod undo { + use bstr::{BStr, BString}; + use quick_error::quick_error; -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("{message}: {:?}", String::from_utf8_lossy(.input))] - InvalidInput { message: String, input: Vec }, - #[error("Invalid escaped value {byte} in input {:?}", String::from_utf8_lossy(.input))] - UnsupportedEscapeByte { byte: u8, input: Vec }, -} + quick_error! 
{ + #[derive(Debug)] + pub enum Error { + InvalidInput { message: String, input: BString } { + display("{}: {:?}", message, input) + } + UnsupportedEscapeByte { byte: u8, input: BString } { + display("Invalid escaped value {} in input {:?}", byte, input) + } + } + } -impl Error { - fn new(message: impl ToString, input: &BStr) -> Error { - Error::InvalidInput { - message: message.to_string(), - input: input.to_vec(), + impl Error { + pub(crate) fn new(message: impl ToString, input: &BStr) -> Error { + Error::InvalidInput { + message: message.to_string(), + input: input.into(), + } } } } -/// Unquote the given ansi-c quoted `input` string. +use std::{borrow::Cow, io::Read}; + +use bstr::{BStr, BString, ByteSlice}; + +/// Unquote the given ansi-c quoted `input` string, returning it and the number of bytes consumed. /// /// The `input` is returned unaltered if it doesn't start with a `"` character to indicate -/// quotation, otherwise a new unqoted string will always be allocated. +/// quotation, otherwise a new unquoted string will always be allocated. +/// The number of consumed bytes allows callers to pass strings that merely start with a quoted section and to skip past the quoted text for additional processing. /// /// See [the tests][tests] for quotation examples. 
/// /// [tests]: https://github.com/Byron/gitoxide/blob/e355b4ad133075152312816816af5ce72cf79cff/git-odb/src/alternate/unquote.rs#L110-L118 -pub fn ansi_c(input: &BStr) -> Result, Error> { +pub fn undo(input: &BStr) -> Result<(Cow<'_, BStr>, usize), undo::Error> { if !input.starts_with(b"\"") { - return Ok(input.into()); + return Ok((input.into(), input.len())); } if input.len() < 2 { - return Err(Error::new("Input must be surrounded by double quotes", input)); + return Err(undo::Error::new("Input must be surrounded by double quotes", input)); } let original = input.as_bstr(); let mut input = &input[1..]; + let mut consumed = 1; let mut out = BString::default(); - fn consume_one_past(input: &mut &BStr, position: usize) -> Result { + fn consume_one_past(input: &mut &BStr, position: usize) -> Result { *input = input .get(position + 1..) - .ok_or_else(|| Error::new("Unexpected end of input", input))? + .ok_or_else(|| undo::Error::new("Unexpected end of input", input))? .as_bstr(); let next = input[0]; *input = input.get(1..).unwrap_or_default().as_bstr(); @@ -50,10 +61,12 @@ pub fn ansi_c(input: &BStr) -> Result, Error> { match input.find_byteset(b"\"\\") { Some(position) => { out.extend_from_slice(&input[..position]); + consumed += position + 1; match input[position] { b'"' => break, b'\\' => { let next = consume_one_past(&mut input, position)?; + consumed += 1; match next { b'n' => out.push(b'\n'), b'r' => out.push(b'\r'), @@ -69,18 +82,22 @@ pub fn ansi_c(input: &BStr) -> Result, Error> { input .get(..2) .ok_or_else(|| { - Error::new("Unexpected end of input when fetching two more octal bytes", input) + undo::Error::new( + "Unexpected end of input when fetching two more octal bytes", + input, + ) })? 
.read_exact(&mut buf[1..]) .expect("impossible to fail as numbers match"); - let byte = btoi::btou_radix(&buf, 8).map_err(|e| Error::new(e, original))?; + let byte = btoi::btou_radix(&buf, 8).map_err(|e| undo::Error::new(e, original))?; out.push(byte); input = &input[2..]; + consumed += 2; } _ => { - return Err(Error::UnsupportedEscapeByte { + return Err(undo::Error::UnsupportedEscapeByte { byte: next, - input: original.to_vec(), + input: original.into(), }) } } @@ -90,40 +107,10 @@ pub fn ansi_c(input: &BStr) -> Result, Error> { } None => { out.extend_from_slice(input); + consumed += input.len(); break; } } } - Ok(out.into()) -} - -#[cfg(test)] -mod tests { - use git_object::bstr::ByteSlice; - - use super::*; - - macro_rules! test { - ($name:ident, $input:literal, $expected:literal) => { - #[test] - fn $name() { - assert_eq!( - ansi_c($input.as_bytes().as_bstr()).expect("valid input"), - std::borrow::Cow::Borrowed($expected.as_bytes().as_bstr()) - ); - } - }; - } - - test!(unquoted_remains_unchanged, "hello", "hello"); - test!(empty_surrounded_by_quotes, "\"\"", ""); - test!(surrounded_only_by_quotes, "\"hello\"", "hello"); - test!(typical_escapes, r#""\n\r\t""#, b"\n\r\t"); - test!(untypical_escapes, r#""\a\b\f\v""#, b"\x07\x08\x0c\x0b"); - test!(literal_escape_and_double_quote, r#""\"\\""#, br#""\"#); - test!( - unicode_byte_escapes_by_number, - r#""\346\277\261\351\207\216\t\347\264\224""#, - "濱野\t純" - ); + Ok((out.into(), consumed)) } diff --git a/git-quote/src/lib.rs b/git-quote/src/lib.rs new file mode 100644 index 00000000000..d2d266b56e0 --- /dev/null +++ b/git-quote/src/lib.rs @@ -0,0 +1,4 @@ +#![forbid(unsafe_code, rust_2018_idioms)] + +/// +pub mod ansi_c; diff --git a/git-quote/tests/quote.rs b/git-quote/tests/quote.rs new file mode 100644 index 00000000000..370f5baa1d5 --- /dev/null +++ b/git-quote/tests/quote.rs @@ -0,0 +1,48 @@ +mod ansi_c { + mod undo { + use bstr::ByteSlice; + use git_quote::ansi_c; + + macro_rules! 
test { + ($name:ident, $input:literal, $expected:literal, $consumed:literal) => { + #[test] + fn $name() { + assert_eq!( + ansi_c::undo($input.as_bytes().as_bstr()).expect("valid input"), + ( + std::borrow::Cow::Borrowed($expected.as_bytes().as_bstr()), + $consumed + ) + ); + } + }; + } + + test!(unquoted_remains_unchanged, "hello", "hello", 5); + test!(empty_surrounded_by_quotes, "\"\"", "", 2); + test!(surrounded_only_by_quotes, "\"hello\"", "hello", 7); + test!(typical_escapes, r#""\n\r\t""#, b"\n\r\t", 8); + test!(untypical_escapes, r#""\a\b\f\v""#, b"\x07\x08\x0c\x0b", 10); + test!(literal_escape_and_double_quote, r#""\"\\""#, br#""\"#, 6); + test!( + unicode_byte_escapes_by_number, + r#""\346\277\261\351\207\216\t\347\264\224""#, + "濱野\t純", + 40 + ); + test!( + exclamation_and_tilde_survive_an_escape_with_double_escaping, + r#""\\!\\#hello there/file.ext""#, + r"\!\#hello there/file.ext", + 28 + ); + + #[test] + fn out_of_quote_characters_can_be_passed_and_will_not_be_consumed() { + let input = br#""hello there" out of quote"#.as_bstr(); + let (unquoted, consumed) = ansi_c::undo(input).expect("valid input"); + assert_eq!(unquoted, std::borrow::Cow::Borrowed(b"hello there".as_bstr())); + assert_eq!(&input[consumed..], " out of quote"); + } + } +}