From f69890cafd08db95231a02d08fbbcb02689e99c4 Mon Sep 17 00:00:00 2001 From: Pierre Chevalier Date: Fri, 21 Mar 2025 11:30:30 +0000 Subject: [PATCH] fix: Support empty tags with or without trailing NL When representing an annotated tag with an empty commit message, we used to only support a tag ending with two newlines (one after the tagger line + one after the empty commit message). This was due to a misconception that annotated tags ending with a single NL shouldn't exist in the wild since there isn't an obvious way to create them from with `git`. This misconception shows up in the discussion thread for [issue 603](https://github.com/GitoxideLabs/gitoxide/issues/603) It turns out that both encodings of empty annotated tags appear in the wild. We must be able to parse either and roundtrip for either. Before [PR 604](https://github.com/GitoxideLabs/gitoxide/pull/604), we used to special case the empty tag msg case and not add a NL. To be able to represent `b""`, `b"\n"`, `b"\n\n"`..., we special case any `message` that is a pure sequence of `b'\n'` and actually parse the `b'\n'` into the tag's message. This allows us to calculate the correct size that matches the number of bytes that git would produce, as well as round-trip to and from the commit encoding. The existing tests (in particular `round_trip` for `empty.txt` and `empty_missing_nl.txt`) convince me that the logic is sound. Also, the size being `139` bytes for `empty_missing_nl.txt` and `140` bytes for `empty.txt` matches the output of `cat file | wc -l` for each file. --- gix-object/src/tag/decode.rs | 4 +-- gix-object/src/tag/write.rs | 16 +++++------ .../tests/fixtures/tag/empty_missing_nl.txt | 4 +++ gix-object/tests/object/encode/mod.rs | 1 + gix-object/tests/object/tag/mod.rs | 28 +++++++++++++++++-- 5 files changed, 40 insertions(+), 13 deletions(-) create mode 100644 gix-object/tests/fixtures/tag/empty_missing_nl.txt diff --git a/gix-object/src/tag/decode.rs b/gix-object/src/tag/decode.rs index 27090f85369..c30015b84a0 100644 --- a/gix-object/src/tag/decode.rs +++ b/gix-object/src/tag/decode.rs @@ -40,8 +40,8 @@ pub fn message<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> ModalResult<(& const PGP_SIGNATURE_BEGIN: &[u8] = b"\n-----BEGIN PGP SIGNATURE-----"; const PGP_SIGNATURE_END: &[u8] = b"-----END PGP SIGNATURE-----"; - if i.is_empty() { - return Ok((b"".as_bstr(), None)); + if i.iter().all(|b| *b == b'\n') { + return i.map(|message: &[u8]| (message.as_bstr(), None)).parse_next(i); } delimited( NL, diff --git a/gix-object/src/tag/write.rs b/gix-object/src/tag/write.rs index b8ea340d190..520084f4af5 100644 --- a/gix-object/src/tag/write.rs +++ b/gix-object/src/tag/write.rs @@ -29,10 +29,10 @@ impl crate::WriteTo for Tag { encode::trusted_header_signature(b"tagger", &tagger.to_ref(), out)?; } - out.write_all(NL)?; - if !self.message.is_empty() { - out.write_all(self.message.as_ref())?; + if !self.message.iter().all(|b| *b == b'\n') { + out.write_all(NL)?; } + out.write_all(self.message.as_ref())?; if let Some(message) = &self.pgp_signature { out.write_all(NL)?; out.write_all(message.as_ref())?; @@ -52,7 +52,7 @@ impl crate::WriteTo for Tag { .tagger .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) - + 1 /* nl */ + self.message.len() + + if self.message.iter().all(|b| *b == b'\n') { 0 } else { 1 /* nl */ } + self.message.len() + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } @@ -66,10 +66,10 @@ impl crate::WriteTo for TagRef<'_> { encode::trusted_header_signature(b"tagger", tagger, &mut out)?; } - out.write_all(NL)?; - if !self.message.is_empty() { - out.write_all(self.message)?; + if !self.message.iter().all(|b| *b == b'\n') { + out.write_all(NL)?; } + out.write_all(self.message)?; if let Some(message) = self.pgp_signature { out.write_all(NL)?; out.write_all(message)?; @@ -89,7 +89,7 @@ impl crate::WriteTo for TagRef<'_> { .tagger .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) - + 1 /* nl */ + self.message.len() + + if self.message.iter().all(|b| *b == b'\n') { 0 } else { 1 /* nl */ } + self.message.len() + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } diff --git a/gix-object/tests/fixtures/tag/empty_missing_nl.txt b/gix-object/tests/fixtures/tag/empty_missing_nl.txt new file mode 100644 index 00000000000..42dabcc6ed6 --- /dev/null +++ b/gix-object/tests/fixtures/tag/empty_missing_nl.txt @@ -0,0 +1,4 @@ +object 01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc +type commit +tag empty +tagger Sebastian Thiel 1592381636 +0800 diff --git a/gix-object/tests/object/encode/mod.rs b/gix-object/tests/object/encode/mod.rs index 560e8ac5996..0752eaeba8b 100644 --- a/gix-object/tests/object/encode/mod.rs +++ b/gix-object/tests/object/encode/mod.rs @@ -74,6 +74,7 @@ mod tag { round_trip!( gix_object::Tag, gix_object::TagRef, + "tag/empty_missing_nl.txt", "tag/empty.txt", "tag/no-tagger.txt", "tag/whitespace.txt", diff --git a/gix-object/tests/object/tag/mod.rs b/gix-object/tests/object/tag/mod.rs index cb6aa857f95..801ca29a043 100644 --- a/gix-object/tests/object/tag/mod.rs +++ b/gix-object/tests/object/tag/mod.rs @@ -54,7 +54,7 @@ mod iter { Token::Name(b"empty".as_bstr()), Token::Tagger(tagger), Token::Body { - message: b"".as_bstr(), + message: b"\n".as_bstr(), pgp_signature: None, } ] @@ -155,7 +155,7 @@ fn invalid() { mod from_bytes { use gix_actor::SignatureRef; use gix_date::Time; - use gix_object::{bstr::ByteSlice, Kind, TagRef}; + use gix_object::{bstr::ByteSlice, Kind, TagRef, WriteTo}; use crate::{fixture_name, signature, tag::tag_fixture, Sign}; @@ -170,8 +170,29 @@ mod from_bytes { #[test] fn empty() -> crate::Result { + let fixture = fixture_name("tag", "empty.txt"); + let tag_ref = TagRef::from_bytes(&fixture)?; assert_eq!( - TagRef::from_bytes(&fixture_name("tag", "empty.txt"))?, + tag_ref, + TagRef { + target: b"01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc".as_bstr(), + name: b"empty".as_bstr(), + target_kind: Kind::Commit, + message: b"\n".as_bstr(), + tagger: Some(signature(1592381636)), + pgp_signature: None + } + ); + assert_eq!(tag_ref.size(), 140); + Ok(()) + } + + #[test] + fn empty_missing_nl() -> crate::Result { + let fixture = fixture_name("tag", "empty_missing_nl.txt"); + let tag_ref = TagRef::from_bytes(&fixture)?; + assert_eq!( + tag_ref, TagRef { target: b"01dd4e2a978a9f5bd773dae6da7aa4a5ac1cdbbc".as_bstr(), name: b"empty".as_bstr(), @@ -181,6 +202,7 @@ mod from_bytes { pgp_signature: None } ); + assert_eq!(tag_ref.size(), 139); Ok(()) }