From f86234161ad0aa733846de8ed514cf199fc23525 Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Mon, 7 Nov 2022 09:35:13 +0100 Subject: [PATCH 1/5] Add bindings to git_indexer The indexer API is a lower-level interface for storing and indexing pack files, which, unlike `git_odb_write_pack`, allows the ouput to be written to an arbitrary directory. This can be useful when working with unusual validation requirements or non-standard repository layouts. --- libgit2-sys/lib.rs | 32 +++++++++ src/indexer.rs | 166 ++++++++++++++++++++++++++++++++++++++++++++- src/lib.rs | 2 +- src/odb.rs | 8 ++- 4 files changed, 202 insertions(+), 6 deletions(-) diff --git a/libgit2-sys/lib.rs b/libgit2-sys/lib.rs index ea66159f0c..4e18aeb32f 100644 --- a/libgit2-sys/lib.rs +++ b/libgit2-sys/lib.rs @@ -25,6 +25,7 @@ pub const GIT_REFDB_BACKEND_VERSION: c_uint = 1; pub const GIT_CHERRYPICK_OPTIONS_VERSION: c_uint = 1; pub const GIT_APPLY_OPTIONS_VERSION: c_uint = 1; pub const GIT_REVERT_OPTIONS_VERSION: c_uint = 1; +pub const GIT_INDEXER_OPTIONS_VERSION: c_uint = 1; macro_rules! git_enum { (pub enum $name:ident { $($variants:tt)* }) => { @@ -91,6 +92,7 @@ pub enum git_odb_object {} pub enum git_worktree {} pub enum git_transaction {} pub enum git_mailmap {} +pub enum git_indexer {} #[repr(C)] pub struct git_revspec { @@ -354,6 +356,14 @@ pub type git_indexer_progress_cb = )] pub type git_transfer_progress = git_indexer_progress; +#[repr(C)] +pub struct git_indexer_options { + pub version: c_uint, + pub progress_cb: git_indexer_progress_cb, + pub progress_cb_payload: *mut c_void, + pub verify: c_uchar, +} + pub type git_remote_ready_cb = Option c_int>; #[repr(C)] @@ -3801,6 +3811,28 @@ extern "C" { ) -> c_int; pub fn git_packbuilder_free(pb: *mut git_packbuilder); + // indexer + pub fn git_indexer_new( + out: *mut *mut git_indexer, + path: *const c_char, + mode: c_uint, + odb: *mut git_odb, + opts: *mut git_indexer_options, + ) -> c_int; + pub fn git_indexer_append( + idx: *mut git_indexer, + data: *const c_void, + size: size_t, + stats: *mut git_indexer_progress, + ) -> c_int; + pub fn git_indexer_commit(idx: *mut git_indexer, stats: *mut git_indexer_progress) -> c_int; + #[deprecated = "use `git_indexer_name` to retrieve the filename"] + pub fn git_indexer_hash(idx: *const git_indexer) -> *const git_oid; + pub fn git_indexer_name(idx: *const git_indexer) -> *const c_char; + pub fn git_indexer_free(idx: *mut git_indexer); + + pub fn git_indexer_options_init(opts: *mut git_indexer_options, version: c_uint) -> c_int; + // odb pub fn git_repository_odb(out: *mut *mut git_odb, repo: *mut git_repository) -> c_int; pub fn git_odb_new(db: *mut *mut git_odb) -> c_int; diff --git a/src/indexer.rs b/src/indexer.rs index 1003abeb76..46c5a2508a 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -1,7 +1,13 @@ -use std::marker; +use std::ffi::CStr; +use std::mem::MaybeUninit; +use std::path::Path; +use std::{io, marker, mem, ptr}; -use crate::raw; +use libc::c_void; + +use crate::odb::{write_pack_progress_cb, OdbPackwriterCb}; use crate::util::Binding; +use crate::{raw, Error, IntoCString, Odb}; /// Struct representing the progress by an in-flight transfer. pub struct Progress<'a> { @@ -94,3 +100,159 @@ impl<'a> Binding for Progress<'a> { )] #[allow(dead_code)] pub type TransportProgress<'a> = IndexerProgress<'a>; + +/// A stream to write and index a packfile +/// +/// This is equivalent to [`crate::OdbPackwriter`], but allows to store the pack +/// and index at an arbitrary path. It also does not require access to an object +/// database if, and only if, the pack file is self-contained (i.e. not "thin"). +pub struct Indexer<'odb> { + raw: *mut raw::git_indexer, + progress: MaybeUninit, + progress_payload_ptr: *mut OdbPackwriterCb<'odb>, +} + +impl<'a> Indexer<'a> { + /// Create a new indexer + /// + /// The [`Odb`] is used to resolve base objects when fixing thin packs. It + /// can be `None` if no thin pack is expected, in which case missing bases + /// will result in an error. + /// + /// `mode` is the permissions to use for the output files, use `0` for defaults. + /// + /// If `verify` is `false`, the indexer will bypass object connectivity checks. + pub fn new(odb: Option<&Odb<'a>>, path: &Path, mode: u32, verify: bool) -> Result { + let path = path.into_c_string()?; + + let odb = odb.map(Binding::raw).unwrap_or_else(ptr::null_mut); + + let mut out = ptr::null_mut(); + let progress = MaybeUninit::uninit(); + let progress_cb: raw::git_indexer_progress_cb = Some(write_pack_progress_cb); + let progress_payload = Box::new(OdbPackwriterCb { cb: None }); + let progress_payload_ptr = Box::into_raw(progress_payload); + + unsafe { + let mut opts = mem::zeroed(); + try_call!(raw::git_indexer_options_init( + &mut opts, + raw::GIT_INDEXER_OPTIONS_VERSION + )); + opts.progress_cb = progress_cb; + opts.progress_cb_payload = progress_payload_ptr as *mut c_void; + opts.verify = verify.into(); + + try_call!(raw::git_indexer_new(&mut out, path, mode, odb, &mut opts)); + } + + Ok(Self { + raw: out, + progress, + progress_payload_ptr, + }) + } + + /// Finalize the pack and index + /// + /// Resolves any pending deltas and writes out the index file. The returned + /// string is the hexadecimal checksum of the packfile, which is also used + /// to name the pack and index files (`pack-.pack` and + /// `pack-.idx` respectively). + pub fn commit(mut self) -> Result { + unsafe { + try_call!(raw::git_indexer_commit( + self.raw, + self.progress.as_mut_ptr() + )); + + let name = CStr::from_ptr(raw::git_indexer_name(self.raw)); + Ok(name.to_str().expect("pack name not utf8").to_owned()) + } + } + + /// The callback through which progress is monitored. Be aware that this is + /// called inline, so performance may be affected. + pub fn progress(&mut self, cb: F) -> &mut Self + where + F: FnMut(Progress<'_>) -> bool + 'a, + { + let progress_payload = + unsafe { &mut *(self.progress_payload_ptr as *mut OdbPackwriterCb<'_>) }; + progress_payload.cb = Some(Box::new(cb) as Box>); + + self + } +} + +impl io::Write for Indexer<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + unsafe { + let ptr = buf.as_ptr() as *mut c_void; + let len = buf.len(); + + let res = raw::git_indexer_append(self.raw, ptr, len, self.progress.as_mut_ptr()); + + if res < 0 { + Err(io::Error::new(io::ErrorKind::Other, "Write error")) + } else { + Ok(buf.len()) + } + } + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +impl Drop for Indexer<'_> { + fn drop(&mut self) { + unsafe { + raw::git_indexer_free(self.raw); + drop(Box::from_raw(self.progress_payload_ptr)) + } + } +} + +#[cfg(test)] +mod tests { + use crate::{Buf, Indexer}; + use std::io::prelude::*; + + #[test] + fn indexer() { + let (_td, repo_source) = crate::test::repo_init(); + let (_td, repo_target) = crate::test::repo_init(); + + let mut progress_called = false; + + // Create an in-memory packfile + let mut builder = t!(repo_source.packbuilder()); + let mut buf = Buf::new(); + let (commit_source_id, _tree) = crate::test::commit(&repo_source); + t!(builder.insert_object(commit_source_id, None)); + t!(builder.write_buf(&mut buf)); + + // Write it to the standard location in the target repo, but via indexer + let odb = repo_source.odb().unwrap(); + let mut indexer = Indexer::new( + Some(&odb), + repo_target.path().join("objects").join("pack").as_path(), + 0o644, + true, + ) + .unwrap(); + indexer.progress(|_| { + progress_called = true; + true + }); + indexer.write(&buf).unwrap(); + indexer.commit().unwrap(); + + // Assert that target repo picks it up as valid + let commit_target = repo_target.find_commit(commit_source_id).unwrap(); + assert_eq!(commit_target.id(), commit_source_id); + assert!(progress_called); + } +} diff --git a/src/lib.rs b/src/lib.rs index 40be0c4b5b..6c33ddbb58 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -98,7 +98,7 @@ pub use crate::error::Error; pub use crate::index::{ Index, IndexConflict, IndexConflicts, IndexEntries, IndexEntry, IndexMatchedPath, }; -pub use crate::indexer::{IndexerProgress, Progress}; +pub use crate::indexer::{Indexer, IndexerProgress, Progress}; pub use crate::mailmap::Mailmap; pub use crate::mempack::Mempack; pub use crate::merge::{AnnotatedCommit, MergeOptions}; diff --git a/src/odb.rs b/src/odb.rs index 66166913fe..d7ae2b9515 100644 --- a/src/odb.rs +++ b/src/odb.rs @@ -1,6 +1,7 @@ use std::io; use std::marker; use std::mem::MaybeUninit; + use std::ptr; use std::slice; @@ -10,6 +11,7 @@ use libc::{c_char, c_int, c_uint, c_void, size_t}; use crate::panic; use crate::util::Binding; + use crate::{ raw, Error, IndexerProgress, Mempack, Object, ObjectType, OdbLookupFlags, Oid, Progress, }; @@ -438,8 +440,8 @@ impl<'repo> io::Write for OdbWriter<'repo> { } } -struct OdbPackwriterCb<'repo> { - cb: Option>>, +pub(crate) struct OdbPackwriterCb<'repo> { + pub(crate) cb: Option>>, } /// A stream to write a packfile to the ODB @@ -542,7 +544,7 @@ extern "C" fn foreach_cb(id: *const raw::git_oid, payload: *mut c_void) -> c_int .unwrap_or(1) } -extern "C" fn write_pack_progress_cb( +pub(crate) extern "C" fn write_pack_progress_cb( stats: *const raw::git_indexer_progress, payload: *mut c_void, ) -> c_int { From 2e95ccbb655e0fb47ef40b95d3073f750e270e05 Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Tue, 14 Feb 2023 07:43:52 +0100 Subject: [PATCH 2/5] Remove accidental newlines --- src/odb.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/odb.rs b/src/odb.rs index d7ae2b9515..4092e58c0c 100644 --- a/src/odb.rs +++ b/src/odb.rs @@ -1,7 +1,6 @@ use std::io; use std::marker; use std::mem::MaybeUninit; - use std::ptr; use std::slice; @@ -11,7 +10,6 @@ use libc::{c_char, c_int, c_uint, c_void, size_t}; use crate::panic; use crate::util::Binding; - use crate::{ raw, Error, IndexerProgress, Mempack, Object, ObjectType, OdbLookupFlags, Oid, Progress, }; From 318bf825df4113178bfef30622e2b2689065b322 Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Tue, 14 Feb 2023 08:46:35 +0100 Subject: [PATCH 3/5] Wrap native error in io::Error when write fails Unlike for `OdbPackwriter`, we always get the native error code here. --- src/indexer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/indexer.rs b/src/indexer.rs index 46c5a2508a..ea7fbb9ab1 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -192,9 +192,11 @@ impl io::Write for Indexer<'_> { let len = buf.len(); let res = raw::git_indexer_append(self.raw, ptr, len, self.progress.as_mut_ptr()); - if res < 0 { - Err(io::Error::new(io::ErrorKind::Other, "Write error")) + Err(io::Error::new( + io::ErrorKind::Other, + Error::last_error(res).unwrap(), + )) } else { Ok(buf.len()) } From a3067e79cbc1142217a78854b6f8022e195b0403 Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Tue, 14 Feb 2023 17:44:48 +0100 Subject: [PATCH 4/5] Avoid UB by actually initialising git_indexer_progress --- libgit2-sys/lib.rs | 2 +- src/indexer.rs | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/libgit2-sys/lib.rs b/libgit2-sys/lib.rs index 4e18aeb32f..43cd1f5528 100644 --- a/libgit2-sys/lib.rs +++ b/libgit2-sys/lib.rs @@ -336,7 +336,7 @@ pub struct git_checkout_perfdata { } #[repr(C)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Default)] pub struct git_indexer_progress { pub total_objects: c_uint, pub indexed_objects: c_uint, diff --git a/src/indexer.rs b/src/indexer.rs index ea7fbb9ab1..0aaf353d53 100644 --- a/src/indexer.rs +++ b/src/indexer.rs @@ -1,5 +1,4 @@ use std::ffi::CStr; -use std::mem::MaybeUninit; use std::path::Path; use std::{io, marker, mem, ptr}; @@ -108,7 +107,7 @@ pub type TransportProgress<'a> = IndexerProgress<'a>; /// database if, and only if, the pack file is self-contained (i.e. not "thin"). pub struct Indexer<'odb> { raw: *mut raw::git_indexer, - progress: MaybeUninit, + progress: raw::git_indexer_progress, progress_payload_ptr: *mut OdbPackwriterCb<'odb>, } @@ -128,7 +127,6 @@ impl<'a> Indexer<'a> { let odb = odb.map(Binding::raw).unwrap_or_else(ptr::null_mut); let mut out = ptr::null_mut(); - let progress = MaybeUninit::uninit(); let progress_cb: raw::git_indexer_progress_cb = Some(write_pack_progress_cb); let progress_payload = Box::new(OdbPackwriterCb { cb: None }); let progress_payload_ptr = Box::into_raw(progress_payload); @@ -148,7 +146,7 @@ impl<'a> Indexer<'a> { Ok(Self { raw: out, - progress, + progress: Default::default(), progress_payload_ptr, }) } @@ -161,10 +159,7 @@ impl<'a> Indexer<'a> { /// `pack-.idx` respectively). pub fn commit(mut self) -> Result { unsafe { - try_call!(raw::git_indexer_commit( - self.raw, - self.progress.as_mut_ptr() - )); + try_call!(raw::git_indexer_commit(self.raw, &mut self.progress)); let name = CStr::from_ptr(raw::git_indexer_name(self.raw)); Ok(name.to_str().expect("pack name not utf8").to_owned()) @@ -191,7 +186,7 @@ impl io::Write for Indexer<'_> { let ptr = buf.as_ptr() as *mut c_void; let len = buf.len(); - let res = raw::git_indexer_append(self.raw, ptr, len, self.progress.as_mut_ptr()); + let res = raw::git_indexer_append(self.raw, ptr, len, &mut self.progress); if res < 0 { Err(io::Error::new( io::ErrorKind::Other, From f36cf676bf6f96c46c400dd10f13e87b9a259b20 Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Tue, 14 Feb 2023 17:49:27 +0100 Subject: [PATCH 5/5] Avoid MaybeUninit in OdbPackwriter, too --- src/odb.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/odb.rs b/src/odb.rs index 4092e58c0c..ab25c757fc 100644 --- a/src/odb.rs +++ b/src/odb.rs @@ -1,6 +1,5 @@ use std::io; use std::marker; -use std::mem::MaybeUninit; use std::ptr; use std::slice; @@ -162,7 +161,6 @@ impl<'repo> Odb<'repo> { /// Create stream for writing a pack file to the ODB pub fn packwriter(&self) -> Result, Error> { let mut out = ptr::null_mut(); - let progress = MaybeUninit::uninit(); let progress_cb: raw::git_indexer_progress_cb = Some(write_pack_progress_cb); let progress_payload = Box::new(OdbPackwriterCb { cb: None }); let progress_payload_ptr = Box::into_raw(progress_payload); @@ -178,7 +176,7 @@ impl<'repo> Odb<'repo> { Ok(OdbPackwriter { raw: out, - progress, + progress: Default::default(), progress_payload_ptr, }) } @@ -445,7 +443,7 @@ pub(crate) struct OdbPackwriterCb<'repo> { /// A stream to write a packfile to the ODB pub struct OdbPackwriter<'repo> { raw: *mut raw::git_odb_writepack, - progress: MaybeUninit, + progress: raw::git_indexer_progress, progress_payload_ptr: *mut OdbPackwriterCb<'repo>, } @@ -455,7 +453,7 @@ impl<'repo> OdbPackwriter<'repo> { unsafe { let writepack = &*self.raw; let res = match writepack.commit { - Some(commit) => commit(self.raw, self.progress.as_mut_ptr()), + Some(commit) => commit(self.raw, &mut self.progress), None => -1, }; @@ -489,7 +487,7 @@ impl<'repo> io::Write for OdbPackwriter<'repo> { let writepack = &*self.raw; let res = match writepack.append { - Some(append) => append(self.raw, ptr, len, self.progress.as_mut_ptr()), + Some(append) => append(self.raw, ptr, len, &mut self.progress), None => -1, };