diff --git a/Cargo.lock b/Cargo.lock index 3ad5297deca..7ef29d51f00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1793,10 +1793,11 @@ dependencies = [ "git-object 0.17.1", "git-odb 0.27.0", "git-testtools", - "quick-error", + "libc", "serde", "symlink", "tempfile", + "thiserror", "walkdir", ] diff --git a/Makefile b/Makefile index 04be1f0940a..d004845649e 100644 --- a/Makefile +++ b/Makefile @@ -149,6 +149,8 @@ unit-tests: ## run all unit tests && cargo test --features "internal-testing-git-features-parallel" cd git-index && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \ && cargo test --features "internal-testing-git-features-parallel" + cd git-worktree && cargo test --features internal-testing-to-avoid-being-run-by-cargo-test-all \ + && cargo test --features "internal-testing-git-features-parallel" cd git-packetline && cargo test \ && cargo test --features blocking-io,maybe-async/is_sync --test blocking-packetline \ && cargo test --features "async-io" --test async-packetline diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index 1e01333e66c..482ba5aea78 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -19,7 +19,7 @@ echo "in root: gitoxide CLI" (enter cargo-smart-release && indent cargo diet -n --package-size-limit 85KB) (enter git-actor && indent cargo diet -n --package-size-limit 5KB) (enter git-index && indent cargo diet -n --package-size-limit 30KB) -(enter git-worktree && indent cargo diet -n --package-size-limit 15KB) +(enter git-worktree && indent cargo diet -n --package-size-limit 20KB) (enter git-revision && indent cargo diet -n --package-size-limit 10KB) (enter git-bitmap && indent cargo diet -n --package-size-limit 5KB) (enter git-tempfile && indent cargo diet -n --package-size-limit 25KB) @@ -36,7 +36,7 @@ echo "in root: gitoxide CLI" (enter git-object && indent cargo diet -n --package-size-limit 25KB) (enter git-commitgraph && indent cargo diet -n --package-size-limit 25KB) (enter git-pack && indent cargo diet -n --package-size-limit 115KB) -(enter git-odb && indent cargo diet -n --package-size-limit 115KB) +(enter git-odb && indent cargo diet -n --package-size-limit 120KB) (enter git-protocol && indent cargo diet -n --package-size-limit 50KB) (enter git-packetline && indent cargo diet -n --package-size-limit 35KB) (enter git-repository && indent cargo diet -n --package-size-limit 80KB) diff --git a/git-features/src/interrupt.rs b/git-features/src/interrupt.rs index d689e3915ea..f217ae0a697 100644 --- a/git-features/src/interrupt.rs +++ b/git-features/src/interrupt.rs @@ -4,15 +4,54 @@ use std::{ sync::atomic::{AtomicBool, Ordering}, }; +/// A wrapper for an inner iterator which will check for interruptions on each iteration, stopping the iteration when +/// that is requested. +pub struct Iter<'a, I> { + /// The actual iterator to yield elements from. + pub inner: I, + should_interrupt: &'a AtomicBool, +} + +impl<'a, I> Iter<'a, I> +where + I: Iterator, +{ + /// Create a new iterator over `inner` which checks for interruptions on each iteration on `should_interrupt`. + /// + /// Note that this means the consumer of the iterator data should also be able to access `should_interrupt` and + /// consider it when producing the final result to avoid claiming success even though the operation is only partially + /// complete. 
+    pub fn new(inner: I, should_interrupt: &'a AtomicBool) -> Self {
+        Iter {
+            inner,
+            should_interrupt,
+        }
+    }
+}
+
+impl<'a, I> Iterator for Iter<'a, I>
+where
+    I: Iterator,
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.should_interrupt.load(Ordering::Relaxed) {
+            return None;
+        }
+        self.inner.next()
+    }
+}
+
 /// A wrapper for an inner iterator which will check for interruptions on each iteration.
-pub struct Iter<'a, I, EFN> {
+pub struct IterWithErr<'a, I, EFN> {
     /// The actual iterator to yield elements from.
     pub inner: I,
     make_err: Option<EFN>,
     should_interrupt: &'a AtomicBool,
 }
 
-impl<'a, I, EFN, E> Iter<'a, I, EFN>
+impl<'a, I, EFN, E> IterWithErr<'a, I, EFN>
 where
     I: Iterator,
     EFN: FnOnce() -> E,
@@ -20,7 +59,7 @@ where
     /// Create a new iterator over `inner` which checks for interruptions on each iteration and calls `make_err()` to
     /// signal an interruption happened, causing no further items to be iterated from that point on.
     pub fn new(inner: I, make_err: EFN, should_interrupt: &'a AtomicBool) -> Self {
-        Iter {
+        IterWithErr {
             inner,
             make_err: Some(make_err),
             should_interrupt,
@@ -28,7 +67,7 @@ where
     }
 }
 
-impl<'a, I, EFN, E> Iterator for Iter<'a, I, EFN>
+impl<'a, I, EFN, E> Iterator for IterWithErr<'a, I, EFN>
 where
     I: Iterator,
     EFN: FnOnce() -> E,
@@ -38,7 +77,7 @@ where
     fn next(&mut self) -> Option<Self::Item> {
         self.make_err.as_ref()?;
         if self.should_interrupt.load(Ordering::Relaxed) {
-            return Some(Err(self.make_err.take().expect("no bug")()));
+            return self.make_err.take().map(|f| Err(f()));
         }
         match self.inner.next() {
             Some(next) => Some(Ok(next)),
diff --git a/git-features/src/lib.rs b/git-features/src/lib.rs
index 12563ceda5e..37220b29b70 100644
--- a/git-features/src/lib.rs
+++ b/git-features/src/lib.rs
@@ -35,3 +35,35 @@ pub mod zlib;
 ///
 #[cfg(feature = "time")]
 pub mod time;
+
+///
+pub mod util {
+    /// An iterator over chunks of input, producing `Vec<Item>` with a size of `size`, with the last chunk being the remainder and thus
+    /// potentially smaller than `size`.
+    pub struct Chunks<I> {
+        /// The inner iterator to ask for items.
+        pub inner: I,
+        /// The size of chunks to produce
+        pub size: usize,
+    }
+
+    impl<I> Iterator for Chunks<I>
+    where
+        I: Iterator,
+    {
+        type Item = Vec<I::Item>;
+
+        fn next(&mut self) -> Option<Self::Item> {
+            let mut res = Vec::with_capacity(self.size);
+            let mut items_left = self.size;
+            for item in &mut self.inner {
+                res.push(item);
+                items_left -= 1;
+                if items_left == 0 {
+                    break;
+                }
+            }
+            (!res.is_empty()).then(|| res)
+        }
+    }
+}
diff --git a/git-features/src/parallel/in_parallel.rs b/git-features/src/parallel/in_parallel.rs
index 7eab2087662..8925a780e44 100644
--- a/git-features/src/parallel/in_parallel.rs
+++ b/git-features/src/parallel/in_parallel.rs
@@ -13,7 +13,8 @@ pub fn join<O1: Send, O2: Send>(left: impl FnOnce() -> O1 + Send, right: impl Fn
 /// Runs `f` with a scope to be used for spawning threads that will not outlive the function call.
 /// That way it's possible to handle threads without needing the 'static lifetime for data they interact with.
 ///
-/// Note that the threads should not rely on actual parallelism as threading might be turned off entirely.
+/// Note that the threads should not rely on actual parallelism as threading might be turned off entirely, hence they should not
+/// be connected with channels, as a deadlock would occur in single-threaded mode.
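+///
+/// An illustrative sketch (not part of this patch) of usage that stays deadlock-free: threads
+/// only share `Sync` state and are joined by the scope, with no cross-thread channels.
+/// The `counter` name and the `parallel::threads` re-export path are assumptions of the example.
+///
+/// ```ignore
+/// let counter = std::sync::atomic::AtomicUsize::new(0);
+/// git_features::parallel::threads(|scope| {
+///     for _ in 0..2 {
+///         // each worker only touches the shared atomic, never a channel
+///         scope.spawn(|_| counter.fetch_add(1, std::sync::atomic::Ordering::Relaxed));
+///     }
+/// })
+/// .expect("threads do not panic");
+/// assert_eq!(counter.load(std::sync::atomic::Ordering::Relaxed), 2);
+/// ```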
pub fn threads<'env, F, R>(f: F) -> std::thread::Result where F: FnOnce(&crossbeam_utils::thread::Scope<'env>) -> R, diff --git a/git-features/src/parallel/reduce.rs b/git-features/src/parallel/reduce.rs index 3a700317a32..f9992cfd23d 100644 --- a/git-features/src/parallel/reduce.rs +++ b/git-features/src/parallel/reduce.rs @@ -10,7 +10,7 @@ mod stepped { receive_result: std::sync::mpsc::Receiver, /// `join()` will be called on these guards to assure every thread tries to send through a closed channel. When /// that happens, they break out of their loops. - _threads: Vec>, + threads: Vec>, /// The reducer is called only in the thread using the iterator, dropping it has no side effects. reducer: Option, } @@ -21,7 +21,7 @@ mod stepped { drop(std::mem::replace(&mut self.receive_result, sink)); let mut last_err = None; - for handle in std::mem::take(&mut self._threads) { + for handle in std::mem::take(&mut self.threads) { if let Err(err) = handle.join() { last_err = Some(err); }; @@ -82,7 +82,7 @@ mod stepped { receive_result }; Stepwise { - _threads: threads, + threads, receive_result, reducer: Some(reducer), } diff --git a/git-odb/src/cache.rs b/git-odb/src/cache.rs index b5f3162e379..a61fa6763ca 100644 --- a/git-odb/src/cache.rs +++ b/git-odb/src/cache.rs @@ -1,3 +1,4 @@ +use std::rc::Rc; use std::{ cell::RefCell, ops::{Deref, DerefMut}, @@ -16,6 +17,27 @@ pub type ObjectCache = dyn git_pack::cache::Object + Send + 'static; /// A constructor for boxed object caches. pub type NewObjectCacheFn = dyn Fn() -> Box + Send + Sync + 'static; +impl Cache>> { + /// Convert this cache's handle into one that keeps its store in an arc. This creates an entirely new store, + /// so should be done early to avoid unnecessary work (and mappings). + pub fn into_arc(self) -> std::io::Result>>> { + let inner = self.inner.into_arc()?; + Ok(Cache { + inner, + new_pack_cache: self.new_pack_cache, + new_object_cache: self.new_object_cache, + pack_cache: self.pack_cache, + object_cache: self.object_cache, + }) + } +} +impl Cache>> { + /// No op, as we are containing an arc handle already. + pub fn into_arc(self) -> std::io::Result>>> { + Ok(self) + } +} + impl Cache { /// Dissolve this instance, discard all caches, and return the inner implementation. pub fn into_inner(self) -> S { diff --git a/git-odb/src/store_impls/dynamic/handle.rs b/git-odb/src/store_impls/dynamic/handle.rs index e2fb27124d1..33ae0239f3b 100644 --- a/git-odb/src/store_impls/dynamic/handle.rs +++ b/git-odb/src/store_impls/dynamic/handle.rs @@ -1,3 +1,5 @@ +use std::convert::{TryFrom, TryInto}; +use std::rc::Rc; use std::{ cell::RefCell, ops::Deref, @@ -324,6 +326,39 @@ where } } +impl TryFrom<&super::Store> for super::Store { + type Error = std::io::Error; + + fn try_from(s: &super::Store) -> Result { + super::Store::at_opts( + s.path(), + crate::store::init::Options { + slots: crate::store::init::Slots::Given(s.files.len().try_into().expect("BUG: too many slots")), + object_hash: Default::default(), + use_multi_pack_index: false, + }, + ) + } +} + +impl super::Handle> { + /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store. 
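+    ///
+    /// An illustrative call-site (not part of this patch), mirroring the tests added further below;
+    /// `git_dir` is assumed:
+    ///
+    /// ```ignore
+    /// let odb = git_odb::at(git_dir.join("objects"))?.into_inner().into_arc()?;
+    /// let for_worker = odb.clone(); // `Arc`-backed clones may cross thread boundaries
+    /// ```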
+ pub fn into_arc(self) -> std::io::Result>> { + let store = Arc::new(super::Store::try_from(self.store_ref())?); + let mut cache = store.to_handle_arc(); + cache.refresh = self.refresh; + cache.max_recursion_depth = self.max_recursion_depth; + Ok(cache) + } +} + +impl super::Handle> { + /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store + pub fn into_arc(self) -> std::io::Result>> { + Ok(self) + } +} + impl Clone for super::Handle where S: Deref + Clone, diff --git a/git-pack/src/data/output/count/objects/mod.rs b/git-pack/src/data/output/count/objects/mod.rs index 91a83f0598f..e5e3cfcac16 100644 --- a/git-pack/src/data/output/count/objects/mod.rs +++ b/git-pack/src/data/output/count/objects/mod.rs @@ -61,8 +61,8 @@ where thread_limit, None, ); - let chunks = util::Chunks { - iter: objects_ids, + let chunks = git_features::util::Chunks { + inner: objects_ids, size: chunk_size, }; let seen_objs = dashmap::DashSet::::default(); diff --git a/git-pack/src/data/output/count/objects/util.rs b/git-pack/src/data/output/count/objects/util.rs index a6d85a32c0d..ea8a519fd10 100644 --- a/git-pack/src/data/output/count/objects/util.rs +++ b/git-pack/src/data/output/count/objects/util.rs @@ -22,28 +22,3 @@ mod trait_impls { } } } - -pub struct Chunks { - pub size: usize, - pub iter: I, -} - -impl Iterator for Chunks -where - I: Iterator, -{ - type Item = Vec; - - fn next(&mut self) -> Option { - let mut res = Vec::with_capacity(self.size); - let mut items_left = self.size; - for item in &mut self.iter { - res.push(item); - items_left -= 1; - if items_left == 0 { - break; - } - } - (!res.is_empty()).then(|| res) - } -} diff --git a/git-repository/src/interrupt.rs b/git-repository/src/interrupt.rs index 95957fc06d2..39fd259f7fa 100644 --- a/git-repository/src/interrupt.rs +++ b/git-repository/src/interrupt.rs @@ -62,7 +62,7 @@ pub use init::init_handler; /// A wrapper for an inner iterator which will check for interruptions on each iteration. pub struct Iter { /// The actual iterator to yield elements from. - inner: git_features::interrupt::Iter<'static, I, EFN>, + inner: git_features::interrupt::IterWithErr<'static, I, EFN>, } impl Iter @@ -74,7 +74,7 @@ where /// signal an interruption happened, causing no further items to be iterated from that point on. pub fn new(inner: I, make_err: EFN) -> Self { Iter { - inner: git_features::interrupt::Iter::new(inner, make_err, &IS_INTERRUPTED), + inner: git_features::interrupt::IterWithErr::new(inner, make_err, &IS_INTERRUPTED), } } diff --git a/git-repository/tests/easy/id.rs b/git-repository/tests/easy/id.rs index 7330784630e..88f46e4c628 100644 --- a/git-repository/tests/easy/id.rs +++ b/git-repository/tests/easy/id.rs @@ -12,11 +12,7 @@ fn prefix() -> crate::Result { // TODO: do this in-memory (with or without writing to disk) assert!( - std::process::Command::new("git") - .current_dir(worktree_dir.path()) - .args(["config", "--int", "core.abbrev", "5"]) - .status()? 
- .success(), + git_testtools::run_git(worktree_dir.path(), &["config", "--int", "core.abbrev", "5"])?.success(), "set core abbrev value successfully" ); diff --git a/git-worktree/Cargo.toml b/git-worktree/Cargo.toml index 9f93704ff69..f2a9d49573e 100644 --- a/git-worktree/Cargo.toml +++ b/git-worktree/Cargo.toml @@ -10,12 +10,24 @@ edition = "2018" [lib] doctest = false -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[test]] +name = "multi-threaded" +path = "tests/worktree-multi-threaded.rs" +required-features = ["internal-testing-git-features-parallel"] + +[[test]] +name = "single-threaded" +path = "tests/worktree-single-threaded.rs" +required-features = ["internal-testing-to-avoid-being-run-by-cargo-test-all"] + [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde1 = [ "serde", "bstr/serde1", "git-index/serde1", "git-hash/serde1", "git-object/serde1" ] +internal-testing-git-features-parallel = ["git-features/parallel"] +internal-testing-to-avoid-being-run-by-cargo-test-all = [] + [dependencies] git-index = { version = "^0.1.0", path = "../git-index" } git-hash = { version = "^0.9.0", path = "../git-hash" } @@ -24,15 +36,18 @@ git-features = { version = "^0.19.1", path = "../git-features" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} -quick-error = "2.0.1" +thiserror = "1.0.26" bstr = { version = "0.2.13", default-features = false } document-features = { version = "0.2.0", optional = true } -symlink = "0.1.0" + +[target.'cfg(unix)'.dependencies] +libc = "0.2.119" [dev-dependencies] git-testtools = { path = "../tests/tools" } git-odb = { path = "../git-odb" } +symlink = "0.1.0" walkdir = "2.3.2" tempfile = "3.2.0" diff --git a/git-worktree/src/fs.rs b/git-worktree/src/fs.rs index e301fb386df..c9baf6e185b 100644 --- a/git-worktree/src/fs.rs +++ b/git-worktree/src/fs.rs @@ -96,10 +96,10 @@ impl Capabilities { } let res = std::fs::symlink_metadata(&link_path).map(|m| m.is_symlink()); - let cleanup = std::fs::remove_file(&src_path); - crate::os::remove_symlink(&link_path) - .or_else(|_| std::fs::remove_file(&link_path)) - .and(cleanup)?; + + let cleanup = crate::os::remove_symlink(&link_path).or_else(|_| std::fs::remove_file(&link_path)); + std::fs::remove_file(&src_path).and(cleanup)?; + res } } diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs index 9241326fba8..81098247040 100644 --- a/git-worktree/src/index/checkout.rs +++ b/git-worktree/src/index/checkout.rs @@ -1,5 +1,4 @@ use bstr::BString; -use quick_error::quick_error; use std::path::PathBuf; /// A cache for directory creation to reduce the amount of stat calls when creating @@ -37,6 +36,7 @@ pub struct PathCache { mod cache { use super::PathCache; + use crate::os; use std::path::{Path, PathBuf}; impl PathCache { @@ -82,9 +82,9 @@ mod cache { } } - // TODO: handle valid state properly, handle _mode. 
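+            // Drop the previously verified components that no longer match the new relative
+            // path, keeping `valid`, `valid_relative` and `valid_components` in sync.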
for _ in 0..self.valid_components - matching_components { self.valid.pop(); + self.valid_relative.pop(); } self.valid_components = matching_components; @@ -94,34 +94,44 @@ mod cache { self.valid.push(comp); self.valid_relative.push(comp); self.valid_components += 1; - if components.peek().is_some() || target_is_dir { - #[cfg(debug_assertions)] - { - self.test_mkdir_calls += 1; - } - match std::fs::create_dir(&self.valid) { - Ok(()) => {} - Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { - let meta = self.valid.symlink_metadata()?; - if !meta.is_dir() { - if self.unlink_on_collision { - if meta.is_symlink() { - symlink::remove_symlink_auto(&self.valid)?; + let res = (|| { + if components.peek().is_some() || target_is_dir { + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } + match std::fs::create_dir(&self.valid) { + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { + let meta = self.valid.symlink_metadata()?; + if !meta.is_dir() { + if self.unlink_on_collision { + if meta.is_symlink() { + os::remove_symlink(&self.valid)?; + } else { + std::fs::remove_file(&self.valid)?; + } + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } + std::fs::create_dir(&self.valid)?; } else { - std::fs::remove_file(&self.valid)?; - } - #[cfg(debug_assertions)] - { - self.test_mkdir_calls += 1; + return Err(err); } - std::fs::create_dir(&self.valid)?; - continue; } - return Err(err); } + Err(err) => return Err(err), } - Err(err) => return Err(err), } + Ok(()) + })(); + + if let Err(err) = res { + self.valid.pop(); + self.valid_relative.pop(); + self.valid_components -= 1; + return Err(err); } } @@ -146,6 +156,10 @@ pub struct ErrorRecord { } pub struct Outcome { + /// The amount of files updated, or created. + pub files_updated: usize, + /// The amount of bytes written to disk, + pub bytes_written: u64, pub collisions: Vec, pub errors: Vec, } @@ -154,6 +168,10 @@ pub struct Outcome { pub struct Options { /// capabilities of the file system pub fs: crate::fs::Capabilities, + /// If set, don't use more than this amount of threads. + /// Otherwise, usually use as many threads as there are logical cores. + /// A value of 0 is interpreted as no-limit + pub thread_limit: Option, /// If true, we assume no file to exist in the target directory, and want exclusive access to it. /// This should be enabled when cloning to avoid checks for freshness of files. This also enables /// detection of collisions based on whether or not exclusive file creation succeeds or fails. @@ -184,6 +202,7 @@ impl Default for Options { fn default() -> Self { Options { fs: Default::default(), + thread_limit: None, destination_is_initially_empty: false, keep_going: false, trust_ctime: true, @@ -192,25 +211,19 @@ impl Default for Options { } } } - -quick_error! 
{
-    #[derive(Debug)]
-    pub enum Error {
-        IllformedUtf8{ path: BString } {
-            display("Could not convert path to UTF8: {}", path)
-        }
-        Time(err: std::time::SystemTimeError) {
-            from()
-            source(err)
-            display("The clock was off when reading file related metadata after updating a file on disk")
-        }
-        Io(err: std::io::Error) {
-            from()
-            source(err)
-            display("IO error while writing blob or reading file metadata or changing filetype")
-        }
-        ObjectNotFound{ oid: git_hash::ObjectId, path: std::path::PathBuf } {
-            display("object {} for checkout at {} not found in object database", oid.to_hex(), path.display())
-        }
-    }
+#[derive(Debug, thiserror::Error)]
+pub enum Error<E: std::error::Error + Send + Sync + 'static> {
+    #[error("Could not convert path to UTF8: {}", .path)]
+    IllformedUtf8 { path: BString },
+    #[error("The clock was off when reading file related metadata after updating a file on disk")]
+    Time(#[from] std::time::SystemTimeError),
+    #[error("IO error while writing blob or reading file metadata or changing filetype")]
+    Io(#[from] std::io::Error),
+    #[error("object {} for checkout at {} could not be retrieved from object database", .oid.to_hex(), .path.display())]
+    Find {
+        #[source]
+        err: E,
+        oid: git_hash::ObjectId,
+        path: std::path::PathBuf,
+    },
 }
diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs
index 1aad218b075..ddce5579b37 100644
--- a/git-worktree/src/index/entry.rs
+++ b/git-worktree/src/index/entry.rs
@@ -1,14 +1,15 @@
+use std::path::Path;
 use std::{convert::TryInto, fs::OpenOptions, io::Write, time::Duration};
 
 use bstr::BStr;
 use git_hash::oid;
 use git_index::Entry;
 
-use crate::index;
 use crate::index::checkout::PathCache;
+use crate::{index, os};
 
 #[cfg_attr(not(unix), allow(unused_variables))]
-pub fn checkout<Find>(
+pub fn checkout<Find, E>(
     entry: &mut Entry,
     entry_path: &BStr,
     find: &mut Find,
@@ -20,58 +21,74 @@
             ..
         },
         destination_is_initially_empty,
+        overwrite_existing,
         ..
     }: index::checkout::Options,
     buf: &mut Vec<u8>,
-) -> Result<usize, index::checkout::Error>
+) -> Result<usize, index::checkout::Error<E>>
 where
-    Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Option<git_object::BlobRef<'a>>,
+    Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<git_object::BlobRef<'a>, E>,
+    E: std::error::Error + Send + Sync + 'static,
 {
-    let dest = cache.append_relative_path_assure_leading_dir(
+    let dest_relative =
         git_features::path::from_byte_slice(entry_path).map_err(|_| index::checkout::Error::IllformedUtf8 {
             path: entry_path.to_owned(),
-        })?,
-        entry.mode,
-    )?;
+        })?;
+    let dest = cache.append_relative_path_assure_leading_dir(dest_relative, entry.mode)?;
 
     let object_size = match entry.mode {
         git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => {
-            let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound {
+            let obj = find(&entry.id, buf).map_err(|err| index::checkout::Error::Find {
+                err,
                 oid: entry.id,
                 path: dest.to_path_buf(),
             })?;
-            let mut options = OpenOptions::new();
-            options
-                .create_new(destination_is_initially_empty)
-                .create(!destination_is_initially_empty)
-                .write(true);
+
+            #[cfg_attr(not(unix), allow(unused_mut))]
+            let mut options = open_options(dest, destination_is_initially_empty, overwrite_existing);
+
+            let needs_executable_bit = executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE;
             #[cfg(unix)]
-            if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE {
+            if needs_executable_bit && destination_is_initially_empty {
                 use std::os::unix::fs::OpenOptionsExt;
+                // Note that these only work if the file was newly created, but won't if it's already
+                // existing, possibly without the executable bit set.
Thus we do this only if the file is new. options.mode(0o777); } - let mut file = options.open(&dest)?; + let mut file = try_write_or_unlink(dest, overwrite_existing, |p| options.open(p))?; file.write_all(obj.data)?; + + // For possibly existing, overwritten files, we must change the file mode explicitly. + #[cfg(unix)] + if needs_executable_bit && !destination_is_initially_empty { + use std::os::unix::fs::PermissionsExt; + let mut perm = std::fs::symlink_metadata(&dest)?.permissions(); + perm.set_mode(0o777); + std::fs::set_permissions(&dest, perm)?; + } // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. // revisit this once there is a bug to fix. update_fstat(entry, file.metadata()?)?; obj.data.len() } git_index::entry::Mode::SYMLINK => { - let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { + let obj = find(&entry.id, buf).map_err(|err| index::checkout::Error::Find { + err, oid: entry.id, path: dest.to_path_buf(), })?; let symlink_destination = git_features::path::from_byte_slice(obj.data) .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; - // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether - // a checkout is needed at all. if symlink { - crate::os::create_symlink(symlink_destination, dest)?; + try_write_or_unlink(dest, overwrite_existing, |p| { + crate::os::create_symlink(symlink_destination, p) + })?; } else { - std::fs::write(&dest, obj.data)?; + try_write_or_unlink(dest, overwrite_existing, |p| { + open_options(p, destination_is_initially_empty, overwrite_existing).open(&dest) + })? + .write_all(obj.data)?; } update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; @@ -84,7 +101,77 @@ where Ok(object_size) } -fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> { +/// Note that this works only because we assume to not race ourselves when symlinks are involved, and we do this by +/// delaying symlink creation to the end and will always do that sequentially. +/// It's still possible to fall for a race if other actors create symlinks in our path, but that's nothing to defend against. 
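+///
+/// An illustrative call (as used in `checkout` above); only errors classified by
+/// `os::indicates_collision` trigger the unlink-and-retry:
+///
+/// ```ignore
+/// let file = try_write_or_unlink(dest, overwrite_existing, |p| options.open(p))?;
+/// ```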
+fn try_write_or_unlink<T>(
+    path: &Path,
+    overwrite_existing: bool,
+    op: impl Fn(&Path) -> std::io::Result<T>,
+) -> std::io::Result<T> {
+    if overwrite_existing {
+        match op(path) {
+            Ok(res) => Ok(res),
+            Err(err) if os::indicates_collision(&err) => {
+                try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?;
+                op(path)
+            }
+            Err(err) => Err(err),
+        }
+    } else {
+        op(path)
+    }
+}
+
+fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> {
+    if path_meta.is_dir() {
+        std::fs::remove_dir_all(path)
+    } else if path_meta.is_symlink() {
+        os::remove_symlink(path)
+    } else {
+        std::fs::remove_file(path)
+    }
+}
+
+#[cfg(not(debug_assertions))]
+fn debug_assert_dest_is_no_symlink(_path: &Path) {}
+
+/// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place
+#[cfg(debug_assertions)]
+fn debug_assert_dest_is_no_symlink(path: &Path) {
+    if let Ok(meta) = path.metadata() {
+        debug_assert!(
+            !meta.is_symlink(),
+            "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}",
+            path.display()
+        );
+    }
+}
+
+fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_existing: bool) -> OpenOptions {
+    if overwrite_existing || !destination_is_initially_empty {
+        debug_assert_dest_is_no_symlink(path);
+    }
+    let mut options = OpenOptions::new();
+    options
+        .create_new(destination_is_initially_empty && !overwrite_existing)
+        .create(!destination_is_initially_empty || overwrite_existing)
+        .write(true);
+    #[cfg(unix)]
+    {
+        /// Make sure that it's impossible to follow through to the target of symlinks.
+        /// Note that this will still follow symlinks in the path, which is what we assume
+        /// has been checked separately.
+        use std::os::unix::fs::OpenOptionsExt;
+        options.custom_flags(libc::O_NOFOLLOW);
+    }
+    options
+}
+
+fn update_fstat<E>(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error<E>>
+where
+    E: std::error::Error + Send + Sync + 'static,
+{
     let ctime = meta
         .created()
         .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?;
diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs
index f3c281def9b..1e28919e09e 100644
--- a/git-worktree/src/index/mod.rs
+++ b/git-worktree/src/index/mod.rs
@@ -1,58 +1,267 @@
+use git_features::parallel::in_parallel;
 use git_features::progress::Progress;
+use git_features::{interrupt, progress};
 use git_hash::oid;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 
-use crate::index;
-use crate::index::checkout::{ErrorRecord, PathCache};
+use crate::index::checkout::PathCache;
 
 pub mod checkout;
 pub(crate) mod entry;
 
-pub fn checkout<Find>(
+/// Note that interruptions still produce an `Ok(…)` value, so the caller should also look at `should_interrupt` to correctly communicate the outcome.
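+///
+/// A call-site sketch (not part of this patch, mirroring the tests further below); `index`,
+/// `odb`, `destination` and `options` are assumed:
+///
+/// ```ignore
+/// let outcome = index::checkout(
+///     &mut index,
+///     destination.path(),
+///     move |oid, buf| odb.find_blob(oid, buf), // `find` now returns a `Result`
+///     &mut progress::Discard,
+///     &mut progress::Discard,
+///     &AtomicBool::default(),
+///     options,
+/// )?;
+/// ```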
+pub fn checkout( index: &mut git_index::State, dir: impl Into, - mut find: Find, + find: Find, files: &mut impl Progress, bytes: &mut impl Progress, + should_interrupt: &AtomicBool, options: checkout::Options, -) -> Result +) -> Result> where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, + E: std::error::Error + Send + Sync + 'static, { - if !options.destination_is_initially_empty { - todo!("deal with non-clone checkouts") + let num_files = AtomicUsize::default(); + let dir = dir.into(); + + let mut ctx = chunk::Context { + buf: Vec::new(), + path_cache: { + let mut cache = PathCache::new(dir.clone()); + cache.unlink_on_collision = options.overwrite_existing; + cache + }, + find: find.clone(), + options, + num_files: &num_files, + }; + let (chunk_size, thread_limit, num_threads) = git_features::parallel::optimize_chunk_size_and_thread_limit( + 100, + index.entries().len().into(), + options.thread_limit, + None, + ); + + let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths(), should_interrupt); + let chunk::Outcome { + mut collisions, + mut errors, + mut bytes_written, + delayed, + } = if num_threads == 1 { + chunk::process(entries_with_paths, files, bytes, &mut ctx)? + } else { + in_parallel( + git_features::util::Chunks { + inner: entries_with_paths, + size: chunk_size, + }, + thread_limit, + { + let num_files = &num_files; + move |_| { + ( + progress::Discard, + progress::Discard, + chunk::Context { + find: find.clone(), + path_cache: { + let mut cache = PathCache::new(dir.clone()); + cache.unlink_on_collision = options.overwrite_existing; + cache + }, + buf: Vec::new(), + options, + num_files, + }, + ) + } + }, + |chunk, (files, bytes, ctx)| chunk::process(chunk.into_iter(), files, bytes, ctx), + chunk::Reduce { + files, + bytes, + num_files: &num_files, + aggregate: Default::default(), + marker: Default::default(), + }, + )? + }; + + for (entry, entry_path) in delayed { + bytes_written += chunk::checkout_entry_handle_result( + entry, + entry_path, + &mut errors, + &mut collisions, + files, + bytes, + &mut ctx, + )? 
as u64; } - use std::io::ErrorKind::AlreadyExists; - let mut path_cache = PathCache::new(dir.into()); - path_cache.unlink_on_collision = options.overwrite_existing; + Ok(checkout::Outcome { + files_updated: ctx.num_files.load(Ordering::Relaxed), + collisions, + errors, + bytes_written, + }) +} + +mod chunk { + use bstr::BStr; + use git_features::progress::Progress; + use git_hash::oid; + use std::sync::atomic::{AtomicUsize, Ordering}; + + use crate::index::{checkout, checkout::PathCache, entry}; + use crate::{index, os}; + + mod reduce { + use crate::index::checkout; + use git_features::progress::Progress; + use std::marker::PhantomData; + use std::sync::atomic::{AtomicUsize, Ordering}; - let mut buf = Vec::new(); - let mut collisions = Vec::new(); - let mut errors = Vec::new(); + pub struct Reduce<'a, 'entry, P1, P2, E> { + pub files: &'a mut P1, + pub bytes: &'a mut P2, + pub num_files: &'a AtomicUsize, + pub aggregate: super::Outcome<'entry>, + pub marker: PhantomData, + } + + impl<'a, 'entry, P1, P2, E> git_features::parallel::Reduce for Reduce<'a, 'entry, P1, P2, E> + where + P1: Progress, + P2: Progress, + E: std::error::Error + Send + Sync + 'static, + { + type Input = Result, checkout::Error>; + type FeedProduce = (); + type Output = super::Outcome<'entry>; + type Error = checkout::Error; + + fn feed(&mut self, item: Self::Input) -> Result { + let item = item?; + let super::Outcome { + bytes_written, + delayed, + errors, + collisions, + } = item; + self.aggregate.bytes_written += bytes_written; + self.aggregate.delayed.extend(delayed); + self.aggregate.errors.extend(errors); + self.aggregate.collisions.extend(collisions); - for (entry, entry_path) in index.entries_mut_with_paths() { - // TODO: write test for that - if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { - files.inc(); - continue; + self.bytes.set(self.aggregate.bytes_written as usize); + self.files.set(self.num_files.load(Ordering::Relaxed)); + + Ok(()) + } + + fn finalize(self) -> Result { + Ok(self.aggregate) + } } + } + pub use reduce::Reduce; - let res = entry::checkout(entry, entry_path, &mut find, &mut path_cache, options, &mut buf); + #[derive(Default)] + pub struct Outcome<'a> { + pub collisions: Vec, + pub errors: Vec, + pub delayed: Vec<(&'a mut git_index::Entry, &'a BStr)>, + pub bytes_written: u64, + } + + pub struct Context<'a, Find> { + pub find: Find, + pub path_cache: PathCache, + pub buf: Vec, + pub options: checkout::Options, + /// We keep these shared so that there is the chance for printing numbers that aren't looking like + /// multiple of chunk sizes. Purely cosmetic. Otherwise it's the same as `files`. + pub num_files: &'a AtomicUsize, + } + + pub fn process<'entry, Find, E>( + entries_with_paths: impl Iterator, + files: &mut impl Progress, + bytes: &mut impl Progress, + ctx: &mut Context<'_, Find>, + ) -> Result, checkout::Error> + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, + { + let mut delayed = Vec::new(); + let mut collisions = Vec::new(); + let mut errors = Vec::new(); + let mut bytes_written = 0; + + for (entry, entry_path) in entries_with_paths { + // TODO: write test for that + if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { + files.inc(); + continue; + } + + // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. 
+ // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is + // before creating it. + // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety + // around writing through symlinks (even though we handle this). + // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. + if entry.mode == git_index::entry::Mode::SYMLINK { + delayed.push((entry, entry_path)); + continue; + } + + bytes_written += + checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? + as u64; + } + + Ok(Outcome { + bytes_written, + errors, + collisions, + delayed, + }) + } + + pub fn checkout_entry_handle_result( + entry: &mut git_index::Entry, + entry_path: &BStr, + errors: &mut Vec, + collisions: &mut Vec, + files: &mut impl Progress, + bytes: &mut impl Progress, + Context { + find, + path_cache, + buf, + options, + num_files, + }: &mut Context<'_, Find>, + ) -> Result> + where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, + { + let res = entry::checkout(entry, entry_path, find, path_cache, *options, buf); files.inc(); + num_files.fetch_add(1, Ordering::SeqCst); match res { - Ok(object_size) => bytes.inc_by(object_size), - #[cfg(windows)] - Err(index::checkout::Error::Io(err)) - if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => - { - collisions.push(checkout::Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); + Ok(object_size) => { + bytes.inc_by(object_size); + Ok(object_size) } - // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() - #[cfg(not(windows))] - Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { + Err(index::checkout::Error::Io(err)) if os::indicates_collision(&err) => { // We are here because a file existed or was blocked by a directory which shouldn't be possible unless // we are on a file insensitive file system. 
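+                // (`os::indicates_collision`, added in src/os.rs below, classifies the platform
+                // specifics: `AlreadyExists` everywhere, plus EISDIR(21) and the no-follow
+                // ELOOP codes (40 on Linux, 62 on macOS) on unix, or `PermissionDenied` on Windows.)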
                files.fail(format!("{}: collided ({:?})", entry_path, err.kind()));
@@ -60,18 +269,19 @@ where
                     path: entry_path.into(),
                     error_kind: err.kind(),
                 });
+                Ok(0)
             }
             Err(err) => {
                 if options.keep_going {
-                    errors.push(ErrorRecord {
+                    errors.push(checkout::ErrorRecord {
                         path: entry_path.into(),
                         error: Box::new(err),
                     });
+                    Ok(0)
                 } else {
-                    return Err(err);
+                    Err(err)
                 }
             }
         }
     }
-    Ok(checkout::Outcome { collisions, errors })
 }
diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs
index ba66fbd04c9..620162e57e0 100644
--- a/git-worktree/src/os.rs
+++ b/git-worktree/src/os.rs
@@ -1,4 +1,5 @@
 use std::io;
+use std::io::ErrorKind::AlreadyExists;
 use std::path::Path;
 
 #[cfg(not(windows))]
@@ -11,9 +12,18 @@ pub fn remove_symlink(path: &Path) -> io::Result<()> {
     std::fs::remove_file(path)
 }
 
+// TODO: use the `symlink` crate once it can delete directory symlinks
 #[cfg(windows)]
 pub fn remove_symlink(path: &Path) -> io::Result<()> {
-    symlink::remove_symlink_auto(path)
+    if let Ok(meta) = std::fs::metadata(path) {
+        if meta.is_file() {
+            std::fs::remove_file(path) // this removes the link itself
+        } else {
+            std::fs::remove_dir(path) // however, this sees the destination directory, which isn't the right thing actually
+        }
+    } else {
+        std::fs::remove_file(path).or_else(|_| std::fs::remove_dir(path))
+    }
 }
 
 #[cfg(windows)]
@@ -26,3 +36,17 @@ pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> {
         symlink_file(original, link)
     }
 }
+
+#[cfg(not(windows))]
+pub fn indicates_collision(err: &std::io::Error) -> bool {
+    // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error(), and ::FileSystemLoop respectively
+    err.kind() == AlreadyExists
+        || err.raw_os_error() == Some(21)
+        || err.raw_os_error() == Some(62) // no-follow on symlink on mac-os
+        || err.raw_os_error() == Some(40) // no-follow on symlink on ubuntu
+}
+
+#[cfg(windows)]
+pub fn indicates_collision(err: &std::io::Error) -> bool {
+    err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied
+}
diff --git a/git-worktree/tests/fixtures/make_dangerous_symlink.sh b/git-worktree/tests/fixtures/make_dangerous_symlink.sh
new file mode 100644
index 00000000000..c538a80dbdf
--- /dev/null
+++ b/git-worktree/tests/fixtures/make_dangerous_symlink.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+set -eu -o pipefail
+
+git init -q
+git config commit.gpgsign false
+
+# Every symlink is dangerous as it might either link to another directory and thus redirect
+# all writes in the path, or it might point to a file and opening the symlink actually opens
+# the target.
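+# For example, if the index contained a symlink `escape -> /`, then writing the entry
+# `escape/x` without these precautions would try to create `/x` outside the worktree.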
+# We handle this by either validating symlinks specifically or creating symlinks
+empty_oid=$(git hash-object -w --stdin </dev/null)
diff --git a/git-worktree/tests/fixtures/make_mixed_without_submodules.sh b/git-worktree/tests/fixtures/make_mixed_without_submodules.sh
-echo "content" > executable
+echo -n "content" > executable
 chmod +x executable
 mkdir dir
-echo "other content" > dir/content
+echo -n "other content" > dir/content
 mkdir dir/sub-dir
 (cd dir/sub-dir && ln -sf ../content symlink)
diff --git a/git-worktree/tests/index/mod.rs b/git-worktree/tests/index/mod.rs
deleted file mode 100644
index 1bb2e53dc6f..00000000000
--- a/git-worktree/tests/index/mod.rs
+++ /dev/null
@@ -1,351 +0,0 @@
-mod checkout {
-    mod cache {
-        use git_index::entry::Mode;
-        use git_worktree::index::checkout::PathCache;
-        use std::path::Path;
-        use tempfile::{tempdir, TempDir};
-
-        #[test]
-        fn root_is_assumed_to_exist_and_files_in_root_do_not_create_directory() {
-            let dir = tempdir().unwrap();
-            let mut cache = PathCache::new(dir.path().join("non-existing-root"));
-            assert_eq!(cache.test_mkdir_calls, 0);
-
-            let path = cache
-                .append_relative_path_assure_leading_dir("hello", Mode::FILE)
-                .unwrap();
-            assert!(!path.parent().unwrap().exists(), "prefix itself is never created");
-            assert_eq!(cache.test_mkdir_calls, 0);
-        }
-
-        #[test]
-        fn directory_paths_are_created_in_full() {
-            let (mut cache, _tmp) = new_cache();
-
-            for (name, mode) in &[
-                ("dir", Mode::DIR),
-                ("submodule", Mode::COMMIT),
-                ("file", Mode::FILE),
-                ("exe", Mode::FILE_EXECUTABLE),
-                ("link", Mode::SYMLINK),
-            ] {
-                let path = cache
-                    .append_relative_path_assure_leading_dir(Path::new("dir").join(name), *mode)
-                    .unwrap();
-                assert!(path.parent().unwrap().is_dir(), "dir exists");
-            }
-
-            assert_eq!(cache.test_mkdir_calls, 3);
-        }
-
-        #[test]
-        fn existing_directories_are_fine() {
-            let (mut cache, tmp) = new_cache();
-            std::fs::create_dir(tmp.path().join("dir")).unwrap();
-
-            let path = cache
-                .append_relative_path_assure_leading_dir("dir/file", Mode::FILE)
-                .unwrap();
-            assert!(path.parent().unwrap().is_dir(), "directory is still present");
-            assert!(!path.exists(), "it won't create the file");
-            assert_eq!(cache.test_mkdir_calls, 1);
-        }
-
-        #[test]
-        fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() {
-            let (mut cache, tmp) = new_cache();
-            let forbidden = tmp.path().join("forbidden");
-            std::fs::create_dir(&forbidden).unwrap();
-            symlink::symlink_dir(&forbidden, tmp.path().join("link-to-dir")).unwrap();
-            std::fs::write(tmp.path().join("file-in-dir"), &[]).unwrap();
-
-            for dirname in &["link-to-dir", "file-in-dir"] {
-                cache.unlink_on_collision = false;
-                let relative_path = format!("{}/file", dirname);
-                assert_eq!(
-                    cache
-                        .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE)
-                        .unwrap_err()
-                        .kind(),
-                    std::io::ErrorKind::AlreadyExists
-                );
-                cache.unlink_on_collision = true;
-
-                let path = cache
-                    .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE)
-                    .unwrap();
-                assert!(path.parent().unwrap().is_dir(), "directory was forcefully created");
-                assert!(!path.exists());
-            }
-            assert_eq!(cache.test_mkdir_calls, 4);
-        }
-
-        fn new_cache() -> (PathCache, TempDir) {
-            let dir = tempdir().unwrap();
-            let cache = PathCache::new(dir.path());
-            (cache, dir)
-        }
-    }
-
-    use bstr::ByteVec;
-    #[cfg(unix)]
-    use std::os::unix::prelude::MetadataExt;
-    use std::{
-        fs,
-        io::ErrorKind,
-        path::{Path, PathBuf},
-    };
-
-    use git_features::progress;
-    use git_object::bstr::ByteSlice;
-    use git_odb::FindExt;
-    use git_worktree::{fs::Capabilities, index, index::checkout::Collision};
-    use tempfile::TempDir;
-
-    use crate::fixture_path;
-
-    #[test]
-    fn
symlinks_become_files_if_disabled() -> crate::Result { - let opts = opts_with_symlink(false); - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - Ok(()) - } - - #[test] - fn allow_symlinks() -> crate::Result { - let opts = opts_with_symlink(true); - if !probe_gitoxide_dir()?.symlink { - eprintln!("IGNORING symlink test on file system without symlink support"); - // skip if symlinks aren't supported anyway. - return Ok(()); - }; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - Ok(()) - } - - #[test] - fn keep_going_collects_results() { - let mut opts = opts_with_symlink(true); - opts.keep_going = true; - let mut count = 0; - let (_source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir_prep_dest(opts, "make_mixed_without_submodules", |_id| { - if count < 2 { - count += 1; - false - } else { - true - } - }) - .unwrap(); - - assert_eq!( - stripped_prefix(&destination, &dir_structure(&destination)), - paths(["empty", "executable"]), - "some files could not be created" - ); - - assert!(outcome.collisions.is_empty()); - assert_eq!( - outcome - .errors - .into_iter() - .map(|r| Vec::from(r.path).into_path_buf_lossy()) - .collect::>(), - paths(["dir/content", "dir/sub-dir/symlink"]) - ); - } - - #[test] - fn no_case_related_collisions_on_case_sensitive_filesystem() { - if probe_gitoxide_dir().unwrap().ignore_case { - eprintln!("Skipping case-sensitive testing on what would be a case-insenstive file system"); - return; - } - let opts = opts_with_symlink(true); - let (source_tree, destination, index, outcome) = - checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); - - let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); - assert_eq!(num_files, index.entries().len(), "it checks out all files"); - assert!(outcome.collisions.is_empty()); - } - - #[test] - fn collisions_are_detected_on_a_case_sensitive_filesystem() { - let fs_caps = probe_gitoxide_dir().unwrap(); - if !fs_caps.ignore_case { - eprintln!("Skipping case-insensitive testing on what would be a case-senstive file system"); - return; - } - let opts = opts_with_symlink(fs_caps.symlink); - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); - - let source_files = dir_structure(&source_tree); - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["d", "file_x", "link-to-X", "x"]), - "plenty of collisions prevent a checkout" - ); - - let dest_files = dir_structure(&destination); - assert_eq!( - stripped_prefix(&destination, &dest_files), - paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), - "we checkout files in order and generally handle collision detection differently, hence the difference" - ); - - let error_kind = ErrorKind::AlreadyExists; - #[cfg(windows)] - let error_kind_dir = ErrorKind::PermissionDenied; - #[cfg(not(windows))] - let error_kind_dir = error_kind; - - assert_eq!( - outcome.collisions, - vec![ - Collision { - path: "FILE_x".into(), - error_kind, - }, - Collision { - path: "d".into(), - error_kind: error_kind_dir, - }, - Collision { - path: "file_X".into(), - error_kind, - }, - Collision { - path: "file_x".into(), 
- error_kind, - }, - Collision { - path: "x".into(), - error_kind, - }, - ], - "these files couldn't be checked out" - ); - } - - fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { - let source_files = dir_structure(source_tree); - let worktree_files = dir_structure(&destination); - - assert_eq!( - stripped_prefix(source_tree, &source_files), - stripped_prefix(&destination, &worktree_files), - ); - - let mut count = 0; - for (source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) { - count += 1; - if !allow_symlinks && source_file.is_symlink() { - assert!(!worktree_file.is_symlink()); - assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?); - } else { - assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?); - #[cfg(unix)] - assert_eq!( - fs::symlink_metadata(source_file)?.mode() & 0o700, - fs::symlink_metadata(worktree_file)?.mode() & 0o700, - "permissions of source and checked out file are comparable" - ); - } - } - Ok(count) - } - - pub fn dir_structure>(path: P) -> Vec { - let path = path.as_ref(); - let mut files: Vec<_> = walkdir::WalkDir::new(path) - .follow_links(false) - .into_iter() - .filter_entry(|e| e.path() == path || !e.file_name().to_string_lossy().starts_with('.')) - .flatten() - .filter_map(|e| (!e.path().is_dir()).then(|| e.path().to_path_buf())) - .collect(); - files.sort(); - files - } - - fn checkout_index_in_tmp_dir( - opts: index::checkout::Options, - name: &str, - ) -> crate::Result<( - PathBuf, - TempDir, - git_index::File, - git_worktree::index::checkout::Outcome, - )> { - checkout_index_in_tmp_dir_prep_dest(opts, name, |_d| true) - } - - fn checkout_index_in_tmp_dir_prep_dest( - opts: index::checkout::Options, - name: &str, - mut allow_return_object: impl FnMut(&git_hash::oid) -> bool, - ) -> crate::Result<( - PathBuf, - TempDir, - git_index::File, - git_worktree::index::checkout::Outcome, - )> { - let source_tree = fixture_path(name); - let git_dir = source_tree.join(".git"); - let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; - let odb = git_odb::at(git_dir.join("objects"))?; - let destination = tempfile::tempdir()?; - - let outcome = index::checkout( - &mut index, - destination.path(), - move |oid, buf| { - if allow_return_object(oid) { - odb.find_blob(oid, buf).ok() - } else { - None - } - }, - &mut progress::Discard, - &mut progress::Discard, - opts, - )?; - Ok((source_tree, destination, index, outcome)) - } - - fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { - source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() - } - - fn probe_gitoxide_dir() -> crate::Result { - Ok(git_worktree::fs::Capabilities::probe( - std::env::current_dir()?.join("..").join(".git"), - )) - } - - fn opts_with_symlink(symlink: bool) -> index::checkout::Options { - index::checkout::Options { - fs: git_worktree::fs::Capabilities { - symlink, - ..Default::default() - }, - destination_is_initially_empty: true, - ..Default::default() - } - } - - fn paths<'a>(p: impl IntoIterator) -> Vec { - p.into_iter().map(PathBuf::from).collect() - } -} diff --git a/git-worktree/tests/worktree-multi-threaded.rs b/git-worktree/tests/worktree-multi-threaded.rs new file mode 100644 index 00000000000..5a53f9c26f6 --- /dev/null +++ b/git-worktree/tests/worktree-multi-threaded.rs @@ -0,0 +1,2 @@ +mod worktree; +use worktree::*; diff --git a/git-worktree/tests/worktree-single-threaded.rs 
b/git-worktree/tests/worktree-single-threaded.rs new file mode 100644 index 00000000000..39b8dea85be --- /dev/null +++ b/git-worktree/tests/worktree-single-threaded.rs @@ -0,0 +1,4 @@ +#[cfg(not(feature = "internal-testing-git-features-parallel"))] +mod worktree; +#[cfg(not(feature = "internal-testing-git-features-parallel"))] +use worktree::*; diff --git a/git-worktree/tests/fs/mod.rs b/git-worktree/tests/worktree/fs/mod.rs similarity index 100% rename from git-worktree/tests/fs/mod.rs rename to git-worktree/tests/worktree/fs/mod.rs diff --git a/git-worktree/tests/worktree/index/checkout.rs b/git-worktree/tests/worktree/index/checkout.rs new file mode 100644 index 00000000000..6b2de51c025 --- /dev/null +++ b/git-worktree/tests/worktree/index/checkout.rs @@ -0,0 +1,529 @@ +mod cache { + use git_index::entry::Mode; + use git_worktree::index::checkout::PathCache; + use std::path::Path; + use tempfile::{tempdir, TempDir}; + + #[test] + fn root_is_assumed_to_exist_and_files_in_root_do_not_create_directory() { + let dir = tempdir().unwrap(); + let mut cache = PathCache::new(dir.path().join("non-existing-root")); + assert_eq!(cache.test_mkdir_calls, 0); + + let path = cache + .append_relative_path_assure_leading_dir("hello", Mode::FILE) + .unwrap(); + assert!(!path.parent().unwrap().exists(), "prefix itself is never created"); + assert_eq!(cache.test_mkdir_calls, 0); + } + + #[test] + fn directory_paths_are_created_in_full() { + let (mut cache, _tmp) = new_cache(); + + for (name, mode) in &[ + ("dir", Mode::DIR), + ("submodule", Mode::COMMIT), + ("file", Mode::FILE), + ("exe", Mode::FILE_EXECUTABLE), + ("link", Mode::SYMLINK), + ] { + let path = cache + .append_relative_path_assure_leading_dir(Path::new("dir").join(name), *mode) + .unwrap(); + assert!(path.parent().unwrap().is_dir(), "dir exists"); + } + + assert_eq!(cache.test_mkdir_calls, 3); + } + + #[test] + fn existing_directories_are_fine() { + let (mut cache, tmp) = new_cache(); + std::fs::create_dir(tmp.path().join("dir")).unwrap(); + + let path = cache + .append_relative_path_assure_leading_dir("dir/file", Mode::FILE) + .unwrap(); + assert!(path.parent().unwrap().is_dir(), "directory is still present"); + assert!(!path.exists(), "it won't create the file"); + assert_eq!(cache.test_mkdir_calls, 1); + } + + #[test] + fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() { + let (mut cache, tmp) = new_cache(); + let forbidden = tmp.path().join("forbidden"); + std::fs::create_dir(&forbidden).unwrap(); + symlink::symlink_dir(&forbidden, tmp.path().join("link-to-dir")).unwrap(); + std::fs::write(tmp.path().join("file-in-dir"), &[]).unwrap(); + + for dirname in &["file-in-dir", "link-to-dir"] { + cache.unlink_on_collision = false; + let relative_path = format!("{}/file", dirname); + assert_eq!( + cache + .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE) + .unwrap_err() + .kind(), + std::io::ErrorKind::AlreadyExists + ); + } + assert_eq!( + cache.test_mkdir_calls, 2, + "it tries to create each directory once, but it's a file" + ); + cache.test_mkdir_calls = 0; + for dirname in &["link-to-dir", "file-in-dir"] { + cache.unlink_on_collision = true; + let relative_path = format!("{}/file", dirname); + let path = cache + .append_relative_path_assure_leading_dir(&relative_path, Mode::FILE) + .unwrap(); + assert!(path.parent().unwrap().is_dir(), "directory was forcefully created"); + assert!(!path.exists()); + } + assert_eq!( + cache.test_mkdir_calls, 4, + "like before, but it unlinks what's there and tries again" + 
); + } + + fn new_cache() -> (PathCache, TempDir) { + let dir = tempdir().unwrap(); + let cache = PathCache::new(dir.path()); + (cache, dir) + } +} + +#[cfg(unix)] +use std::os::unix::prelude::MetadataExt; +use std::{ + fs, + io::ErrorKind, + path::{Path, PathBuf}, +}; + +use git_features::progress; +use git_object::bstr::ByteSlice; +use git_odb::FindExt; +use git_worktree::{fs::Capabilities, index, index::checkout::Collision}; +use std::io::ErrorKind::AlreadyExists; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use tempfile::TempDir; + +use crate::fixture_path; + +#[test] +fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() { + let mut opts = opts_from_probe(); + // without overwrite mode, everything is safe. + opts.overwrite_existing = false; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts, "make_dangerous_symlink").unwrap(); + + let source_files = dir_structure(&source_tree); + let worktree_files = dir_structure(&destination); + + if opts.fs.ignore_case { + assert_eq!( + stripped_prefix(&source_tree, &source_files), + stripped_prefix(&destination, &worktree_files), + ); + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "FAKE-DIR".into(), + error_kind: AlreadyExists + }, + Collision { + path: "FAKE-FILE".into(), + error_kind: AlreadyExists + } + ] + ); + } else { + let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; + assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); + assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); + assert!(outcome.collisions.is_empty()); + }; +} + +#[test] +fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { + let mut opts = opts_from_probe(); + // with overwrite mode + opts.overwrite_existing = true; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts, "make_dangerous_symlink").unwrap(); + + let source_files = dir_structure(&source_tree); + let worktree_files = dir_structure(&destination); + + if opts.fs.ignore_case { + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]), + ); + assert_eq!( + stripped_prefix(&destination, &worktree_files), + paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]), + ); + assert!(outcome.collisions.is_empty()); + } else { + let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; + assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); + assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); + assert!(outcome.collisions.is_empty()); + }; +} + +#[test] +fn overwriting_files_and_lone_directories_works() { + let mut opts = opts_from_probe(); + opts.overwrite_existing = true; + opts.destination_is_initially_empty = false; + let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( + opts, + "make_mixed_without_submodules", + |_| true, + |d| { + let empty = d.join("empty"); + symlink::symlink_dir(d.join(".."), &empty)?; // empty is symlink to the directory above + std::fs::write(d.join("executable"), b"foo")?; // executable is regular file and has different content + let dir = d.join("dir"); + std::fs::create_dir(&dir)?; + std::fs::create_dir(dir.join("content"))?; // 'content' is a directory now + + let dir = dir.join("sub-dir"); + std::fs::create_dir(&dir)?; + + symlink::symlink_dir(empty, dir.join("symlink"))?; // 'symlink' is a 
symlink to another file
+            Ok(())
+        },
+    )
+    .unwrap();
+
+    assert!(outcome.collisions.is_empty());
+
+    assert_eq!(
+        stripped_prefix(&destination, &dir_structure(&destination)),
+        paths(["dir/content", "dir/sub-dir/symlink", "empty", "executable"])
+    );
+    let meta = std::fs::symlink_metadata(destination.path().join("empty")).unwrap();
+    assert!(meta.is_file(), "'empty' is now a file");
+    assert_eq!(meta.len(), 0, "'empty' is indeed empty");
+
+    let exe = destination.path().join("executable");
+    assert_eq!(
+        std::fs::read(&exe).unwrap(),
+        b"content",
+        "'exe' has the correct content"
+    );
+
+    let meta = std::fs::symlink_metadata(exe).unwrap();
+    assert!(meta.is_file());
+    if opts.fs.executable_bit {
+        #[cfg(unix)]
+        assert_eq!(meta.mode() & 0o700, 0o700, "the executable bit is set where supported");
+    }
+
+    assert_eq!(
+        std::fs::read(destination.path().join("dir/content")).unwrap(),
+        b"other content"
+    );
+
+    let symlink = destination.path().join("dir/sub-dir/symlink");
+    // on windows, git won't create symlinks as its probe won't detect the capability, even though we do.
+    assert_eq!(std::fs::symlink_metadata(&symlink).unwrap().is_symlink(), cfg!(unix));
+    assert_eq!(std::fs::read(symlink).unwrap(), b"other content");
+}
+
+#[test]
+fn symlinks_become_files_if_disabled() -> crate::Result {
+    let mut opts = opts_from_probe();
+    opts.fs.symlink = false;
+    let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?;
+
+    assert_equality(&source_tree, &destination, opts.fs.symlink)?;
+    assert!(outcome.collisions.is_empty());
+    Ok(())
+}
+
+#[test]
+fn allow_or_disallow_symlinks() -> crate::Result {
+    let mut opts = opts_from_probe();
+    for allowed in &[false, true] {
+        opts.fs.symlink = *allowed;
+        let (source_tree, destination, _index, outcome) =
+            checkout_index_in_tmp_dir(opts, "make_mixed_without_submodules")?;
+
+        assert_equality(&source_tree, &destination, opts.fs.symlink)?;
+        assert!(outcome.collisions.is_empty());
+    }
+    Ok(())
+}
+
+#[test]
+fn keep_going_collects_results() {
+    let mut opts = opts_from_probe();
+    opts.keep_going = true;
+    let count = AtomicUsize::default();
+    let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts(
+        opts,
+        "make_mixed_without_submodules",
+        |_id| {
+            if let Ok(_) = count.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
+                (current < 2).then(|| current + 1)
+            }) {
+                false
+            } else {
+                true
+            }
+        },
+        |_| Ok(()),
+    )
+    .unwrap();
+
+    assert_eq!(
+        sort_when_threaded(
+            outcome
+                .errors
+                .iter()
+                .map(|r| r.path.to_path_lossy().into_owned())
+                .collect()
+        ),
+        sort_when_threaded(paths(if cfg!(unix) {
+            ["dir/content", "empty"]
+        } else {
+            // not actually a symlink anymore: even though symlinks are supported, git thinks differently.
+ ["dir/content", "dir/sub-dir/symlink"] + })) + ); + + assert_eq!( + stripped_prefix(&destination, &dir_structure(&destination)), + paths(if cfg!(unix) { + ["dir/sub-dir/symlink", "executable"] + } else { + ["empty", "executable"] + }), + "some files could not be created" + ); + + assert!(outcome.collisions.is_empty()); +} + +#[test] +fn no_case_related_collisions_on_case_sensitive_filesystem() { + let opts = opts_from_probe(); + if opts.fs.ignore_case { + eprintln!("Skipping case-sensitive testing on what would be a case-insensitive file system"); + return; + } + let (source_tree, destination, index, outcome) = + checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + + assert!(outcome.collisions.is_empty()); + let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); + assert_eq!(num_files, index.entries().len(), "it checks out all files"); +} + +#[test] +fn collisions_are_detected_on_a_case_insensitive_filesystem() { + let opts = opts_from_probe(); + if !opts.fs.ignore_case { + eprintln!("Skipping case-insensitive testing on what would be a case-sensitive file system"); + return; + } + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + + let source_files = dir_structure(&source_tree); + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["d", "file_x", "link-to-X", "x"]), + "plenty of collisions prevent a checkout" + ); + + let dest_files = dir_structure(&destination); + let multi_threaded = git_features::parallel::num_threads(None) > 1; + if multi_threaded { + assert_eq!( + dest_files.len(), + 5, + "can only assert on number as it's racily creating files so unclear which one clashes" + ); + } else { + assert_eq!( + stripped_prefix(&destination, &dest_files), + paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), + "we checkout files in order and generally handle collision detection differently, hence the difference" + ); + } + + let error_kind = ErrorKind::AlreadyExists; + #[cfg(windows)] + let error_kind_dir = ErrorKind::PermissionDenied; + #[cfg(not(windows))] + let error_kind_dir = error_kind; + + if multi_threaded { + assert_eq!( + outcome.collisions.len(), + 5, + "can only assert on number as it's racily creating files so unclear which one clashes" + ); + } else { + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "FILE_x".into(), + error_kind, + }, + Collision { + path: "d".into(), + error_kind: error_kind_dir, + }, + Collision { + path: "file_X".into(), + error_kind, + }, + Collision { + path: "file_x".into(), + error_kind, + }, + Collision { + path: "x".into(), + error_kind, + }, + ], + "these files couldn't be checked out" + ); + } +} + +fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { + let source_files = dir_structure(source_tree); + let worktree_files = dir_structure(&destination); + + assert_eq!( + stripped_prefix(source_tree, &source_files), + stripped_prefix(&destination, &worktree_files), + ); + + let mut count = 0; + for (source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) { + count += 1; + if !allow_symlinks && source_file.is_symlink() { + assert!(!worktree_file.is_symlink()); + assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?); + } else { + assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?); + #[cfg(unix)] + assert_eq!( + fs::symlink_metadata(source_file)?.mode() & 0o700, + 
fs::symlink_metadata(worktree_file)?.mode() & 0o700, + "permissions of source and checked out file are comparable" + ); + } + } + Ok(count) +} + +pub fn dir_structure<P: AsRef<Path>>(path: P) -> Vec<PathBuf> { + let path = path.as_ref(); + let mut files: Vec<_> = walkdir::WalkDir::new(path) + .follow_links(false) + .into_iter() + .filter_entry(|e| e.path() == path || !e.file_name().to_string_lossy().starts_with('.')) + .flatten() + .filter_map(|e| (!e.path().symlink_metadata().map_or(true, |m| m.is_dir())).then(|| e.path().to_path_buf())) + .collect(); + files.sort(); + files +} + +fn checkout_index_in_tmp_dir( + opts: index::checkout::Options, + name: &str, +) -> crate::Result<( + PathBuf, + TempDir, + git_index::File, + git_worktree::index::checkout::Outcome, +)> { + checkout_index_in_tmp_dir_opts(opts, name, |_d| true, |_| Ok(())) +} + +fn checkout_index_in_tmp_dir_opts( + opts: index::checkout::Options, + name: &str, + mut allow_return_object: impl FnMut(&git_hash::oid) -> bool + Send + Clone, + prep_dest: impl Fn(&Path) -> std::io::Result<()>, +) -> crate::Result<( + PathBuf, + TempDir, + git_index::File, + git_worktree::index::checkout::Outcome, +)> { + let source_tree = fixture_path(name); + let git_dir = source_tree.join(".git"); + let mut index = git_index::File::at(git_dir.join("index"), Default::default())?; + let odb = git_odb::at(git_dir.join("objects"))?.into_inner().into_arc()?; + let destination = tempfile::tempdir_in(std::env::current_dir()?)?; + prep_dest(destination.path())?; + + let outcome = index::checkout( + &mut index, + destination.path(), + move |oid, buf| { + if allow_return_object(oid) { + odb.find_blob(oid, buf) + } else { + Err(git_odb::find::existing_object::Error::NotFound { oid: oid.to_owned() }) + } + }, + &mut progress::Discard, + &mut progress::Discard, + &AtomicBool::default(), + opts, + )?; + Ok((source_tree, destination, index, outcome)) +} + +fn stripped_prefix(prefix: impl AsRef<Path>, source_files: &[PathBuf]) -> Vec<&Path> { + source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() +} + +fn probe_gitoxide_dir() -> crate::Result<Capabilities> { + Ok(git_worktree::fs::Capabilities::probe( + std::env::current_dir()?.join("..").join(".git"), + )) +} + +fn opts_from_probe() -> index::checkout::Options { + index::checkout::Options { + fs: probe_gitoxide_dir().unwrap(), + destination_is_initially_empty: true, + thread_limit: git_features::parallel::num_threads(None).into(), + ..Default::default() + } +} + +fn paths<'a>(p: impl IntoIterator<Item = &'a str>) -> Vec<PathBuf> { + p.into_iter().map(PathBuf::from).collect() +} + +fn sort_when_threaded<T>(mut p: Vec<T>) -> Vec<T> +where + T: Ord, +{ + if git_features::parallel::num_threads(None) > 1 { + p.sort() + } + p +} diff --git a/git-worktree/tests/worktree/index/mod.rs b/git-worktree/tests/worktree/index/mod.rs new file mode 100644 index 00000000000..24370dce48a --- /dev/null +++ b/git-worktree/tests/worktree/index/mod.rs @@ -0,0 +1 @@ +mod checkout; diff --git a/git-worktree/tests/worktree.rs b/git-worktree/tests/worktree/mod.rs similarity index 75% rename from git-worktree/tests/worktree.rs rename to git-worktree/tests/worktree/mod.rs index ad199f619be..2a9fb1aecc0 100644 --- a/git-worktree/tests/worktree.rs +++ b/git-worktree/tests/worktree/mod.rs @@ -1,9 +1,8 @@ -use std::path::{Path, PathBuf}; - mod fs; mod index; -type Result = std::result::Result<(), Box<dyn std::error::Error>>; +use std::path::{Path, PathBuf}; +pub type Result<T = ()> = std::result::Result<T, Box<dyn std::error::Error>>; pub fn fixture_path(name: &str) -> PathBuf { let dir =
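Note that the tests above probe filesystem capabilities on the repository's own `.git` directory, while `gitoxide-core` below probes the checkout destination, which is what matters when writing files. A minimal sketch of deriving options from such a probe (not part of the change; field choice illustrative):

```rust
use git_worktree::{fs::Capabilities, index};
use std::path::Path;

// Probe the directory the worktree files will actually be written to and feed
// the detected capabilities (symlinks, case folding, executable bit, …) into
// the checkout options; everything else stays at its default.
fn options_for(dest: &Path) -> index::checkout::Options {
    index::checkout::Options {
        fs: Capabilities::probe(dest),
        destination_is_initially_empty: true,
        ..Default::default()
    }
}
```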
diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs new file mode 100644 index 00000000000..3bbe1f269d9 --- /dev/null +++ b/gitoxide-core/src/index/checkout.rs @@ -0,0 +1,158 @@ +use crate::index; +use crate::index::{parse_file, Options}; +use anyhow::bail; +use git::{odb::FindExt, worktree::index::checkout, Progress}; +use git_repository as git; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; + +pub fn checkout_exclusive( + index_path: impl AsRef<Path>, + dest_directory: impl AsRef<Path>, + repo: Option<PathBuf>, + mut err: impl std::io::Write, + mut progress: impl Progress, + should_interrupt: &AtomicBool, + index::checkout_exclusive::Options { + index: Options { object_hash, .. }, + empty_files, + keep_going, + thread_limit, + }: index::checkout_exclusive::Options, +) -> anyhow::Result<()> { + let repo = repo + .map(|dir| git_repository::discover(dir).map(|r| r.apply_environment())) + .transpose()?; + + let dest_directory = dest_directory.as_ref(); + if dest_directory.exists() { + bail!( + "Refusing to check out index into existing directory '{}' - remove it and try again", + dest_directory.display() + ) + } + std::fs::create_dir_all(dest_directory)?; + + let mut index = parse_file(index_path, object_hash)?; + + let mut num_skipped = 0; + let maybe_symlink_mode = if !empty_files && repo.is_some() { + git::index::entry::Mode::DIR + } else { + git::index::entry::Mode::SYMLINK + }; + for entry in index.entries_mut().iter_mut().filter(|e| { + e.mode + .contains(maybe_symlink_mode | git::index::entry::Mode::DIR | git::index::entry::Mode::COMMIT) + }) { + entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE); + num_skipped += 1; + } + if num_skipped > 0 { + progress.info(format!("Skipping {} DIR/SYMLINK/COMMIT entries", num_skipped)); + } + + let opts = git::worktree::index::checkout::Options { + fs: git::worktree::fs::Capabilities::probe(dest_directory), + + destination_is_initially_empty: true, + overwrite_existing: false, + keep_going, + thread_limit, + ..Default::default() + }; + + let mut files = progress.add_child("checkout"); + let mut bytes = progress.add_child("writing"); + + let entries_for_checkout = index.entries().len() - num_skipped; + files.init(Some(entries_for_checkout), git::progress::count("files")); + bytes.init(None, git::progress::bytes()); + + let start = std::time::Instant::now(); + let no_repo = repo.is_none(); + let checkout::Outcome { + errors, + collisions, + files_updated, + bytes_written, + } = match repo { + Some(repo) => git::worktree::index::checkout( + &mut index, + dest_directory, + { + let objects = repo.objects.into_arc()?; + move |oid, buf| { + objects.find_blob(oid, buf).ok(); + if empty_files { + // We always want to query the ODB here… + objects.find_blob(oid, buf)?; + buf.clear(); + // …but write nothing + Ok(git::objs::BlobRef { data: buf }) + } else { + objects.find_blob(oid, buf) + } + } + }, + &mut files, + &mut bytes, + should_interrupt, + opts, + ), + None => git::worktree::index::checkout( + &mut index, + dest_directory, + |_, buf| { + buf.clear(); + Ok(git::objs::BlobRef { data: buf }) + }, + &mut files, + &mut bytes, + should_interrupt, + opts, + ), + }?; + + files.show_throughput(start); + bytes.show_throughput(start); + + progress.done(format!( + "Created {} {} files{} ({})", + files_updated, + no_repo.then(|| "empty").unwrap_or_default(), + should_interrupt + .load(Ordering::Relaxed) + .then(|| { + format!( + " of {}", + entries_for_checkout.saturating_sub(errors.len() + collisions.len()) + ) + }) + .unwrap_or_default(), + git_features::progress::bytes() + .unwrap() + .display(bytes_written as usize, None, None) + )); + + if !(collisions.is_empty() && errors.is_empty()) { + let mut messages = Vec::new(); + if !errors.is_empty() { + messages.push(format!("kept going through {} error(s)", errors.len())); + for record in errors { + writeln!(err, "{}: {}", record.path, record.error).ok(); + } + } + if !collisions.is_empty() { + messages.push(format!("encountered {} collision(s)", collisions.len())); + for col in collisions { + writeln!(err, "{}: collision ({:?})", col.path, col.error_kind).ok(); + } + } + bail!( + "One or more errors occurred - checkout is incomplete: {}", + messages.join(", ") + ); + } + Ok(()) +}
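`checkout_exclusive` now threads `should_interrupt: &AtomicBool` through to the worktree checkout and, when the flag trips, reports how many of the planned files were actually created. A self-contained sketch of that flag's life-cycle, with a timer thread standing in for a real signal handler (all names here are illustrative):

```rust
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};
use std::{thread, time::Duration};

fn main() {
    // The flag a caller would pass as `should_interrupt`.
    let should_interrupt = Arc::new(AtomicBool::new(false));

    // Stand-in for a Ctrl-C handler: trip the flag from another thread.
    let flag = Arc::clone(&should_interrupt);
    thread::spawn(move || {
        thread::sleep(Duration::from_millis(50));
        flag.store(true, Ordering::SeqCst);
    });

    // Stand-in for the checkout loop: poll the flag per unit of work,
    // then report partial progress instead of claiming full success.
    let mut steps_done = 0u64;
    while !should_interrupt.load(Ordering::Relaxed) {
        steps_done += 1;
    }
    eprintln!("interrupted after {} steps - result is partial", steps_done);
}
```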
diff --git a/gitoxide-core/src/index/entries.rs b/gitoxide-core/src/index/entries.rs index 01e718824c5..b6133866103 100644 --- a/gitoxide-core/src/index/entries.rs +++ b/gitoxide-core/src/index/entries.rs @@ -1,4 +1,35 @@ +use crate::index::{parse_file, Options}; use git_repository as git; +use std::path::Path; + +pub fn entries( + index_path: impl AsRef<Path>, + mut out: impl std::io::Write, + Options { object_hash, format }: Options, +) -> anyhow::Result<()> { + use crate::OutputFormat::*; + let file = parse_file(index_path, object_hash)?; + + #[cfg(feature = "serde1")] + if let Json = format { + out.write_all(b"[\n")?; + } + + let mut entries = file.entries().iter().peekable(); + while let Some(entry) = entries.next() { + match format { + Human => to_human(&mut out, &file, entry)?, + #[cfg(feature = "serde1")] + Json => to_json(&mut out, &file, entry, entries.peek().is_none())?, + } + } + + #[cfg(feature = "serde1")] + if let Json = format { + out.write_all(b"]\n")?; + } + Ok(()) +} #[cfg(feature = "serde1")] pub(crate) fn to_json( diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index 7f1dc156cd6..3a2af2460e4 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -1,9 +1,6 @@ -use anyhow::bail; -use std::path::{Path, PathBuf}; +use std::path::Path; use git_repository as git; -use git_repository::worktree::index::checkout; -use git_repository::{odb::FindExt, Progress}; pub struct Options { pub object_hash: git::hash::Kind, @@ -11,9 +8,37 @@ } mod entries; +pub use entries::entries; pub mod information; +fn parse_file(index_path: impl AsRef<Path>, object_hash: git::hash::Kind) -> anyhow::Result<git::index::File> { + git::index::File::at( + index_path.as_ref(), + git::index::decode::Options { + object_hash, + ..Default::default() + }, + ) + .map_err(Into::into) +} + +pub mod checkout_exclusive { + pub struct Options { + pub index: super::Options, + /// If true, all files will be written with zero bytes despite having made an ODB lookup. + pub empty_files: bool, + pub keep_going: bool, + /// If set, don't use more than this number of threads. + /// Otherwise, usually use as many threads as there are logical cores.
+ /// A value of 0 is interpreted as no limit. + pub thread_limit: Option<usize>, + } +} + +mod checkout; +pub use checkout::checkout_exclusive; + pub fn verify( index_path: impl AsRef<Path>, mut out: impl std::io::Write, @@ -61,178 +86,3 @@ pub fn information( } } } - -pub fn entries( - index_path: impl AsRef<Path>, - mut out: impl std::io::Write, - Options { object_hash, format }: Options, -) -> anyhow::Result<()> { - use crate::OutputFormat::*; - let file = parse_file(index_path, object_hash)?; - - #[cfg(feature = "serde1")] - if let Json = format { - out.write_all(b"[\n")?; - } - - let mut entries = file.entries().iter().peekable(); - while let Some(entry) = entries.next() { - match format { - Human => entries::to_human(&mut out, &file, entry)?, - #[cfg(feature = "serde1")] - Json => entries::to_json(&mut out, &file, entry, entries.peek().is_none())?, - } - } - - #[cfg(feature = "serde1")] - if let Json = format { - out.write_all(b"]\n")?; - } - Ok(()) -} - -fn parse_file(index_path: impl AsRef<Path>, object_hash: git::hash::Kind) -> anyhow::Result<git::index::File> { - git::index::File::at( - index_path.as_ref(), - git::index::decode::Options { - object_hash, - ..Default::default() - }, - ) - .map_err(Into::into) -} - -pub mod checkout_exclusive { - pub struct Options { - pub index: super::Options, - /// If true, all files will be written with zero bytes despite having made an ODB lookup. - pub empty_files: bool, - pub keep_going: bool, - } -} - -pub fn checkout_exclusive( - index_path: impl AsRef<Path>, - dest_directory: impl AsRef<Path>, - repo: Option<PathBuf>, - mut err: impl std::io::Write, - mut progress: impl Progress, - checkout_exclusive::Options { - index: Options { object_hash, .. }, - empty_files, - keep_going, - }: checkout_exclusive::Options, -) -> anyhow::Result<()> { - let repo = repo - .map(|dir| git_repository::discover(dir).map(|r| r.apply_environment())) - .transpose()?; - - let dest_directory = dest_directory.as_ref(); - if dest_directory.exists() { - bail!( - "Refusing to checkout index into existing directory '{}' - remove it and try again", - dest_directory.display() - ) - } - std::fs::create_dir_all(dest_directory)?; - - let mut index = parse_file(index_path, object_hash)?; - - let mut num_skipped = 0; - let maybe_symlink_mode = if !empty_files && repo.is_some() { - git::index::entry::Mode::DIR - } else { - git::index::entry::Mode::SYMLINK - }; - for entry in index.entries_mut().iter_mut().filter(|e| { - e.mode - .contains(maybe_symlink_mode | git::index::entry::Mode::DIR | git::index::entry::Mode::COMMIT) - }) { - entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE); - num_skipped += 1; - } - if num_skipped > 0 { - progress.info(format!("Skipping {} DIR/SYMLINK/COMMIT entries", num_skipped)); - } - - let opts = git::worktree::index::checkout::Options { - fs: git::worktree::fs::Capabilities::probe(dest_directory), - - // TODO: turn the two following flags into an enum - destination_is_initially_empty: true, - overwrite_existing: false, - keep_going, - ..Default::default() - }; - - let mut files = progress.add_child("checkout"); - let mut bytes = progress.add_child("writing"); - - let entries_for_checkout = index.entries().len() - num_skipped; - files.init(Some(entries_for_checkout), git::progress::count("files")); - bytes.init(None, git::progress::bytes()); - - let start = std::time::Instant::now(); - let checkout::Outcome { errors, collisions } = match &repo { - Some(repo) => git::worktree::index::checkout( - &mut index, - dest_directory, - |oid, buf| { - repo.objects.find_blob(oid, buf).ok(); - if empty_files { -
// We always want to query the ODB here… - repo.objects.find_blob(oid, buf).ok(); - buf.clear(); - // …but write nothing - Some(git::objs::BlobRef { data: buf }) - } else { - repo.objects.find_blob(oid, buf).ok() - } - }, - &mut files, - &mut bytes, - opts, - ), - None => git::worktree::index::checkout( - &mut index, - dest_directory, - |_, buf| { - buf.clear(); - Some(git::objs::BlobRef { data: buf }) - }, - &mut files, - &mut bytes, - opts, - ), - }?; - - files.show_throughput(start); - bytes.show_throughput(start); - - progress.done(format!( - "Created {} {} files", - entries_for_checkout.saturating_sub(errors.len() + collisions.len()), - repo.is_none().then(|| "empty").unwrap_or_default() - )); - - if !(collisions.is_empty() && errors.is_empty()) { - let mut messages = Vec::new(); - if !errors.is_empty() { - messages.push(format!("kept going through {} errors(s)", errors.len())); - for record in errors { - writeln!(err, "{}: {}", record.path, record.error).ok(); - } - } - if !collisions.is_empty() { - messages.push(format!("encountered {} collision(s)", collisions.len())); - for col in collisions { - writeln!(err, "{}: collision ({:?})", col.path, col.error_kind).ok(); - } - } - bail!( - "One or more errors occurred - checkout is incomplete: {}", - messages.join(", ") - ); - } - Ok(()) -} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 1300a9b5575..11ef6ed4bf1 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -97,10 +97,12 @@ pub fn main() -> Result<()> { repository, err, progress, + &should_interrupt, core::index::checkout_exclusive::Options { index: core::index::Options { object_hash, format }, empty_files, keep_going, + thread_limit, }, ) }, diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index 30573cb241a..c3123ce2faa 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -12,6 +12,13 @@ pub use tempfile; static SCRIPT_IDENTITY: Lazy<Mutex<BTreeMap<PathBuf, u32>>> = Lazy::new(|| Mutex::new(BTreeMap::new())); +pub fn run_git(working_dir: &Path, args: &[&str]) -> std::io::Result<std::process::ExitStatus> { + std::process::Command::new("git") + .current_dir(working_dir) + .args(args) + .status() +} + pub fn hex_to_id(hex: &str) -> git_hash::ObjectId { git_hash::ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") }
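The new `run_git` helper shells out to the `git` binary in the given working directory and hands back the child's exit status. A hypothetical fixture-setup use, assuming the test-tools crate is consumed as `git_testtools` (the `init_fixture` helper is illustrative, not part of this change):

```rust
use std::path::Path;

// Prepare an empty repository in `dir` for a test, failing loudly if git itself fails.
fn init_fixture(dir: &Path) -> std::io::Result<()> {
    let status = git_testtools::run_git(dir, &["init", "-q"])?;
    assert!(status.success(), "git init should succeed in {:?}", dir);
    Ok(())
}
```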