diff --git a/Cargo.lock b/Cargo.lock index 352882b0508..cf0bf0a4a31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1613,10 +1613,12 @@ dependencies = [ "git-traverse 0.12.0", "git-url 0.3.5", "git-validate 0.5.3", + "git-worktree", "log", "signal-hook", "tempfile", "thiserror", + "unicode-normalization", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c40facc7036..d253318fa51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ default = ["max"] ## Makes the crate execute as fast as possible by supporting parallel computation of otherwise long-running functions ## as well as fast, hardware accelerated hashing, along with a faster zlib backend. ## If disabled, the binary will be visibly smaller. -fast = ["git-features/parallel", "git-features/fast-sha1", "git-features/zlib-ng-compat"] +fast = ["git-features/parallel", "git-features/fast-sha1", "git-features/zlib-ng-compat", "git-repository/max-performance"] ## Use `clap` 3.0 to build the prettiest, best documented and most user-friendly CLI at the expense of binary size. ## Provides a terminal user interface for detailed and exhaustive progress. diff --git a/crate-status.md b/crate-status.md index f18d48d6402..e9f6d71b751 100644 --- a/crate-status.md +++ b/crate-status.md @@ -323,6 +323,7 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/git-lock/README. * [x] initialize * [ ] Proper configuration depending on platform (e.g. ignorecase, filemode, …) * [ ] All mutations are multi-process safe and this is tested and configurable (i.e. abort or wait if lock is encountered) +* support for unicode-precomposition of command-line arguments (needs explicit use in parent application) * **Easy** (_porcelain_) * **Id** * [x] short hashes with detection of ambiguity. 
diff --git a/etc/check-package-size.sh b/etc/check-package-size.sh index 0b9c48a5f94..1b9dff874ea 100755 --- a/etc/check-package-size.sh +++ b/etc/check-package-size.sh @@ -41,4 +41,4 @@ echo "in root: gitoxide CLI" (enter git-packetline && indent cargo diet -n --package-size-limit 35KB) (enter git-repository && indent cargo diet -n --package-size-limit 80KB) (enter git-transport && indent cargo diet -n --package-size-limit 50KB) -(enter gitoxide-core && indent cargo diet -n --package-size-limit 50KB) +(enter gitoxide-core && indent cargo diet -n --package-size-limit 60KB) diff --git a/git-index/src/access.rs b/git-index/src/access.rs index 123bdd32e34..92ae7309b20 100644 --- a/git-index/src/access.rs +++ b/git-index/src/access.rs @@ -10,6 +10,9 @@ impl State { pub fn entries(&self) -> &[Entry] { &self.entries } + pub fn entries_mut(&mut self) -> &mut [Entry] { + &mut self.entries + } pub fn entries_mut_with_paths(&mut self) -> impl Iterator { let paths = &self.path_backing; self.entries.iter_mut().map(move |e| { diff --git a/git-ref/tests/packed/find.rs b/git-ref/tests/packed/find.rs index ea965ebd782..fc8f5f16045 100644 --- a/git-ref/tests/packed/find.rs +++ b/git-ref/tests/packed/find.rs @@ -175,11 +175,7 @@ fn find_speed() -> crate::Result { let packed = store.open_packed_buffer()?.expect("packed-refs present"); let start = std::time::Instant::now(); let mut num_refs = 0; - #[cfg(windows)] - let count = 500; - #[cfg(not(windows))] - let count = 10_000; - for r in packed.iter()?.take(count) { + for r in packed.iter()?.take(10_000) { num_refs += 1; let r = r?; assert_eq!( diff --git a/git-repository/Cargo.toml b/git-repository/Cargo.toml index c94d13e9eb9..c0c9c878572 100644 --- a/git-repository/Cargo.toml +++ b/git-repository/Cargo.toml @@ -49,7 +49,7 @@ max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git- local-time-support = ["git-actor/local-time-support"] ## Re-export stability tier 2 crates for convenience and make `Repository` 
struct fields with types from these crates publicly accessible. ## Doing so is less stable than the stability tier 1 that `git-repository` is a member of. -unstable = ["git-index"] +unstable = ["git-index", "git-worktree"] ## Print debugging information about usage of object database caches, useful for tuning cache sizes. cache-efficiency-debug = ["git-features/cache-efficiency-debug"] @@ -77,6 +77,7 @@ git-features = { version = "^0.19.1", path = "../git-features", features = ["pro # unstable only git-index = { version ="^0.1.0", path = "../git-index", optional = true } +git-worktree = { version ="^0.0.0", path = "../git-worktree", optional = true } signal-hook = { version = "0.3.9", default-features = false } thiserror = "1.0.26" @@ -86,6 +87,9 @@ log = "0.4.14" document-features = { version = "0.2.0", optional = true } +[target.'cfg(target_vendor = "apple")'.dependencies] +unicode-normalization = { version = "0.1.19", default-features = false } + [dev-dependencies] git-testtools = { path = "../tests/tools" } anyhow = "1" diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 64bc082b3b2..5fc2ef6fb51 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -88,6 +88,7 @@ //! * [`actor`] //! * [`bstr`][bstr] //! * [`index`] +//! * [`worktree`] //! * [`objs`] //! * [`odb`] //! * [`pack`][odb::pack] @@ -144,6 +145,8 @@ pub use git_url as url; #[doc(inline)] #[cfg(all(feature = "unstable", feature = "git-url"))] pub use git_url::Url; +#[cfg(all(feature = "unstable", feature = "git-worktree"))] +pub use git_worktree as worktree; pub use hash::{oid, ObjectId}; pub mod interrupt; @@ -286,3 +289,26 @@ pub mod discover { } } } + +/// +pub mod env { + use std::ffi::OsString; + + /// Equivalent to `std::env::args_os()`, but with precomposed unicode on MacOS and other apple platforms. 
+ #[cfg(not(target_vendor = "apple"))] + pub fn args_os() -> impl Iterator { + std::env::args_os() + } + + /// Equivalent to `std::env::args_os()`, but with precomposed unicode on MacOS and other apple platforms. + /// + /// Note that this ignores `core.precomposeUnicode` as git-config isn't available yet. It's default enabled in modern git though. + #[cfg(target_vendor = "apple")] + pub fn args_os() -> impl Iterator { + use unicode_normalization::UnicodeNormalization; + std::env::args_os().map(|arg| match arg.to_str() { + Some(arg) => arg.nfc().collect::().into(), + None => arg, + }) + } +} diff --git a/git-worktree/src/fs.rs b/git-worktree/src/fs.rs index 7c71001160b..e301fb386df 100644 --- a/git-worktree/src/fs.rs +++ b/git-worktree/src/fs.rs @@ -90,14 +90,14 @@ impl Capabilities { .write(true) .open(&src_path)?; let link_path = root.join("__file_link"); - if symlink::symlink_file(&src_path, &link_path).is_err() { + if crate::os::create_symlink(&src_path, &link_path).is_err() { std::fs::remove_file(&src_path)?; return Ok(false); } let res = std::fs::symlink_metadata(&link_path).map(|m| m.is_symlink()); let cleanup = std::fs::remove_file(&src_path); - symlink::remove_symlink_file(&link_path) + crate::os::remove_symlink(&link_path) .or_else(|_| std::fs::remove_file(&link_path)) .and(cleanup)?; res diff --git a/git-worktree/src/index.rs b/git-worktree/src/index.rs deleted file mode 100644 index d8afb59df46..00000000000 --- a/git-worktree/src/index.rs +++ /dev/null @@ -1,245 +0,0 @@ -use git_hash::oid; - -use crate::{index, index::checkout::Collision}; - -pub mod checkout { - use bstr::BString; - use quick_error::quick_error; - - #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] - pub struct Collision { - /// the path that collided with something already present on disk. - pub path: BString, - /// The io error we encountered when checking out `path`. 
- pub error_kind: std::io::ErrorKind, - } - - pub struct Outcome { - pub collisions: Vec, - } - - #[derive(Clone, Copy)] - pub struct Options { - /// capabilities of the file system - pub fs: crate::fs::Capabilities, - /// If true, we assume no file to exist in the target directory, and want exclusive access to it. - /// This should be enabled when cloning to avoid checks for freshness of files. This also enables - /// detection of collisions based on whether or not exclusive file creation succeeds or fails. - pub destination_is_initially_empty: bool, - /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't - /// due to a conflict. - /// The operation will never fail, but count the encountered errors instead along with their paths. - pub keep_going: bool, - /// If true, a files creation time is taken into consideration when checking if a file changed. - /// Can be set to false in case other tools alter the creation time in ways that interfere with our operation. - /// - /// Default true. - pub trust_ctime: bool, - /// If true, all stat fields will be used when checking for up-to-date'ness of the entry. Otherwise - /// nano-second parts of mtime and ctime,uid, gid, inode and device number won't be used, leaving only - /// the whole-second part of ctime and mtime and the file size to be checked. - /// - /// Default true. - pub check_stat: bool, - } - - impl Default for Options { - fn default() -> Self { - Options { - fs: Default::default(), - destination_is_initially_empty: false, - keep_going: false, - trust_ctime: true, - check_stat: true, - } - } - } - - quick_error! 
{ - #[derive(Debug)] - pub enum Error { - IllformedUtf8{ path: BString } { - display("Could not convert path to UTF8: {}", path) - } - Time(err: std::time::SystemTimeError) { - from() - source(err) - display("The clock was off when reading file related metadata after updating a file on disk") - } - Io(err: std::io::Error) { - from() - source(err) - display("IO error while writing blob or reading file metadata or changing filetype") - } - ObjectNotFound{ oid: git_hash::ObjectId, path: std::path::PathBuf } { - display("object {} for checkout at {} not found in object database", oid.to_hex(), path.display()) - } - } - } -} - -pub fn checkout( - index: &mut git_index::State, - path: impl AsRef, - mut find: Find, - options: checkout::Options, -) -> Result -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, -{ - use std::io::ErrorKind::AlreadyExists; - let root = path.as_ref(); - let mut buf = Vec::new(); - let mut collisions = Vec::new(); - for (entry, entry_path) in index.entries_mut_with_paths() { - // TODO: write test for that - if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { - continue; - } - - let res = entry::checkout(entry, entry_path, &mut find, root, options, &mut buf); - match res { - Ok(()) => {} - // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() - #[cfg(windows)] - Err(index::checkout::Error::Io(err)) - if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => - { - collisions.push(Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - } - #[cfg(not(windows))] - Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { - // We are here because a file existed or was blocked by a directory which shouldn't be possible unless - // we are on a file insensitive file system. 
- collisions.push(Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - } - Err(err) => { - if options.keep_going { - todo!("keep going") - } else { - return Err(err); - } - } - } - } - Ok(checkout::Outcome { collisions }) -} - -pub(crate) mod entry { - use std::{ - convert::TryInto, - fs::{create_dir_all, OpenOptions}, - io::Write, - time::Duration, - }; - - use bstr::BStr; - use git_hash::oid; - use git_index::Entry; - - use crate::index; - - #[cfg_attr(not(unix), allow(unused_variables))] - pub fn checkout( - entry: &mut Entry, - entry_path: &BStr, - find: &mut Find, - root: &std::path::Path, - index::checkout::Options { - fs: - crate::fs::Capabilities { - symlink, - executable_bit, - .. - }, - destination_is_initially_empty, - .. - }: index::checkout::Options, - buf: &mut Vec, - ) -> Result<(), index::checkout::Error> - where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, - { - let dest = root.join(git_features::path::from_byte_slice(entry_path).map_err(|_| { - index::checkout::Error::IllformedUtf8 { - path: entry_path.to_owned(), - } - })?); - create_dir_all(dest.parent().expect("entry paths are never empty"))?; // TODO: can this be avoided to create dirs when needed only? - - match entry.mode { - git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { - let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { - oid: entry.id, - path: root.to_path_buf(), - })?; - let mut options = OpenOptions::new(); - options - .create_new(destination_is_initially_empty) - .create(!destination_is_initially_empty) - .write(true); - #[cfg(unix)] - if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE { - use std::os::unix::fs::OpenOptionsExt; - options.mode(0o777); - } - - let mut file = options.open(&dest)?; - file.write_all(obj.data)?; - // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. - // revisit this once there is a bug to fix. 
- update_fstat(entry, file.metadata()?)?; - } - git_index::entry::Mode::SYMLINK => { - let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { - oid: entry.id, - path: root.to_path_buf(), - })?; - let symlink_destination = git_features::path::from_byte_slice(obj.data) - .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; - - // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether - // a checkout is needed at all. - if symlink { - symlink::symlink_auto(symlink_destination, &dest)?; - } else { - std::fs::write(&dest, obj.data)?; - } - - update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; - } - git_index::entry::Mode::DIR => todo!(), - git_index::entry::Mode::COMMIT => todo!(), - _ => unreachable!(), - } - Ok(()) - } - - fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> { - let ctime = meta - .created() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - let mtime = meta - .modified() - .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; - - let stat = &mut entry.stat; - stat.mtime.secs = mtime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this"); - stat.mtime.nsecs = mtime.subsec_nanos(); - stat.ctime.secs = ctime - .as_secs() - .try_into() - .expect("by 2038 we found a solution for this"); - stat.ctime.nsecs = ctime.subsec_nanos(); - Ok(()) - } -} diff --git a/git-worktree/src/index/checkout.rs b/git-worktree/src/index/checkout.rs new file mode 100644 index 00000000000..cc7da1001fd --- /dev/null +++ b/git-worktree/src/index/checkout.rs @@ -0,0 +1,208 @@ +use bstr::BString; +use quick_error::quick_error; +use std::path::PathBuf; + +/// A cache for directory creation to reduce the amount of stat calls when creating +/// directories safely, that is without following symlinks that might be on the way. 
+/// +/// As a special case, it offers a 'prefix' which (by itself) is assumed to exist and may contain symlinks. +/// Everything past that prefix boundary must not contain a symlink. We do this by allowing any input path. +/// +/// Another added benefit is its ability to store the path of full path of the entry to which leading directories +/// are to be created to avoid allocating memory. +/// +/// For this to work, it remembers the last 'good' path to a directory and assumes that all components of it +/// are still valid, too. +/// As directories are created, the cache will be adjusted to reflect the latest seen directory. +/// +/// The caching is only useful if consecutive calls to create a directory are using a sorted list of entries. +#[allow(unused)] +pub struct PathCache { + /// The prefix/root for all paths we handle. + root: PathBuf, + /// the most recent known cached that we know is valid. + valid: PathBuf, + /// The relative portion of `valid` that was added previously. + valid_relative: PathBuf, + /// The amount of path components of 'valid' beyond the roots components. If `root` has 2, and this is 2, `valid` has 4 components. + valid_components: usize, + + /// If there is a symlink or a file in our path, try to unlink it before creating the directory. + pub unlink_on_collision: bool, + + /// just for testing + #[cfg(debug_assertions)] + pub test_mkdir_calls: usize, +} + +mod cache { + use super::PathCache; + use std::path::{Path, PathBuf}; + + impl PathCache { + /// Create a new instance with `root` being the base for all future paths we handle, assuming it to be valid which includes + /// symbolic links to be included in it as well. 
+ pub fn new(root: impl Into) -> Self { + let root = root.into(); + PathCache { + valid: root.clone(), + valid_relative: PathBuf::with_capacity(128), + valid_components: 0, + root, + #[cfg(debug_assertions)] + test_mkdir_calls: 0, + unlink_on_collision: false, + } + } + + /// Append the `relative` path to the root directory the cache contains and efficiently create leading directories + /// unless `mode` indicates `relative` points to a directory itself in which case the entire resulting path is created as directory. + /// + /// The full path to `relative` will be returned for use on the file system. + pub fn append_relative_path_assure_leading_dir( + &mut self, + relative: impl AsRef, + mode: git_index::entry::Mode, + ) -> std::io::Result<&Path> { + let relative = relative.as_ref(); + debug_assert!( + relative.is_relative(), + "only index paths are handled correctly here, must be relative" + ); + + let mut components = relative.components().peekable(); + let mut existing_components = self.valid_relative.components(); + let mut matching_components = 0; + while let (Some(existing_comp), Some(new_comp)) = (existing_components.next(), components.peek()) { + if existing_comp == *new_comp { + components.next(); + matching_components += 1; + } else { + break; + } + } + + // TODO: handle valid state properly, handle _mode. 
+ for _ in 0..self.valid_components - matching_components { + self.valid.pop(); + } + + self.valid_components = matching_components; + + let target_is_dir = mode == git_index::entry::Mode::COMMIT || mode == git_index::entry::Mode::DIR; + while let Some(comp) = components.next() { + self.valid.push(comp); + self.valid_relative.push(comp); + self.valid_components += 1; + if components.peek().is_some() || target_is_dir { + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } + match std::fs::create_dir(&self.valid) { + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => { + let meta = self.valid.symlink_metadata()?; + if !meta.is_dir() { + if self.unlink_on_collision { + if meta.is_symlink() { + symlink::remove_symlink_auto(&self.valid)?; + } else { + std::fs::remove_file(&self.valid)?; + } + #[cfg(debug_assertions)] + { + self.test_mkdir_calls += 1; + } + std::fs::create_dir(&self.valid)?; + continue; + } + return Err(err); + } + } + Err(err) => return Err(err), + } + } + } + + Ok(&self.valid) + } + } +} + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Collision { + /// the path that collided with something already present on disk. + pub path: BString, + /// The io error we encountered when checking out `path`. + pub error_kind: std::io::ErrorKind, +} + +pub struct Outcome { + pub collisions: Vec, +} + +#[derive(Clone, Copy)] +pub struct Options { + /// capabilities of the file system + pub fs: crate::fs::Capabilities, + /// If true, we assume no file to exist in the target directory, and want exclusive access to it. + /// This should be enabled when cloning to avoid checks for freshness of files. This also enables + /// detection of collisions based on whether or not exclusive file creation succeeds or fails. + pub destination_is_initially_empty: bool, + /// If true, default false, worktree entries on disk will be overwritten with content from the index + /// even if they appear to be changed. 
When creating directories that clash with existing worktree entries, + /// these will try to delete the existing entry. + /// This is similar in behaviour as `git checkout --force`. + pub overwrite_existing: bool, + /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't + /// due to a conflict. + /// The operation will never fail, but count the encountered errors instead along with their paths. + pub keep_going: bool, + /// If true, a files creation time is taken into consideration when checking if a file changed. + /// Can be set to false in case other tools alter the creation time in ways that interfere with our operation. + /// + /// Default true. + pub trust_ctime: bool, + /// If true, all stat fields will be used when checking for up-to-date'ness of the entry. Otherwise + /// nano-second parts of mtime and ctime,uid, gid, inode and device number won't be used, leaving only + /// the whole-second part of ctime and mtime and the file size to be checked. + /// + /// Default true. + pub check_stat: bool, +} + +impl Default for Options { + fn default() -> Self { + Options { + fs: Default::default(), + destination_is_initially_empty: false, + keep_going: false, + trust_ctime: true, + check_stat: true, + overwrite_existing: false, + } + } +} + +quick_error! 
{ + #[derive(Debug)] + pub enum Error { + IllformedUtf8{ path: BString } { + display("Could not convert path to UTF8: {}", path) + } + Time(err: std::time::SystemTimeError) { + from() + source(err) + display("The clock was off when reading file related metadata after updating a file on disk") + } + Io(err: std::io::Error) { + from() + source(err) + display("IO error while writing blob or reading file metadata or changing filetype") + } + ObjectNotFound{ oid: git_hash::ObjectId, path: std::path::PathBuf } { + display("object {} for checkout at {} not found in object database", oid.to_hex(), path.display()) + } + } +} diff --git a/git-worktree/src/index/entry.rs b/git-worktree/src/index/entry.rs new file mode 100644 index 00000000000..1aad218b075 --- /dev/null +++ b/git-worktree/src/index/entry.rs @@ -0,0 +1,107 @@ +use std::{convert::TryInto, fs::OpenOptions, io::Write, time::Duration}; + +use bstr::BStr; +use git_hash::oid; +use git_index::Entry; + +use crate::index; +use crate::index::checkout::PathCache; + +#[cfg_attr(not(unix), allow(unused_variables))] +pub fn checkout( + entry: &mut Entry, + entry_path: &BStr, + find: &mut Find, + cache: &mut PathCache, + index::checkout::Options { + fs: crate::fs::Capabilities { + symlink, + executable_bit, + .. + }, + destination_is_initially_empty, + .. 
+ }: index::checkout::Options, + buf: &mut Vec, +) -> Result +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, +{ + let dest = cache.append_relative_path_assure_leading_dir( + git_features::path::from_byte_slice(entry_path).map_err(|_| index::checkout::Error::IllformedUtf8 { + path: entry_path.to_owned(), + })?, + entry.mode, + )?; + + let object_size = match entry.mode { + git_index::entry::Mode::FILE | git_index::entry::Mode::FILE_EXECUTABLE => { + let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { + oid: entry.id, + path: dest.to_path_buf(), + })?; + let mut options = OpenOptions::new(); + options + .create_new(destination_is_initially_empty) + .create(!destination_is_initially_empty) + .write(true); + #[cfg(unix)] + if executable_bit && entry.mode == git_index::entry::Mode::FILE_EXECUTABLE { + use std::os::unix::fs::OpenOptionsExt; + options.mode(0o777); + } + + let mut file = options.open(&dest)?; + file.write_all(obj.data)?; + // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. + // revisit this once there is a bug to fix. + update_fstat(entry, file.metadata()?)?; + obj.data.len() + } + git_index::entry::Mode::SYMLINK => { + let obj = find(&entry.id, buf).ok_or_else(|| index::checkout::Error::ObjectNotFound { + oid: entry.id, + path: dest.to_path_buf(), + })?; + let symlink_destination = git_features::path::from_byte_slice(obj.data) + .map_err(|_| index::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; + + // TODO: how to deal with mode changes? Maybe this info can be passed once we check for whether + // a checkout is needed at all. 
+ if symlink { + crate::os::create_symlink(symlink_destination, dest)?; + } else { + std::fs::write(&dest, obj.data)?; + } + + update_fstat(entry, std::fs::symlink_metadata(&dest)?)?; + obj.data.len() + } + git_index::entry::Mode::DIR => todo!(), + git_index::entry::Mode::COMMIT => todo!(), + _ => unreachable!(), + }; + Ok(object_size) +} + +fn update_fstat(entry: &mut Entry, meta: std::fs::Metadata) -> Result<(), index::checkout::Error> { + let ctime = meta + .created() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + let mtime = meta + .modified() + .map_or(Ok(Duration::default()), |x| x.duration_since(std::time::UNIX_EPOCH))?; + + let stat = &mut entry.stat; + stat.mtime.secs = mtime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this"); + stat.mtime.nsecs = mtime.subsec_nanos(); + stat.ctime.secs = ctime + .as_secs() + .try_into() + .expect("by 2038 we found a solution for this"); + stat.ctime.nsecs = ctime.subsec_nanos(); + Ok(()) +} diff --git a/git-worktree/src/index/mod.rs b/git-worktree/src/index/mod.rs new file mode 100644 index 00000000000..6ffba9960e1 --- /dev/null +++ b/git-worktree/src/index/mod.rs @@ -0,0 +1,72 @@ +use git_features::progress::Progress; +use git_hash::oid; + +use crate::index::checkout::PathCache; +use crate::{index, index::checkout::Collision}; + +pub mod checkout; +pub(crate) mod entry; + +pub fn checkout( + index: &mut git_index::State, + dir: impl Into, + mut find: Find, + files: &mut impl Progress, + bytes: &mut impl Progress, + options: checkout::Options, +) -> Result +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Option>, +{ + if !options.destination_is_initially_empty { + todo!("deal with non-clone checkouts") + } + + use std::io::ErrorKind::AlreadyExists; + let mut path_cache = PathCache::new(dir.into()); + path_cache.unlink_on_collision = options.overwrite_existing; + + let mut buf = Vec::new(); + let mut collisions = Vec::new(); + + for (entry, entry_path) in 
index.entries_mut_with_paths() { + // TODO: write test for that + if entry.flags.contains(git_index::entry::Flags::SKIP_WORKTREE) { + files.inc(); + continue; + } + + let res = entry::checkout(entry, entry_path, &mut find, &mut path_cache, options, &mut buf); + files.inc(); + match res { + Ok(object_size) => bytes.inc_by(object_size), + #[cfg(windows)] + Err(index::checkout::Error::Io(err)) + if err.kind() == AlreadyExists || err.kind() == std::io::ErrorKind::PermissionDenied => + { + collisions.push(Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + } + // TODO: use ::IsDirectory as well when stabilized instead of raw_os_error() + #[cfg(not(windows))] + Err(index::checkout::Error::Io(err)) if err.kind() == AlreadyExists || err.raw_os_error() == Some(21) => { + // We are here because a file existed or was blocked by a directory which shouldn't be possible unless + // we are on a file insensitive file system. + collisions.push(Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + } + Err(err) => { + if options.keep_going { + todo!("keep going") + } else { + return Err(err); + } + } + } + } + Ok(checkout::Outcome { collisions }) +} diff --git a/git-worktree/src/lib.rs b/git-worktree/src/lib.rs index 587771d9983..0bd3954b85d 100644 --- a/git-worktree/src/lib.rs +++ b/git-worktree/src/lib.rs @@ -10,3 +10,5 @@ pub mod fs; pub mod index; + +pub(crate) mod os; diff --git a/git-worktree/src/os.rs b/git-worktree/src/os.rs new file mode 100644 index 00000000000..ba66fbd04c9 --- /dev/null +++ b/git-worktree/src/os.rs @@ -0,0 +1,28 @@ +use std::io; +use std::path::Path; + +#[cfg(not(windows))] +pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { + std::os::unix::fs::symlink(original, link) +} + +#[cfg(not(windows))] +pub fn remove_symlink(path: &Path) -> io::Result<()> { + std::fs::remove_file(path) +} + +#[cfg(windows)] +pub fn remove_symlink(path: &Path) -> io::Result<()> { + symlink::remove_symlink_auto(path) +} + 
+#[cfg(windows)] +pub fn create_symlink(original: &Path, link: &Path) -> io::Result<()> { + use std::os::windows::fs::{symlink_dir, symlink_file}; + // TODO: figure out if links to links count as files or whatever they point at + if std::fs::metadata(link.parent().expect("dir for link").join(original))?.is_dir() { + symlink_dir(original, link) + } else { + symlink_file(original, link) + } +} diff --git a/git-worktree/tests/fixtures/make_ignorecase_collisions.sh b/git-worktree/tests/fixtures/make_ignorecase_collisions.sh index 9fb772681e0..f93284fbdeb 100644 --- a/git-worktree/tests/fixtures/make_ignorecase_collisions.sh +++ b/git-worktree/tests/fixtures/make_ignorecase_collisions.sh @@ -5,6 +5,7 @@ git init -q git config commit.gpgsign false empty_oid=$(git hash-object -w --stdin (PathCache, TempDir) { + let dir = tempdir().unwrap(); + let cache = PathCache::new(dir.path()); + (cache, dir) + } + } + #[cfg(unix)] use std::os::unix::prelude::MetadataExt; use std::{ @@ -7,6 +95,7 @@ mod checkout { path::{Path, PathBuf}, }; + use git_features::progress; use git_object::bstr::ByteSlice; use git_odb::FindExt; use git_worktree::{fs::Capabilities, index, index::checkout::Collision}; @@ -14,23 +103,6 @@ mod checkout { use crate::fixture_path; - fn probe_gitoxide_dir() -> crate::Result { - Ok(git_worktree::fs::Capabilities::probe( - std::env::current_dir()?.join("..").join(".git"), - )) - } - - fn opts_with_symlink(symlink: bool) -> index::checkout::Options { - index::checkout::Options { - fs: git_worktree::fs::Capabilities { - symlink, - ..Default::default() - }, - destination_is_initially_empty: true, - ..Default::default() - } - } - #[test] fn symlinks_become_files_if_disabled() -> crate::Result { let opts = opts_with_symlink(false); @@ -39,7 +111,6 @@ mod checkout { assert_equality(&source_tree, &destination, opts.fs.symlink)?; assert!(outcome.collisions.is_empty()); - Ok(()) } @@ -76,14 +147,29 @@ mod checkout { #[test] fn 
collisions_are_detected_on_a_case_sensitive_filesystem() { - if !probe_gitoxide_dir().unwrap().ignore_case { + let fs_caps = probe_gitoxide_dir().unwrap(); + if !fs_caps.ignore_case { eprintln!("Skipping case-insensitive testing on what would be a case-senstive file system"); return; } - let opts = opts_with_symlink(true); + let opts = opts_with_symlink(fs_caps.symlink); let (source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + let source_files = dir_structure(&source_tree); + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["d", "file_x", "link-to-X", "x"]), + "plenty of collisions prevent a checkout" + ); + + let dest_files = dir_structure(&destination); + assert_eq!( + stripped_prefix(&destination, &dest_files), + paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), + "we checkout files in order and generally handle collision detection differently, hence the difference" + ); + let error_kind = ErrorKind::AlreadyExists; #[cfg(windows)] let error_kind_dir = ErrorKind::PermissionDenied; @@ -109,23 +195,13 @@ mod checkout { path: "file_x".into(), error_kind, }, + Collision { + path: "x".into(), + error_kind, + }, ], "these files couldn't be checked out" ); - - let source_files = dir_structure(&source_tree); - assert_eq!( - stripped_prefix(&source_tree, &source_files), - vec![PathBuf::from("d"), PathBuf::from("file_x")], - "plenty of collisions prevent a checkout" - ); - - let dest_files = dir_structure(&destination); - assert_eq!( - stripped_prefix(&destination, &dest_files), - vec![PathBuf::from("D/B"), PathBuf::from("D/C"), PathBuf::from("FILE_X")], - "we checkout files in order and generally handle collision detection differently, hence the difference" - ); } fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { @@ -186,8 +262,10 @@ mod checkout { let outcome = index::checkout( &mut index, - &destination, + destination.path(), move 
|oid, buf| odb.find_blob(oid, buf).ok(), + &mut progress::Discard, + &mut progress::Discard, opts, )?; Ok((source_tree, destination, index, outcome)) @@ -196,4 +274,25 @@ mod checkout { fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() } + + fn probe_gitoxide_dir() -> crate::Result { + Ok(git_worktree::fs::Capabilities::probe( + std::env::current_dir()?.join("..").join(".git"), + )) + } + + fn opts_with_symlink(symlink: bool) -> index::checkout::Options { + index::checkout::Options { + fs: git_worktree::fs::Capabilities { + symlink, + ..Default::default() + }, + destination_is_initially_empty: true, + ..Default::default() + } + } + + fn paths<'a>(p: impl IntoIterator) -> Vec { + p.into_iter().map(PathBuf::from).collect() + } } diff --git a/gitoxide-core/src/index/mod.rs b/gitoxide-core/src/index/mod.rs index 42ea565aa69..6d12fa9997f 100644 --- a/gitoxide-core/src/index/mod.rs +++ b/gitoxide-core/src/index/mod.rs @@ -1,6 +1,8 @@ -use std::path::Path; +use anyhow::bail; +use std::path::{Path, PathBuf}; use git_repository as git; +use git_repository::{odb::FindExt, Progress}; pub struct Options { pub object_hash: git::hash::Kind, @@ -98,3 +100,118 @@ fn parse_file(index_path: impl AsRef, object_hash: git::hash::Kind) -> any ) .map_err(Into::into) } + +pub mod checkout_exclusive { + pub struct Options { + pub index: super::Options, + /// If true, all files will be written with zero bytes despite having made an ODB lookup. + pub empty_files: bool, + } +} + +/// Check out the index at `index_path` into `dest_directory`, refusing to run if that directory already exists. +/// +/// If `repo` is given, blobs are looked up in the repository discovered there; `empty_files` still performs the +/// lookup but writes zero bytes. Without `repo`, all files are written empty and no object database is queried. +pub fn checkout_exclusive( + index_path: impl AsRef, + dest_directory: impl AsRef, + repo: Option, + mut progress: impl Progress, + checkout_exclusive::Options { + index: Options { object_hash, .. 
}, + empty_files, + }: checkout_exclusive::Options, +) -> anyhow::Result<()> { + let repo = repo + .map(|dir| git_repository::discover(dir).map(|r| r.apply_environment())) + .transpose()?; + + let dest_directory = dest_directory.as_ref(); + if dest_directory.exists() { + bail!( + "Refusing to checkout index into existing directory '{}' - remove it and try again", + dest_directory.display() + ) + } + std::fs::create_dir_all(dest_directory)?; + + let mut index = parse_file(index_path, object_hash)?; + + let mut num_skipped = 0; + let maybe_symlink_mode = if !empty_files && repo.is_some() { + git::index::entry::Mode::DIR + } else { + git::index::entry::Mode::SYMLINK + }; + // NOTE(review): if `Mode` is a bitflags type, `contains` matches only when every bit of the combined mask is set - verify this selects DIR and SYMLINK entries too, not just COMMIT. + for entry in index.entries_mut().iter_mut().filter(|e| { + e.mode + .contains(maybe_symlink_mode | git::index::entry::Mode::DIR | git::index::entry::Mode::COMMIT) + }) { + entry.flags.insert(git::index::entry::Flags::SKIP_WORKTREE); + num_skipped += 1; + } + if num_skipped > 0 { + progress.info(format!("Skipping {} DIR/SYMLINK/COMMIT entries", num_skipped)); + } + + let opts = git::worktree::index::checkout::Options { + fs: git::worktree::fs::Capabilities::probe(dest_directory), + + // TODO: turn the two following flags into an enum + destination_is_initially_empty: true, + overwrite_existing: false, + ..Default::default() + }; + + let mut files = progress.add_child("checkout"); + let mut bytes = progress.add_child("writing"); + + let entries_for_checkout = index.entries().len() - num_skipped; + files.init(Some(entries_for_checkout), git::progress::count("files")); + bytes.init(None, git::progress::bytes()); + + let start = std::time::Instant::now(); + match &repo { + Some(repo) => git::worktree::index::checkout( + &mut index, + dest_directory, + |oid, buf| { + if empty_files { + // We always want to query the ODB here… + repo.objects.find_blob(oid, buf).ok(); + buf.clear(); + // …but write nothing + Some(git::objs::BlobRef { data: buf }) + } else { + 
repo.objects.find_blob(oid, buf).ok() + } + }, + &mut files, + &mut bytes, + opts, + ), + None => git::worktree::index::checkout( + &mut index, + dest_directory, + |_, buf| { + buf.clear(); + Some(git::objs::BlobRef { data: buf }) + }, + &mut files, + &mut bytes, + opts, + ), + }?; + + files.show_throughput(start); + bytes.show_throughput(start); + + progress.done(format!( + "Created {} {} files", + entries_for_checkout, + repo.is_none().then(|| "empty").unwrap_or_default() + )); + Ok(()) +} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index a38102c08eb..22346e55660 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -46,7 +46,7 @@ pub mod async_util { } pub fn main() -> Result<()> { - let args: Args = Args::parse(); + let args: Args = Args::parse_from(git_repository::env::args_os()); let thread_limit = args.threads; let verbose = args.verbose; let format = args.format; @@ -78,6 +78,29 @@ pub fn main() -> Result<()> { index_path, cmd, }) => match cmd { + index::Subcommands::CheckoutExclusive { + directory, + empty_files, + repository, + } => prepare_and_run( + "index-checkout", + verbose, + progress, + progress_keep_open, + None, + move |progress, _out, _err| { + core::index::checkout_exclusive( + index_path, + directory, + repository, + progress, + core::index::checkout_exclusive::Options { + index: core::index::Options { object_hash, format }, + empty_files, + }, + ) + }, + ), index::Subcommands::Info { no_details } => prepare_and_run( "index-entries", verbose, diff --git a/src/plumbing/options.rs b/src/plumbing/options.rs index 95f066e2329..43fda5ca779 100644 --- a/src/plumbing/options.rs +++ b/src/plumbing/options.rs @@ -206,11 +206,9 @@ pub mod pack { sink_compress: bool, /// The '.pack' or '.idx' file to explode into loose objects - #[clap(parse(from_os_str))] pack_path: PathBuf, /// The path into which all objects should be written. 
Commonly '.git/objects' - #[clap(parse(from_os_str))] object_path: Option, }, /// Verify the integrity of a pack, index or multi-index file @@ -219,7 +217,6 @@ pub mod pack { args: VerifyOptions, /// The '.pack', '.idx' or 'multi-pack-index' file to validate. - #[clap(parse(from_os_str))] path: PathBuf, }, } @@ -316,7 +313,6 @@ pub mod pack { /// The folder into which to place the pack and the generated index file /// /// If unset, only informational output will be provided to standard output. - #[clap(parse(from_os_str))] directory: Option, }, } @@ -371,6 +367,19 @@ pub mod index { #[clap(long)] no_details: bool, }, + /// Checkout the index into a directory with exclusive write access, similar to what would happen during clone. + CheckoutExclusive { + /// The path to the `.git` repository from which objects can be obtained to write the actual files referenced + /// in the index. Use this to measure the impact of extracting objects on overall performance. + #[clap(long, short = 'r')] + repository: Option, + /// Enable to query the object database yet write only empty files. This is useful to measure the overhead of ODB query + /// compared to writing the bytes to disk. + #[clap(long, short = 'e', requires = "repository")] + empty_files: bool, + /// The directory into which to write all index entries. + directory: PathBuf, + }, } } @@ -383,7 +392,6 @@ pub mod commitgraph { /// Verify the integrity of a commit graph Verify { /// The path to '.git/objects/info/', '.git/objects/info/commit-graphs/', or '.git/objects/info/commit-graph' to validate. 
- #[clap(parse(from_os_str))] path: PathBuf, /// output statistical information about the pack #[clap(long, short = 's')] diff --git a/src/porcelain/main.rs b/src/porcelain/main.rs index b59e087eddf..4992801a126 100644 --- a/src/porcelain/main.rs +++ b/src/porcelain/main.rs @@ -13,7 +13,7 @@ use crate::{ }; pub fn main() -> Result<()> { - let args: Args = Args::parse(); + let args: Args = Args::parse_from(git_repository::env::args_os()); let should_interrupt = Arc::new(AtomicBool::new(false)); git_repository::interrupt::init_handler({ let should_interrupt = Arc::clone(&should_interrupt); diff --git a/src/shared.rs b/src/shared.rs index dba706f1d99..0ffee9bb522 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -49,7 +49,7 @@ pub mod pretty { use std::io::{stderr, stdout}; use anyhow::Result; - use git_repository::progress; + use git_features::progress; use crate::shared::ProgressRange;